feat(v2): various markdown string parsing improvements/fixes (#4590)

* extract createExcerpt code in separate file + add bad test

* almost working markdown parsing refactor

* complete parseMarkdownString refactor

* fix tests

* fix blog test issue

* fix docusaurus utils imports
This commit is contained in:
Sébastien Lorber 2021-04-09 17:09:33 +02:00 committed by GitHub
parent b743edf5fb
commit 4efe6824b3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 895 additions and 563 deletions

View file

@ -9,7 +9,10 @@ const {getOptions} = require('loader-utils');
const {readFile} = require('fs-extra');
const mdx = require('@mdx-js/mdx');
const emoji = require('remark-emoji');
const {readFrontMatter} = require('@docusaurus/utils');
const {
parseFrontMatter,
parseMarkdownContentTitle,
} = require('@docusaurus/utils');
const stringifyObject = require('stringify-object');
const headings = require('./remark/headings');
const toc = require('./remark/toc');
@ -26,12 +29,14 @@ module.exports = async function docusaurusMdxLoader(fileString) {
const callback = this.async();
const reqOptions = getOptions(this) || {};
const {frontMatter, content, hasFrontMatter} = readFrontMatter(
fileString,
this.resourcePath,
{},
reqOptions.removeTitleHeading,
);
const {frontMatter, content: contentWithTitle} = parseFrontMatter(fileString);
// By default, will remove the markdown title from the content
const {content} = parseMarkdownContentTitle(contentWithTitle, {
keepContentTitle: reqOptions.keepContentTitle,
});
const hasFrontMatter = Object.keys(frontMatter).length > 0;
const options = {
...reqOptions,

View file

@ -12,9 +12,26 @@ import path from 'path';
import pluginContentBlog from '../index';
import {DocusaurusConfig, LoadContext, I18n} from '@docusaurus/types';
import {PluginOptionSchema} from '../pluginOptionSchema';
import {PluginOptions, EditUrlFunction} from '../types';
import {PluginOptions, EditUrlFunction, BlogPost} from '../types';
import {Joi} from '@docusaurus/utils-validation';
/**
 * Looks up the first blog post whose metadata title is strictly equal to
 * `title`. Returns `undefined` when no post matches.
 */
function findByTitle(
  blogPosts: BlogPost[],
  title: string,
): BlogPost | undefined {
  // eslint-disable-next-line no-restricted-syntax
  for (const blogPost of blogPosts) {
    if (blogPost.metadata.title === title) {
      return blogPost;
    }
  }
  return undefined;
}
/**
 * Same lookup as `findByTitle`, but never returns `undefined`: when no post
 * has the requested title it throws an Error listing every available title,
 * so a failing test shows what was actually loaded.
 */
function getByTitle(blogPosts: BlogPost[], title: string): BlogPost {
  const match = findByTitle(blogPosts, title);
  if (match) {
    return match;
  }
  const availableTitles = blogPosts
    .map((blogPost) => blogPost.metadata.title)
    .join('\n- ');
  throw new Error(`can't find blog post with title ${title}.
Available blog post titles are:\n- ${availableTitles}`);
}
function getI18n(locale: string): I18n {
return {
currentLocale: locale,
@ -77,7 +94,7 @@ describe('loadBlog', () => {
const blogPosts = await getBlogPosts(siteDir);
expect({
...blogPosts.find((v) => v.metadata.title === 'date-matter')!.metadata,
...getByTitle(blogPosts, 'date-matter').metadata,
...{prevItem: undefined},
}).toEqual({
editUrl: `${BaseEditUrl}/blog/date-matter.md`,
@ -98,9 +115,7 @@ describe('loadBlog', () => {
});
expect(
blogPosts.find(
(v) => v.metadata.title === 'Happy 1st Birthday Slash! (translated)',
)!.metadata,
getByTitle(blogPosts, 'Happy 1st Birthday Slash! (translated)').metadata,
).toEqual({
editUrl: `${BaseEditUrl}/blog/2018-12-14-Happy-First-Birthday-Slash.md`,
permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash',
@ -124,7 +139,7 @@ describe('loadBlog', () => {
});
expect({
...blogPosts.find((v) => v.metadata.title === 'Complex Slug')!.metadata,
...getByTitle(blogPosts, 'Complex Slug').metadata,
...{prevItem: undefined},
}).toEqual({
editUrl: `${BaseEditUrl}/blog/complex-slug.md`,
@ -145,7 +160,7 @@ describe('loadBlog', () => {
});
expect({
...blogPosts.find((v) => v.metadata.title === 'Simple Slug')!.metadata,
...getByTitle(blogPosts, 'Simple Slug').metadata,
...{prevItem: undefined},
}).toEqual({
editUrl: `${BaseEditUrl}/blog/simple-slug.md`,
@ -166,7 +181,7 @@ describe('loadBlog', () => {
});
expect({
...blogPosts.find((v) => v.metadata.title === 'some heading')!.metadata,
...getByTitle(blogPosts, 'some heading').metadata,
prevItem: undefined,
}).toEqual({
editUrl: `${BaseEditUrl}/blog/heading-as-title.md`,
@ -301,7 +316,7 @@ describe('loadBlog', () => {
}).format(noDateSourceBirthTime);
expect({
...blogPosts.find((v) => v.metadata.title === 'no date')!.metadata,
...getByTitle(blogPosts, 'no date').metadata,
...{prevItem: undefined},
}).toEqual({
editUrl: `${BaseEditUrl}/blog/no date.md`,

View file

@ -0,0 +1,43 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {Joi} from '@docusaurus/utils-validation';
import {Tag} from './types';
// TODO complete this frontmatter + add unit tests
/**
 * Front matter fields of a blog post that this module declares.
 * Every field is optional.
 *
 * NOTE(review): `date` is declared here as a string, but BlogFrontMatterSchema
 * has no rule for it, so its runtime type is not checked by
 * assertBlogPostFrontMatter - confirm what the front matter parser yields.
 */
type BlogPostFrontMatter = {
id?: string;
title?: string;
description?: string;
// A tag is either a plain string or a Tag object (see BlogTagSchema).
tags?: (string | Tag)[];
slug?: string;
draft?: boolean;
date?: string;
};
/**
 * Schema for a single entry of the `tags` front matter array: either a string,
 * or an object whose `label` and `permalink` are both required strings.
 */
const BlogTagSchema = Joi.alternatives().try(
Joi.string().required(),
Joi.object<Tag>({
label: Joi.string().required(),
permalink: Joi.string().required(),
}),
);
/**
 * Schema for blog post front matter. Only the keys listed below are
 * type-checked; `.unknown()` lets any other key through unvalidated.
 */
const BlogFrontMatterSchema = Joi.object<BlogPostFrontMatter>({
id: Joi.string(),
title: Joi.string(),
description: Joi.string(),
tags: Joi.array().items(BlogTagSchema),
slug: Joi.string(),
draft: Joi.boolean(),
}).unknown();
/**
 * Assertion function: checks `frontMatter` against BlogFrontMatterSchema and,
 * when the call returns normally, narrows it to BlogPostFrontMatter for the
 * caller. Validation failure surfaces as whatever `Joi.attempt` throws.
 *
 * The value returned by `Joi.attempt` is discarded, so the `frontMatter`
 * object passed in is left as-is.
 *
 * @param frontMatter raw front matter object to validate
 */
export function assertBlogPostFrontMatter(
frontMatter: Record<string, unknown>,
): asserts frontMatter is BlogPostFrontMatter {
Joi.attempt(frontMatter, BlogFrontMatterSchema);
}

View file

@ -26,9 +26,10 @@ import {
getEditUrl,
getFolderContainingFile,
posixPath,
replaceMarkdownLinks,
} from '@docusaurus/utils';
import {LoadContext} from '@docusaurus/types';
import {replaceMarkdownLinks} from '@docusaurus/utils/lib/markdownLinks';
import {assertBlogPostFrontMatter} from './blogFrontMatter';
export function truncate(fileString: string, truncateMarker: RegExp): string {
return fileString.split(truncateMarker, 1).shift()!;
@ -140,12 +141,18 @@ export async function generateBlogPosts(
const source = path.join(blogDirPath, blogSourceFile);
const {
frontMatter,
content,
contentTitle,
excerpt,
} = await parseMarkdownFile(source);
assertBlogPostFrontMatter(frontMatter);
const aliasedSource = aliasedSitePath(source, siteDir);
const blogFileName = path.basename(blogSourceFile);
const {frontMatter, content, excerpt} = await parseMarkdownFile(source);
if (frontMatter.draft && process.env.NODE_ENV === 'production') {
return;
}
@ -182,9 +189,11 @@ export async function generateBlogPosts(
year: 'numeric',
}).format(date);
const title = frontMatter.title ?? contentTitle ?? linkName;
const description = frontMatter.description ?? excerpt ?? '';
const slug =
frontMatter.slug || (match ? toUrl({date, link: linkName}) : linkName);
frontMatter.title = frontMatter.title || linkName;
const permalink = normalizeUrl([baseUrl, routeBasePath, slug]);
@ -220,16 +229,16 @@ export async function generateBlogPosts(
}
blogPosts.push({
id: frontMatter.slug || frontMatter.title,
id: frontMatter.slug ?? title,
metadata: {
permalink,
editUrl: getBlogEditUrl(),
source: aliasedSource,
description: frontMatter.description || excerpt,
title,
description,
date,
formattedDate,
tags: frontMatter.tags,
title: frontMatter.title,
tags: frontMatter.tags ?? [],
readingTime: showReadingTime
? readingTime(content).minutes
: undefined,

View file

@ -198,9 +198,7 @@ Object {
\\"slug\\": \\"/headingAsTitle\\",
\\"permalink\\": \\"/docs/headingAsTitle\\",
\\"version\\": \\"current\\",
\\"frontMatter\\": {
\\"title\\": \\"My heading as title\\"
}
\\"frontMatter\\": {}
}",
"site-docs-hello-md-9df.json": "{
\\"unversionedId\\": \\"hello\\",

View file

@ -0,0 +1,33 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {Joi} from '@docusaurus/utils-validation';
// TODO complete this frontmatter + add unit tests
/**
 * Front matter fields of a doc that this module declares.
 * Every field is optional.
 */
type DocFrontMatter = {
  id?: string;
  title?: string;
  description?: string;
  slug?: string;
  sidebar_label?: string;
  // DocFrontMatterSchema accepts an explicit `null` here (`.allow(null)`),
  // so the type has to admit it too; otherwise assertDocFrontMatter would
  // narrow to a type that the validated value does not satisfy.
  custom_edit_url?: string | null;
};
/**
 * Schema for doc front matter. Only the keys listed below are type-checked;
 * `.unknown()` lets any other key through unvalidated.
 */
const DocFrontMatterSchema = Joi.object<DocFrontMatter>({
id: Joi.string(),
title: Joi.string(),
description: Joi.string(),
slug: Joi.string(),
sidebar_label: Joi.string(),
// An explicit `null` is accepted in addition to a string.
custom_edit_url: Joi.string().allow(null),
}).unknown();
/**
 * Assertion function: checks `frontMatter` against DocFrontMatterSchema and,
 * when the call returns normally, narrows it to DocFrontMatter for the caller.
 * Validation failure surfaces as whatever `Joi.attempt` throws.
 *
 * The value returned by `Joi.attempt` is discarded, so the `frontMatter`
 * object passed in is left as-is.
 *
 * @param frontMatter raw front matter object to validate
 */
export function assertDocFrontMatter(
frontMatter: Record<string, unknown>,
): asserts frontMatter is DocFrontMatter {
Joi.attempt(frontMatter, DocFrontMatterSchema);
}

View file

@ -30,6 +30,7 @@ import getSlug from './slug';
import {CURRENT_VERSION_NAME} from './constants';
import globby from 'globby';
import {getDocsDirPaths} from './versions';
import {assertDocFrontMatter} from './docFrontMatter';
type LastUpdateOptions = Pick<
PluginOptions,
@ -115,11 +116,15 @@ export function processDocMetadata({
const {homePageId} = options;
const {siteDir, i18n} = context;
const {frontMatter, contentTitle, excerpt} = parseMarkdownString(content, {
source,
});
assertDocFrontMatter(frontMatter);
// ex: api/myDoc -> api
// ex: myDoc -> .
const docsFileDirName = path.dirname(source);
const {frontMatter = {}, excerpt} = parseMarkdownString(content, source);
const {
sidebar_label: sidebarLabel,
custom_edit_url: customEditURL,
@ -165,9 +170,9 @@ export function processDocMetadata({
});
// Default title is the id.
const title: string = frontMatter.title || baseID;
const title: string = frontMatter.title ?? contentTitle ?? baseID;
const description: string = frontMatter.description || excerpt;
const description: string = frontMatter.description ?? excerpt ?? '';
const permalink = normalizeUrl([versionMetadata.versionPath, docSlug]);

View file

@ -7,7 +7,7 @@
import {DocsMarkdownOption} from '../types';
import {getDocsDirPaths} from '../versions';
import {replaceMarkdownLinks} from '@docusaurus/utils/lib/markdownLinks';
import {replaceMarkdownLinks} from '@docusaurus/utils';
function getVersion(filePath: string, options: DocsMarkdownOption) {
const versionFound = options.versionsMetadata.find((version) =>

View file

@ -223,7 +223,7 @@ export default function pluginContentPages(
rehypePlugins,
beforeDefaultRehypePlugins,
beforeDefaultRemarkPlugins,
removeTitleHeading: false,
keepContentTitle: true,
staticDir: path.join(siteDir, STATIC_DIR_NAME),
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.

View file

@ -1,148 +0,0 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`load utils: parseMarkdown parseMarkdownString should delete only first heading 1`] = `
Object {
"content": "
test test test test test test
test test test # test bar
# test
### test",
"excerpt": "",
"frontMatter": Object {
"title": "test",
},
"hasFrontMatter": false,
}
`;
exports[`load utils: parseMarkdown parseMarkdownString should ignore heading if its not a first text 1`] = `
Object {
"content": "foo
# test",
"excerpt": "foo",
"frontMatter": Object {},
"hasFrontMatter": false,
}
`;
exports[`load utils: parseMarkdown parseMarkdownString should parse first heading as title 1`] = `
Object {
"content": "",
"excerpt": "",
"frontMatter": Object {
"title": "test",
},
"hasFrontMatter": false,
}
`;
exports[`load utils: parseMarkdown parseMarkdownString should preserve front-matter title and warn about duplication 1`] = `
Object {
"content": "# test",
"excerpt": "test",
"frontMatter": Object {
"title": "title",
},
"hasFrontMatter": true,
}
`;
exports[`load utils: parseMarkdown parseMarkdownString should read front matter 1`] = `
Object {
"content": "",
"excerpt": undefined,
"frontMatter": Object {
"title": "test",
},
"hasFrontMatter": true,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should delete only first heading 1`] = `
Object {
"content": "test test test # test bar
# test
### test",
"excerpt": "",
"frontMatter": Object {
"title": "test",
},
"hasFrontMatter": false,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should ignore heading if its not a first text 1`] = `
Object {
"content": "foo
# test",
"excerpt": "",
"frontMatter": Object {},
"hasFrontMatter": false,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should not warn about duplicated title 1`] = `
Object {
"content": "# test",
"excerpt": "",
"frontMatter": Object {
"title": "title",
},
"hasFrontMatter": true,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title 1`] = `
Object {
"content": "",
"excerpt": "",
"frontMatter": Object {
"title": "test",
},
"hasFrontMatter": false,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title and keep it in content 1`] = `
Object {
"content": "# test",
"excerpt": "",
"frontMatter": Object {
"title": "test",
},
"hasFrontMatter": false,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should parse front-matter and ignore h2 1`] = `
Object {
"content": "## test",
"excerpt": "",
"frontMatter": Object {
"title": "title",
},
"hasFrontMatter": true,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should preserve front-matter title and warn about duplication 1`] = `
Object {
"content": "# test",
"excerpt": "",
"frontMatter": Object {
"title": "title",
},
"hasFrontMatter": true,
}
`;
exports[`load utils: parseMarkdown readFrontMatter should read front matter 1`] = `
Object {
"content": "",
"excerpt": "",
"frontMatter": Object {
"title": "test",
},
"hasFrontMatter": true,
}
`;

View file

@ -18,7 +18,6 @@ import {
posixPath,
objectWithKeySorted,
aliasedSitePath,
createExcerpt,
isValidPathname,
addTrailingSlash,
removeTrailingSlash,
@ -372,81 +371,6 @@ describe('load utils', () => {
);
});
test('createExcerpt', () => {
const asserts = [
// Regular content
{
input: `
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`,
output:
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
},
// Content with imports/exports declarations and Markdown markup, as well as Emoji
{
input: `
import Component from '@site/src/components/Component';
import Component from '@site/src/components/Component'
import './styles.css';
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> }
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> };
Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:.
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`,
output:
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
},
// Content beginning with admonitions
{
input: `
import Component from '@site/src/components/Component'
:::caution
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
:::
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`,
output: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
},
// Content beginning with heading
{
input: `
## Lorem ipsum dolor sit amet
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`,
output: 'Lorem ipsum dolor sit amet',
},
// Content beginning with blockquote
{
input: `
> Lorem ipsum dolor sit amet
`,
output: 'Lorem ipsum dolor sit amet',
},
// Content beginning with image (eg. blog post)
{
input: `
![Lorem ipsum](/img/lorem-ipsum.svg)
`,
output: 'Lorem ipsum',
},
];
asserts.forEach((testCase) => {
expect(createExcerpt(testCase.input)).toEqual(testCase.output);
});
});
test('isValidPathname', () => {
expect(isValidPathname('/')).toBe(true);
expect(isValidPathname('/hey')).toBe(true);

View file

@ -0,0 +1,568 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {
createExcerpt,
parseMarkdownContentTitle,
parseMarkdownString,
} from '../markdownParser';
import dedent from 'dedent';
describe('createExcerpt', () => {
test('should create excerpt for text-only content', () => {
expect(
createExcerpt(dedent`
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`),
).toEqual(
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
);
});
test('should create excerpt for regular content with regular title', () => {
expect(
createExcerpt(dedent`
# Markdown Regular Title
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`),
).toEqual(
// h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
);
});
test('should create excerpt for regular content with alternate title', () => {
expect(
createExcerpt(dedent`
Markdown Alternate Title
================
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`),
).toEqual(
// h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
);
});
test('should create excerpt for content with h2 heading', () => {
expect(
createExcerpt(dedent`
## Lorem ipsum dolor sit amet
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`),
).toEqual('Lorem ipsum dolor sit amet');
});
test('should create excerpt for content beginning with blockquote', () => {
expect(
createExcerpt(dedent`
> Lorem ipsum dolor sit amet
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`),
).toEqual('Lorem ipsum dolor sit amet');
});
test('should create excerpt for content beginning with image (eg. blog post)', () => {
expect(
createExcerpt(dedent`
![Lorem ipsum](/img/lorem-ipsum.svg)
`),
).toEqual('Lorem ipsum');
});
test('should create excerpt for content beginning with admonitions', () => {
expect(
createExcerpt(dedent`
import Component from '@site/src/components/Component'
:::caution
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
:::
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`),
).toEqual('Lorem ipsum dolor sit amet, consectetur adipiscing elit.');
});
test('should create excerpt for content with imports/exports declarations and Markdown markup, as well as Emoji', () => {
expect(
createExcerpt(dedent`
import Component from '@site/src/components/Component';
import Component from '@site/src/components/Component'
import './styles.css';
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> }
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> };
Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:.
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
`),
).toEqual(
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
);
});
});
describe('parseMarkdownContentTitle', () => {
test('Should parse markdown h1 title at the top', () => {
const markdown = dedent`
# Markdown Title
Lorem Ipsum
`;
expect(parseMarkdownContentTitle(markdown)).toEqual({
content: 'Lorem Ipsum',
contentTitle: 'Markdown Title',
});
});
test('Should parse markdown h1 title at the top (atx style with closing #)', () => {
const markdown = dedent`
# Markdown Title #
Lorem Ipsum
`;
expect(parseMarkdownContentTitle(markdown)).toEqual({
content: 'Lorem Ipsum',
contentTitle: 'Markdown Title',
});
});
test('Should parse markdown h1 alternate title', () => {
const markdown = dedent`
Markdown Title
================
Lorem Ipsum
`;
expect(parseMarkdownContentTitle(markdown)).toEqual({
content: 'Lorem Ipsum',
contentTitle: 'Markdown Title',
});
});
test('Should parse title-only', () => {
const markdown = '# Document With Only A Title ';
expect(parseMarkdownContentTitle(markdown)).toEqual({
content: '',
contentTitle: 'Document With Only A Title',
});
});
test('Should parse markdown h1 title at the top but keep it in content', () => {
const markdown = dedent`
# Markdown Title
Lorem Ipsum
`;
expect(
parseMarkdownContentTitle(markdown, {keepContentTitle: true}),
).toEqual({
content: markdown.trim(),
contentTitle: 'Markdown Title',
});
});
test('Should not parse markdown h1 title in the middle of a doc', () => {
const markdown = dedent`
Lorem Ipsum
# Markdown Title
Lorem Ipsum
`;
expect(parseMarkdownContentTitle(markdown)).toEqual({
content: markdown,
contentTitle: undefined,
});
});
test('Should not parse markdown h1 alternate title in the middle of the doc', () => {
const markdown = dedent`
Lorem Ipsum
Markdown Title
================
Lorem Ipsum
`;
expect(parseMarkdownContentTitle(markdown)).toEqual({
content: markdown,
contentTitle: undefined,
});
});
});
describe('parseMarkdownString', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
beforeEach(() => {
warn.mockReset();
});
function expectDuplicateTitleWarning() {
expect(warn).toBeCalledWith(
expect.stringMatching(/Duplicate title found in this file/),
);
}
function expectNoWarning() {
expect(warn).not.toBeCalled();
}
test('parse markdown with frontmatter', () => {
expect(
parseMarkdownString(dedent`
---
title: Frontmatter title
---
Some text
`),
).toMatchInlineSnapshot(`
Object {
"content": "Some text",
"contentTitle": undefined,
"excerpt": "Some text",
"frontMatter": Object {
"title": "Frontmatter title",
},
}
`);
expectNoWarning();
});
test('should parse first heading as contentTitle', () => {
expect(
parseMarkdownString(dedent`
# Markdown Title
Some text
`),
).toMatchInlineSnapshot(`
Object {
"content": "Some text",
"contentTitle": "Markdown Title",
"excerpt": "Some text",
"frontMatter": Object {},
}
`);
expectNoWarning();
});
test('should warn about duplicate titles (frontmatter + markdown)', () => {
expect(
parseMarkdownString(dedent`
---
title: Frontmatter title
---
# Markdown Title
Some text
`),
).toMatchInlineSnapshot(`
Object {
"content": "Some text",
"contentTitle": "Markdown Title",
"excerpt": "Some text",
"frontMatter": Object {
"title": "Frontmatter title",
},
}
`);
expectDuplicateTitleWarning();
});
test('should warn about duplicate titles (frontmatter + markdown alternate)', () => {
expect(
parseMarkdownString(dedent`
---
title: Frontmatter title
---
Markdown Title alternate
================
Some text
`),
).toMatchInlineSnapshot(`
Object {
"content": "Some text",
"contentTitle": "Markdown Title alternate",
"excerpt": "Some text",
"frontMatter": Object {
"title": "Frontmatter title",
},
}
`);
expectDuplicateTitleWarning();
});
test('should not warn for duplicate title if keepContentTitle=true', () => {
expect(
parseMarkdownString(
dedent`
---
title: Frontmatter title
---
# Markdown Title
Some text
`,
{keepContentTitle: true},
),
).toMatchInlineSnapshot(`
Object {
"content": "# Markdown Title
Some text",
"contentTitle": "Markdown Title",
"excerpt": "Some text",
"frontMatter": Object {
"title": "Frontmatter title",
},
}
`);
expectNoWarning();
});
test('should not warn for duplicate title if markdown title is not at the top', () => {
expect(
parseMarkdownString(dedent`
---
title: Frontmatter title
---
foo
# Markdown Title
`),
).toMatchInlineSnapshot(`
Object {
"content": "foo
# Markdown Title",
"contentTitle": undefined,
"excerpt": "foo",
"frontMatter": Object {
"title": "Frontmatter title",
},
}
`);
expectNoWarning();
});
test('should parse markdown title and keep it in content', () => {
expect(
parseMarkdownString(
dedent`
# Markdown Title
`,
{keepContentTitle: true},
),
).toMatchInlineSnapshot(`
Object {
"content": "# Markdown Title",
"contentTitle": "Markdown Title",
"excerpt": undefined,
"frontMatter": Object {},
}
`);
expectNoWarning();
});
test('should delete only first heading', () => {
expect(
parseMarkdownString(dedent`
# Markdown Title
test test test # test bar
# Markdown Title 2
### Markdown Title h3
`),
).toMatchInlineSnapshot(`
Object {
"content": "test test test # test bar
# Markdown Title 2
### Markdown Title h3",
"contentTitle": "Markdown Title",
"excerpt": "test test test # test bar",
"frontMatter": Object {},
}
`);
expectNoWarning();
});
test('should parse front-matter and ignore h2', () => {
expect(
parseMarkdownString(
dedent`
---
title: Frontmatter title
---
## test
`,
),
).toMatchInlineSnapshot(`
Object {
"content": "## test",
"contentTitle": undefined,
"excerpt": "test",
"frontMatter": Object {
"title": "Frontmatter title",
},
}
`);
expectNoWarning();
});
test('should read front matter only', () => {
expect(
parseMarkdownString(dedent`
---
title: test
---
`),
).toMatchInlineSnapshot(`
Object {
"content": "",
"contentTitle": undefined,
"excerpt": undefined,
"frontMatter": Object {
"title": "test",
},
}
`);
expectNoWarning();
});
test('should parse title only', () => {
expect(parseMarkdownString('# test')).toMatchInlineSnapshot(`
Object {
"content": "",
"contentTitle": "test",
"excerpt": undefined,
"frontMatter": Object {},
}
`);
expectNoWarning();
});
test('should parse title only alternate', () => {
expect(
parseMarkdownString(dedent`
test
===
`),
).toMatchInlineSnapshot(`
Object {
"content": "",
"contentTitle": "test",
"excerpt": undefined,
"frontMatter": Object {},
}
`);
expectNoWarning();
});
test('should warn about duplicate titles', () => {
expect(
parseMarkdownString(dedent`
---
title: Frontmatter title
---
# test
`),
).toMatchInlineSnapshot(`
Object {
"content": "",
"contentTitle": "test",
"excerpt": undefined,
"frontMatter": Object {
"title": "Frontmatter title",
},
}
`);
expectDuplicateTitleWarning();
});
test('should ignore markdown title if its not a first text', () => {
expect(
parseMarkdownString(dedent`
foo
# test
`),
).toMatchInlineSnapshot(`
Object {
"content": "foo
# test",
"contentTitle": undefined,
"excerpt": "foo",
"frontMatter": Object {},
}
`);
expectNoWarning();
});
// Second "delete only first heading" scenario. The title is kept distinct
// from the earlier test in this describe block so a failure report points at
// exactly one test. This variant checks that a later h1 (`# test2`), an h3 and
// an inline `#` all stay in the content, and that only the leading h1 is
// extracted as contentTitle.
test('should delete only first heading 2', () => {
expect(
parseMarkdownString(dedent`
# test
test test test test test test
test test test # test bar
# test2
### test
test3
`),
).toMatchInlineSnapshot(`
Object {
"content": "test test test test test test
test test test # test bar
# test2
### test
test3",
"contentTitle": "test",
"excerpt": "test test test test test test",
"frontMatter": Object {},
}
`);
expectNoWarning();
});
});

View file

@ -1,177 +0,0 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {parseMarkdownString, readFrontMatter} from '../index';
import dedent from 'dedent';
describe('load utils: parseMarkdown', () => {
describe('readFrontMatter', () => {
test('should read front matter', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(dedent`
---
title: test
---
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should parse first heading as title', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(dedent`
# test
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should preserve front-matter title and warn about duplication', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(dedent`
---
title: title
---
# test
`),
).toMatchSnapshot();
expect(warn).toBeCalledWith('Duplicate title detected in `this` file');
warn.mockReset();
});
test('should ignore heading if its not a first text', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(dedent`
foo
# test
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should parse first heading as title and keep it in content', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(
dedent`
# test
`,
undefined,
{},
false,
),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should delete only first heading', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(dedent`
# test
test test test # test bar
# test
### test
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should parse front-matter and ignore h2', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(
dedent`
---
title: title
---
## test
`,
undefined,
{},
false,
),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should not warn about duplicated title', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
readFrontMatter(
dedent`
---
title: title
---
# test
`,
undefined,
{},
false,
),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
});
describe('parseMarkdownString', () => {
test('should read front matter', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
parseMarkdownString(dedent`
---
title: test
---
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should parse first heading as title', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
parseMarkdownString(dedent`
# test
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should preserve front-matter title and warn about duplication', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
parseMarkdownString(dedent`
---
title: title
---
# test
`),
).toMatchSnapshot();
expect(warn).toBeCalledWith('Duplicate title detected in `this` file');
warn.mockReset();
});
test('should ignore heading if its not a first text', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
parseMarkdownString(dedent`
foo
# test
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
test('should delete only first heading', () => {
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
expect(
parseMarkdownString(dedent`
# test
test test test test test test
test test test # test bar
# test
### test
`),
).toMatchSnapshot();
expect(warn).not.toBeCalled();
});
});
});

View file

@ -7,7 +7,6 @@
import chalk from 'chalk';
import path from 'path';
import matter from 'gray-matter';
import {createHash} from 'crypto';
import {camelCase, kebabCase, mapValues} from 'lodash';
import escapeStringRegexp from 'escape-string-regexp';
@ -23,6 +22,8 @@ import {
import resolvePathnameUnsafe from 'resolve-pathname';
export * from './codeTranslationsUtils';
export * from './markdownParser';
export * from './markdownLinks';
const fileHash = new Map();
export async function generate(
@ -206,135 +207,6 @@ export function getSubFolder(file: string, refDir: string): string | null {
return match && match[1];
}
/**
 * Builds a plain-text excerpt from the first meaningful line of a markdown/MDX
 * string.
 *
 * Empty lines and `import` / `export {…}` declaration lines are skipped. The
 * first remaining line is stripped of common markdown syntax (HTML tags,
 * heading markers, emphasis, images, footnotes, links, inline code,
 * blockquote markers, admonition fences and `:emoji:` names). The first line
 * that is still non-empty after cleaning is returned.
 *
 * @param fileString Markdown content (front matter already removed).
 * @returns The cleaned excerpt line, or `undefined` if no line qualifies.
 */
export function createExcerpt(fileString: string): string | undefined {
  const fileLines = fileString.trimLeft().split('\n');

  /* eslint-disable no-continue */
  // eslint-disable-next-line no-restricted-syntax
  for (const fileLine of fileLines) {
    // Skip empty line.
    if (!fileLine.trim()) {
      continue;
    }

    // Skip import/export declaration.
    // Both alternatives are grouped so they are anchored to the start of the
    // line: a prose sentence that merely contains "export ... { ... }" must
    // not be mistaken for an ES module declaration and dropped.
    if (/^\s*(?:import\s.*|export\s.*\{.*\};?)/.test(fileLine)) {
      continue;
    }

    const cleanedLine = fileLine
      // Remove HTML tags.
      .replace(/<[^>]*>/g, '')
      // Remove ATX-style headers.
      .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
      // Remove emphasis and strikethroughs.
      .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
      // Remove images.
      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove footnotes.
      .replace(/\[\^.+?\](\: .*?$)?/g, '')
      // Remove inline links.
      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove inline code.
      .replace(/`(.+?)`/g, '$1')
      // Remove blockquotes.
      .replace(/^\s{0,3}>\s?/g, '')
      // Remove admonition definition.
      .replace(/(:{3}.*)/, '')
      // Remove Emoji names within colons include preceding whitespace.
      .replace(/\s?(:(::|[^:\n])+:)/g, '')
      .trim();

    if (cleanedLine) {
      return cleanedLine;
    }
  }

  return undefined;
}
// Result shape returned by `readFrontMatter`, `parseMarkdownString` and
// `parseMarkdownFile` in this file.
type ParsedMarkdown = {
// Returned by gray-matter
// eslint-disable-next-line @typescript-eslint/no-explicit-any
frontMatter: Record<string, any>;
// Trimmed markdown body; `readFrontMatter` may strip the leading `# heading`.
content: string;
// Excerpt taken from the gray-matter result; may be undefined.
excerpt: string | undefined;
// True when the parsed front matter object has at least one key.
hasFrontMatter: boolean;
};
/**
 * Parses a markdown string with gray-matter and reconciles the front matter
 * title with a leading `# heading` in the body.
 *
 * - If a leading heading exists and front matter already has a title, the
 *   front matter title wins and (when `removeTitleHeading` is on) a
 *   duplicate-title warning is printed.
 * - If a leading heading exists and front matter has no title, the heading
 *   text becomes the title and (when `removeTitleHeading` is on) the heading
 *   is removed from the content and its text from the excerpt.
 *
 * @param markdownString Raw markdown, possibly starting with front matter.
 * @param source Optional file name, only used in the warning message.
 * @param options Options forwarded to gray-matter.
 * @param removeTitleHeading Whether to strip the heading used as title.
 * @throws Error wrapping any failure raised while parsing.
 */
export function readFrontMatter(
  markdownString: string,
  source?: string,
  options: Record<string, unknown> = {},
  removeTitleHeading = true,
): ParsedMarkdown {
  try {
    const parsed = matter(markdownString, options);
    parsed.data = parsed.data || {};
    parsed.content = parsed.content.trim();

    // Computed before a heading-derived title may be added to the data below.
    const hasFrontMatter = Object.keys(parsed.data).length > 0;

    const headingMatch = /^# (.*)[\n\r]?/gi.exec(parsed.content);
    if (headingMatch) {
      const [headingLine, headingText] = headingMatch;
      const hasFrontMatterTitle = Boolean(parsed.data.title);

      if (hasFrontMatterTitle && removeTitleHeading) {
        console.warn(
          `Duplicate title detected in \`${source || 'this'}\` file`,
        );
      } else if (!hasFrontMatterTitle) {
        parsed.data.title = headingText.trim();
        if (removeTitleHeading) {
          parsed.content = parsed.content.replace(headingLine, '');
          if (parsed.excerpt) {
            parsed.excerpt = parsed.excerpt.replace(headingText, '');
          }
        }
      }
    }

    return {
      frontMatter: parsed.data,
      content: parsed.content,
      excerpt: parsed.excerpt,
      hasFrontMatter,
    };
  } catch (e) {
    const message = `Error while parsing markdown front matter.
This can happen if you use special characters like : in frontmatter values (try using "" around that value)
${e.message}`;
    throw new Error(message);
  }
}
/**
 * Parses a markdown string via `readFrontMatter`, asking gray-matter to
 * compute the excerpt with `createExcerpt`.
 *
 * @param markdownString Raw markdown, possibly starting with front matter.
 * @param source Optional file name, forwarded for warning messages.
 */
export function parseMarkdownString(
  markdownString: string,
  source?: string,
): ParsedMarkdown {
  // Hacky way of stripping out import statements from the excerpt
  // TODO: Find a better way to do so, possibly by compiling the Markdown content,
  // stripping out HTML tags and obtaining the first line.
  const computeExcerpt = (file: matter.GrayMatterFile<string>): void => {
    file.excerpt = createExcerpt(file.content);
  };

  return readFrontMatter(markdownString, source, {excerpt: computeExcerpt});
}
/**
 * Reads a markdown file as UTF-8 and parses it with `parseMarkdownString`.
 *
 * @param source Path of the markdown file to read.
 * @throws Error naming the file when parsing fails. Read errors from
 * `fs.readFile` are not wrapped.
 */
export async function parseMarkdownFile(
  source: string,
): Promise<ParsedMarkdown> {
  const rawMarkdown = await fs.readFile(source, 'utf-8');

  try {
    return parseMarkdownString(rawMarkdown, source);
  } catch (e) {
    const message = `Error while parsing markdown file ${source}
${e.message}`;
    throw new Error(message);
  }
}
export function normalizeUrl(rawUrls: string[]): string {
const urls = rawUrls;
const resultArray = [];

View file

@ -0,0 +1,185 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import chalk from 'chalk';
import fs from 'fs-extra';
import matter from 'gray-matter';
/**
 * Builds a plain-text excerpt from the first meaningful line of a markdown/MDX
 * string.
 *
 * A leading setext ("alternate") title (`Title\n===`) is removed first. Then
 * empty lines and `import` / `export {…}` declaration lines are skipped, and
 * the first remaining line is stripped of common markdown syntax (HTML tags,
 * `#` title lines, heading markers, emphasis, images, footnotes, links,
 * inline code, blockquote markers, admonition fences and `:emoji:` names).
 * The first line that is still non-empty after cleaning is returned.
 *
 * Hacky way of stripping out import statements from the excerpt
 * TODO: Find a better way to do so, possibly by compiling the Markdown content,
 * stripping out HTML tags and obtaining the first line.
 *
 * @param fileString Markdown content (front matter already removed).
 * @returns The cleaned excerpt line, or `undefined` if no line qualifies.
 */
export function createExcerpt(fileString: string): string | undefined {
  const fileLines = fileString
    .trimLeft()
    // Remove Markdown alternate title
    .replace(/^[^\n]*\n[=]+/g, '')
    .split('\n');

  /* eslint-disable no-continue */
  // eslint-disable-next-line no-restricted-syntax
  for (const fileLine of fileLines) {
    // Skip empty line.
    if (!fileLine.trim()) {
      continue;
    }

    // Skip import/export declaration.
    // Both alternatives are grouped so they are anchored to the start of the
    // line: a prose sentence that merely contains "export ... { ... }" must
    // not be mistaken for an ES module declaration and dropped.
    if (/^\s*(?:import\s.*|export\s.*\{.*\};?)/.test(fileLine)) {
      continue;
    }

    const cleanedLine = fileLine
      // Remove HTML tags.
      .replace(/<[^>]*>/g, '')
      // Remove Title headers
      .replace(/^\#\s*([^#]*)\s*\#?/gm, '')
      // Remove Markdown + ATX-style headers
      .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
      // Remove emphasis and strikethroughs.
      .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
      // Remove images.
      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove footnotes.
      .replace(/\[\^.+?\](\: .*?$)?/g, '')
      // Remove inline links.
      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove inline code.
      .replace(/`(.+?)`/g, '$1')
      // Remove blockquotes.
      .replace(/^\s{0,3}>\s?/g, '')
      // Remove admonition definition.
      .replace(/(:{3}.*)/, '')
      // Remove Emoji names within colons include preceding whitespace.
      .replace(/\s?(:(::|[^:\n])+:)/g, '')
      .trim();

    if (cleanedLine) {
      return cleanedLine;
    }
  }

  return undefined;
}
/**
 * Splits a markdown file into its front matter data and its body using
 * gray-matter.
 *
 * @param markdownFileContent Raw file content, possibly starting with a
 * front matter block.
 * @returns The front matter object (`{}` when gray-matter yields none) and
 * the trimmed body (`''` when gray-matter yields none).
 */
export function parseFrontMatter(
  markdownFileContent: string,
): {
  frontMatter: Record<string, unknown>;
  content: string;
} {
  const parsed = matter(markdownFileContent);
  const frontMatter = parsed.data ?? {};
  const content = parsed.content?.trim() ?? '';
  return {frontMatter, content};
}
/**
 * Extracts the title found at the very top of a markdown body.
 *
 * Two syntaxes are recognized, and only when they are the first thing in the
 * (trimmed) content:
 * - a level-1 ATX heading: `# Title` (an optional closing `#` sequence
 *   preceded by whitespace is ignored, e.g. `# Title #`);
 * - a setext ("alternate") heading: `Title` followed by a line of `=`.
 *
 * The ATX match is confined to the first line, so a heading that follows the
 * title (e.g. `## Section`) keeps its `#` markers, and a title may itself
 * contain `#` characters (e.g. `# C# language`). As before, whitespace
 * between `#` and the title text is optional, and content starting with `##`
 * or deeper is never treated as a title.
 *
 * @param contentUntrimmed Markdown body without front matter.
 * @param options.keepContentTitle When true, the title is reported but left
 * in the returned content. Defaults to false (title is removed).
 * @returns The trimmed content (with the title removed unless kept) and the
 * trimmed title, or `contentTitle: undefined` when no title is found.
 */
export function parseMarkdownContentTitle(
  contentUntrimmed: string,
  options?: {keepContentTitle?: boolean},
): {content: string; contentTitle: string | undefined} {
  const keepContentTitleOption = options?.keepContentTitle ?? false;

  const content = contentUntrimmed.trim();

  // `#(?!#)` rejects h2+ headings; `[ \t]` (not `\s`) keeps the match on the
  // first line; the lazy title lets an optional closing `#` run be excluded.
  const regularTitleMatch = /^(?<pattern>#(?!#)[ \t]*(?<title>[^\n]*?)(?:[ \t]+#+)?[ \t]*(?:\r?\n|$))/.exec(
    content,
  );
  const alternateTitleMatch = /^(?<pattern>\s*(?<title>[^\n]*)\s*\n[=]+)/.exec(
    content,
  );

  // Content that starts with `#` is only ever considered as an ATX heading;
  // the setext form is tried solely for content that does not start with `#`.
  const titleMatch = content.startsWith('#')
    ? regularTitleMatch
    : alternateTitleMatch;
  const {pattern, title} = titleMatch?.groups ?? {};

  if (!pattern || !title) {
    return {content, contentTitle: undefined};
  }

  // Both regexes are anchored at the start of `content`, so the matched
  // pattern is exactly the leading `pattern.length` characters.
  const newContent = keepContentTitleOption
    ? content
    : content.slice(pattern.length);

  return {
    content: newContent.trim(),
    contentTitle: title.trim(),
  };
}
// Result shape returned by `parseMarkdownString` and `parseMarkdownFile`.
type ParsedMarkdown = {
// Front matter data as returned by `parseFrontMatter`.
frontMatter: Record<string, unknown>;
// Markdown body as returned by `parseMarkdownContentTitle`.
content: string;
// Title found at the top of the markdown body, if any.
contentTitle: string | undefined;
// Result of `createExcerpt(content)`; undefined when no excerpt line is found.
excerpt: string | undefined;
};
/**
 * Parses a markdown string into front matter, content, content title and
 * excerpt.
 *
 * Prints a warning when a title is declared both in front matter and in the
 * markdown body, unless `keepContentTitle` is set or the env variable
 * `DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING` equals `'false'`.
 *
 * @param markdownFileContent Raw markdown, possibly starting with front matter.
 * @param options.source File name used in the duplicate-title warning.
 * @param options.keepContentTitle Keep the markdown title in the content
 * (defaults to false).
 * @throws Rethrows any parsing error after logging a hint to `console.error`.
 */
export function parseMarkdownString(
  markdownFileContent: string,
  options?: {
    source?: string;
    keepContentTitle?: boolean;
  },
): ParsedMarkdown {
  try {
    const sourceOption = options?.source;
    const keepContentTitle = options?.keepContentTitle ?? false;

    const parsedFrontMatter = parseFrontMatter(markdownFileContent);
    const {frontMatter} = parsedFrontMatter;

    const {content, contentTitle} = parseMarkdownContentTitle(
      parsedFrontMatter.content,
      {keepContentTitle},
    );

    const excerpt = createExcerpt(content);

    // TODO not sure this is a good place for this warning
    const warningEnabled =
      process.env.DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING !== 'false';
    const hasDuplicateTitle =
      Boolean(frontMatter.title) && Boolean(contentTitle) && !keepContentTitle;

    if (hasDuplicateTitle && warningEnabled) {
      console.warn(
        chalk.yellow(`Duplicate title found in ${sourceOption ?? 'this'} file.
Use either a frontmatter title or a markdown title, not both.
If this is annoying you, use env DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING=false`),
      );
    }

    return {frontMatter, content, contentTitle, excerpt};
  } catch (e) {
    console.error(
      chalk.red(`Error while parsing markdown front matter.
This can happen if you use special characters like : in frontmatter values (try using "" around that value)`),
    );
    throw e;
  }
}
/**
 * Reads a markdown file as UTF-8 and parses it with `parseMarkdownString`,
 * passing the file path as `source`.
 *
 * @param source Path of the markdown file to read.
 * @throws Error naming the file when parsing fails. Read errors from
 * `fs.readFile` are not wrapped.
 */
export async function parseMarkdownFile(
  source: string,
): Promise<ParsedMarkdown> {
  const rawMarkdown = await fs.readFile(source, 'utf-8');

  try {
    return parseMarkdownString(rawMarkdown, {source});
  } catch (e) {
    const message = `Error while parsing markdown file ${source}
${e.message}`;
    throw new Error(message);
  }
}