mirror of
https://github.com/facebook/docusaurus.git
synced 2025-05-11 08:07:26 +02:00
feat(v2): various markdown string parsing improvements/fixes (#4590)
* extract createExcerpt code in separate file + add bad test
* almost working markdown parsing refactor
* complete parseMarkdownString refactor
* fix tests
* fix blog test issue
* fix docusaurus utils imports
This commit is contained in:
parent
b743edf5fb
commit
4efe6824b3
15 changed files with 895 additions and 563 deletions
|
@ -1,148 +0,0 @@
|
|||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||
|
||||
exports[`load utils: parseMarkdown parseMarkdownString should delete only first heading 1`] = `
|
||||
Object {
|
||||
"content": "
|
||||
test test test test test test
|
||||
test test test # test bar
|
||||
# test
|
||||
### test",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
"hasFrontMatter": false,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown parseMarkdownString should ignore heading if its not a first text 1`] = `
|
||||
Object {
|
||||
"content": "foo
|
||||
# test",
|
||||
"excerpt": "foo",
|
||||
"frontMatter": Object {},
|
||||
"hasFrontMatter": false,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown parseMarkdownString should parse first heading as title 1`] = `
|
||||
Object {
|
||||
"content": "",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
"hasFrontMatter": false,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown parseMarkdownString should preserve front-matter title and warn about duplication 1`] = `
|
||||
Object {
|
||||
"content": "# test",
|
||||
"excerpt": "test",
|
||||
"frontMatter": Object {
|
||||
"title": "title",
|
||||
},
|
||||
"hasFrontMatter": true,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown parseMarkdownString should read front matter 1`] = `
|
||||
Object {
|
||||
"content": "",
|
||||
"excerpt": undefined,
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
"hasFrontMatter": true,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should delete only first heading 1`] = `
|
||||
Object {
|
||||
"content": "test test test # test bar
|
||||
# test
|
||||
### test",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
"hasFrontMatter": false,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should ignore heading if its not a first text 1`] = `
|
||||
Object {
|
||||
"content": "foo
|
||||
# test",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {},
|
||||
"hasFrontMatter": false,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should not warn about duplicated title 1`] = `
|
||||
Object {
|
||||
"content": "# test",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "title",
|
||||
},
|
||||
"hasFrontMatter": true,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title 1`] = `
|
||||
Object {
|
||||
"content": "",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
"hasFrontMatter": false,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title and keep it in content 1`] = `
|
||||
Object {
|
||||
"content": "# test",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
"hasFrontMatter": false,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should parse front-matter and ignore h2 1`] = `
|
||||
Object {
|
||||
"content": "## test",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "title",
|
||||
},
|
||||
"hasFrontMatter": true,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should preserve front-matter title and warn about duplication 1`] = `
|
||||
Object {
|
||||
"content": "# test",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "title",
|
||||
},
|
||||
"hasFrontMatter": true,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`load utils: parseMarkdown readFrontMatter should read front matter 1`] = `
|
||||
Object {
|
||||
"content": "",
|
||||
"excerpt": "",
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
"hasFrontMatter": true,
|
||||
}
|
||||
`;
|
|
@ -18,7 +18,6 @@ import {
|
|||
posixPath,
|
||||
objectWithKeySorted,
|
||||
aliasedSitePath,
|
||||
createExcerpt,
|
||||
isValidPathname,
|
||||
addTrailingSlash,
|
||||
removeTrailingSlash,
|
||||
|
@ -372,81 +371,6 @@ describe('load utils', () => {
|
|||
);
|
||||
});
|
||||
|
||||
test('createExcerpt', () => {
|
||||
const asserts = [
|
||||
// Regular content
|
||||
{
|
||||
input: `
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`,
|
||||
output:
|
||||
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
|
||||
},
|
||||
// Content with imports/exports declarations and Markdown markup, as well as Emoji
|
||||
{
|
||||
input: `
|
||||
import Component from '@site/src/components/Component';
|
||||
import Component from '@site/src/components/Component'
|
||||
import './styles.css';
|
||||
|
||||
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> }
|
||||
|
||||
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> };
|
||||
|
||||
Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:.
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`,
|
||||
output:
|
||||
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
|
||||
},
|
||||
// Content beginning with admonitions
|
||||
{
|
||||
input: `
|
||||
import Component from '@site/src/components/Component'
|
||||
|
||||
:::caution
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
||||
|
||||
:::
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`,
|
||||
output: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
|
||||
},
|
||||
// Content beginning with heading
|
||||
{
|
||||
input: `
|
||||
## Lorem ipsum dolor sit amet
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`,
|
||||
output: 'Lorem ipsum dolor sit amet',
|
||||
},
|
||||
// Content beginning with blockquote
|
||||
{
|
||||
input: `
|
||||
> Lorem ipsum dolor sit amet
|
||||
`,
|
||||
output: 'Lorem ipsum dolor sit amet',
|
||||
},
|
||||
// Content beginning with image (eg. blog post)
|
||||
{
|
||||
input: `
|
||||

|
||||
`,
|
||||
output: 'Lorem ipsum',
|
||||
},
|
||||
];
|
||||
|
||||
asserts.forEach((testCase) => {
|
||||
expect(createExcerpt(testCase.input)).toEqual(testCase.output);
|
||||
});
|
||||
});
|
||||
|
||||
test('isValidPathname', () => {
|
||||
expect(isValidPathname('/')).toBe(true);
|
||||
expect(isValidPathname('/hey')).toBe(true);
|
||||
|
|
568
packages/docusaurus-utils/src/__tests__/markdownParser.test.ts
Normal file
568
packages/docusaurus-utils/src/__tests__/markdownParser.test.ts
Normal file
|
@ -0,0 +1,568 @@
|
|||
/**
|
||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||
*
|
||||
* This source code is licensed under the MIT license found in the
|
||||
* LICENSE file in the root directory of this source tree.
|
||||
*/
|
||||
|
||||
import {
|
||||
createExcerpt,
|
||||
parseMarkdownContentTitle,
|
||||
parseMarkdownString,
|
||||
} from '../markdownParser';
|
||||
import dedent from 'dedent';
|
||||
|
||||
describe('createExcerpt', () => {
|
||||
test('should create excerpt for text-only content', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`),
|
||||
).toEqual(
|
||||
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
|
||||
);
|
||||
});
|
||||
|
||||
test('should create excerpt for regular content with regular title', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||
|
||||
# Markdown Regular Title
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`),
|
||||
).toEqual(
|
||||
// h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description
|
||||
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
|
||||
);
|
||||
});
|
||||
|
||||
test('should create excerpt for regular content with alternate title', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||
|
||||
Markdown Alternate Title
|
||||
================
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`),
|
||||
).toEqual(
|
||||
// h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description
|
||||
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
|
||||
);
|
||||
});
|
||||
|
||||
test('should create excerpt for content with h2 heading', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||
## Lorem ipsum dolor sit amet
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`),
|
||||
).toEqual('Lorem ipsum dolor sit amet');
|
||||
});
|
||||
|
||||
test('should create excerpt for content beginning with blockquote', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||
> Lorem ipsum dolor sit amet
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`),
|
||||
).toEqual('Lorem ipsum dolor sit amet');
|
||||
});
|
||||
|
||||
test('should create excerpt for content beginning with image (eg. blog post)', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||

|
||||
`),
|
||||
).toEqual('Lorem ipsum');
|
||||
});
|
||||
|
||||
test('should create excerpt for content beginning with admonitions', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||
import Component from '@site/src/components/Component'
|
||||
|
||||
:::caution
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
||||
|
||||
:::
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`),
|
||||
).toEqual('Lorem ipsum dolor sit amet, consectetur adipiscing elit.');
|
||||
});
|
||||
|
||||
test('should create excerpt for content with imports/exports declarations and Markdown markup, as well as Emoji', () => {
|
||||
expect(
|
||||
createExcerpt(dedent`
|
||||
import Component from '@site/src/components/Component';
|
||||
import Component from '@site/src/components/Component'
|
||||
import './styles.css';
|
||||
|
||||
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> }
|
||||
|
||||
export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> };
|
||||
|
||||
Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:.
|
||||
|
||||
Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
|
||||
`),
|
||||
).toEqual(
|
||||
'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMarkdownContentTitle', () => {
|
||||
test('Should parse markdown h1 title at the top', () => {
|
||||
const markdown = dedent`
|
||||
|
||||
# Markdown Title
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
`;
|
||||
expect(parseMarkdownContentTitle(markdown)).toEqual({
|
||||
content: 'Lorem Ipsum',
|
||||
contentTitle: 'Markdown Title',
|
||||
});
|
||||
});
|
||||
|
||||
test('Should parse markdown h1 title at the top (atx style with closing #)', () => {
|
||||
const markdown = dedent`
|
||||
|
||||
# Markdown Title #
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
`;
|
||||
expect(parseMarkdownContentTitle(markdown)).toEqual({
|
||||
content: 'Lorem Ipsum',
|
||||
contentTitle: 'Markdown Title',
|
||||
});
|
||||
});
|
||||
|
||||
test('Should parse markdown h1 alternate title', () => {
|
||||
const markdown = dedent`
|
||||
|
||||
Markdown Title
|
||||
================
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
`;
|
||||
expect(parseMarkdownContentTitle(markdown)).toEqual({
|
||||
content: 'Lorem Ipsum',
|
||||
contentTitle: 'Markdown Title',
|
||||
});
|
||||
});
|
||||
|
||||
test('Should parse title-only', () => {
|
||||
const markdown = '# Document With Only A Title ';
|
||||
expect(parseMarkdownContentTitle(markdown)).toEqual({
|
||||
content: '',
|
||||
contentTitle: 'Document With Only A Title',
|
||||
});
|
||||
});
|
||||
|
||||
test('Should parse markdown h1 title at the top but keep it in content', () => {
|
||||
const markdown = dedent`
|
||||
|
||||
# Markdown Title
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
`;
|
||||
expect(
|
||||
parseMarkdownContentTitle(markdown, {keepContentTitle: true}),
|
||||
).toEqual({
|
||||
content: markdown.trim(),
|
||||
contentTitle: 'Markdown Title',
|
||||
});
|
||||
});
|
||||
|
||||
test('Should not parse markdown h1 title in the middle of a doc', () => {
|
||||
const markdown = dedent`
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
# Markdown Title
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
`;
|
||||
expect(parseMarkdownContentTitle(markdown)).toEqual({
|
||||
content: markdown,
|
||||
contentTitle: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
test('Should not parse markdown h1 alternate title in the middle of the doc', () => {
|
||||
const markdown = dedent`
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
Markdown Title
|
||||
================
|
||||
|
||||
Lorem Ipsum
|
||||
|
||||
`;
|
||||
expect(parseMarkdownContentTitle(markdown)).toEqual({
|
||||
content: markdown,
|
||||
contentTitle: undefined,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMarkdownString', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
beforeEach(() => {
|
||||
warn.mockReset();
|
||||
});
|
||||
|
||||
function expectDuplicateTitleWarning() {
|
||||
expect(warn).toBeCalledWith(
|
||||
expect.stringMatching(/Duplicate title found in this file/),
|
||||
);
|
||||
}
|
||||
function expectNoWarning() {
|
||||
expect(warn).not.toBeCalled();
|
||||
}
|
||||
|
||||
test('parse markdown with frontmatter', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: Frontmatter title
|
||||
---
|
||||
|
||||
Some text
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "Some text",
|
||||
"contentTitle": undefined,
|
||||
"excerpt": "Some text",
|
||||
"frontMatter": Object {
|
||||
"title": "Frontmatter title",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should parse first heading as contentTitle', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
# Markdown Title
|
||||
|
||||
Some text
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "Some text",
|
||||
"contentTitle": "Markdown Title",
|
||||
"excerpt": "Some text",
|
||||
"frontMatter": Object {},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should warn about duplicate titles (frontmatter + markdown)', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: Frontmatter title
|
||||
---
|
||||
|
||||
# Markdown Title
|
||||
|
||||
Some text
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "Some text",
|
||||
"contentTitle": "Markdown Title",
|
||||
"excerpt": "Some text",
|
||||
"frontMatter": Object {
|
||||
"title": "Frontmatter title",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectDuplicateTitleWarning();
|
||||
});
|
||||
|
||||
test('should warn about duplicate titles (frontmatter + markdown alternate)', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: Frontmatter title
|
||||
---
|
||||
|
||||
Markdown Title alternate
|
||||
================
|
||||
|
||||
Some text
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "Some text",
|
||||
"contentTitle": "Markdown Title alternate",
|
||||
"excerpt": "Some text",
|
||||
"frontMatter": Object {
|
||||
"title": "Frontmatter title",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectDuplicateTitleWarning();
|
||||
});
|
||||
|
||||
test('should not warn for duplicate title if keepContentTitle=true', () => {
|
||||
expect(
|
||||
parseMarkdownString(
|
||||
dedent`
|
||||
---
|
||||
title: Frontmatter title
|
||||
---
|
||||
|
||||
# Markdown Title
|
||||
|
||||
Some text
|
||||
`,
|
||||
{keepContentTitle: true},
|
||||
),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "# Markdown Title
|
||||
|
||||
Some text",
|
||||
"contentTitle": "Markdown Title",
|
||||
"excerpt": "Some text",
|
||||
"frontMatter": Object {
|
||||
"title": "Frontmatter title",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should not warn for duplicate title if markdown title is not at the top', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: Frontmatter title
|
||||
---
|
||||
|
||||
foo
|
||||
|
||||
# Markdown Title
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "foo
|
||||
|
||||
# Markdown Title",
|
||||
"contentTitle": undefined,
|
||||
"excerpt": "foo",
|
||||
"frontMatter": Object {
|
||||
"title": "Frontmatter title",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should parse markdown title and keep it in content', () => {
|
||||
expect(
|
||||
parseMarkdownString(
|
||||
dedent`
|
||||
# Markdown Title
|
||||
`,
|
||||
{keepContentTitle: true},
|
||||
),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "# Markdown Title",
|
||||
"contentTitle": "Markdown Title",
|
||||
"excerpt": undefined,
|
||||
"frontMatter": Object {},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should delete only first heading', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
# Markdown Title
|
||||
|
||||
test test test # test bar
|
||||
|
||||
# Markdown Title 2
|
||||
|
||||
### Markdown Title h3
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "test test test # test bar
|
||||
|
||||
# Markdown Title 2
|
||||
|
||||
### Markdown Title h3",
|
||||
"contentTitle": "Markdown Title",
|
||||
"excerpt": "test test test # test bar",
|
||||
"frontMatter": Object {},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should parse front-matter and ignore h2', () => {
|
||||
expect(
|
||||
parseMarkdownString(
|
||||
dedent`
|
||||
---
|
||||
title: Frontmatter title
|
||||
---
|
||||
## test
|
||||
`,
|
||||
),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "## test",
|
||||
"contentTitle": undefined,
|
||||
"excerpt": "test",
|
||||
"frontMatter": Object {
|
||||
"title": "Frontmatter title",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should read front matter only', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: test
|
||||
---
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "",
|
||||
"contentTitle": undefined,
|
||||
"excerpt": undefined,
|
||||
"frontMatter": Object {
|
||||
"title": "test",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should parse title only', () => {
|
||||
expect(parseMarkdownString('# test')).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "",
|
||||
"contentTitle": "test",
|
||||
"excerpt": undefined,
|
||||
"frontMatter": Object {},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should parse title only alternate', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
test
|
||||
===
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "",
|
||||
"contentTitle": "test",
|
||||
"excerpt": undefined,
|
||||
"frontMatter": Object {},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
test('should warn about duplicate titles', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: Frontmatter title
|
||||
---
|
||||
# test
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "",
|
||||
"contentTitle": "test",
|
||||
"excerpt": undefined,
|
||||
"frontMatter": Object {
|
||||
"title": "Frontmatter title",
|
||||
},
|
||||
}
|
||||
`);
|
||||
expectDuplicateTitleWarning();
|
||||
});
|
||||
|
||||
test('should ignore markdown title if its not a first text', () => {
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
foo
|
||||
# test
|
||||
`),
|
||||
).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"content": "foo
|
||||
# test",
|
||||
"contentTitle": undefined,
|
||||
"excerpt": "foo",
|
||||
"frontMatter": Object {},
|
||||
}
|
||||
`);
|
||||
expectNoWarning();
|
||||
});
|
||||
|
||||
// Same intent as the earlier "should delete only first heading" case, but with
// consecutive lines (no blank-line separators) after the title. The title is
// made distinct so a failure report identifies which of the two cases broke.
test('should delete only first heading (consecutive lines after title)', () => {
  expect(
    parseMarkdownString(dedent`
      # test

      test test test test test test
      test test test # test bar
      # test2
      ### test
      test3
    `),
  ).toMatchInlineSnapshot(`
    Object {
      "content": "test test test test test test
    test test test # test bar
    # test2
    ### test
    test3",
      "contentTitle": "test",
      "excerpt": "test test test test test test",
      "frontMatter": Object {},
    }
  `);
  // Only the leading h1 is treated as a title; no front matter title exists,
  // so no duplicate-title warning is expected.
  expectNoWarning();
});
|
||||
});
|
|
@ -1,177 +0,0 @@
|
|||
/**
|
||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||
*
|
||||
* This source code is licensed under the MIT license found in the
|
||||
* LICENSE file in the root directory of this source tree.
|
||||
*/
|
||||
|
||||
import {parseMarkdownString, readFrontMatter} from '../index';
|
||||
import dedent from 'dedent';
|
||||
|
||||
describe('load utils: parseMarkdown', () => {
|
||||
describe('readFrontMatter', () => {
|
||||
test('should read front matter', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(dedent`
|
||||
---
|
||||
title: test
|
||||
---
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should parse first heading as title', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(dedent`
|
||||
# test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should preserve front-matter title and warn about duplication', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(dedent`
|
||||
---
|
||||
title: title
|
||||
---
|
||||
# test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).toBeCalledWith('Duplicate title detected in `this` file');
|
||||
warn.mockReset();
|
||||
});
|
||||
test('should ignore heading if its not a first text', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(dedent`
|
||||
foo
|
||||
# test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should parse first heading as title and keep it in content', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(
|
||||
dedent`
|
||||
# test
|
||||
`,
|
||||
undefined,
|
||||
{},
|
||||
false,
|
||||
),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should delete only first heading', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(dedent`
|
||||
# test
|
||||
test test test # test bar
|
||||
# test
|
||||
### test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should parse front-matter and ignore h2', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(
|
||||
dedent`
|
||||
---
|
||||
title: title
|
||||
---
|
||||
## test
|
||||
`,
|
||||
undefined,
|
||||
{},
|
||||
false,
|
||||
),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should not warn about duplicated title', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
readFrontMatter(
|
||||
dedent`
|
||||
---
|
||||
title: title
|
||||
---
|
||||
# test
|
||||
`,
|
||||
undefined,
|
||||
{},
|
||||
false,
|
||||
),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseMarkdownString', () => {
|
||||
test('should read front matter', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: test
|
||||
---
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should parse first heading as title', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
# test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should preserve front-matter title and warn about duplication', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
---
|
||||
title: title
|
||||
---
|
||||
# test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).toBeCalledWith('Duplicate title detected in `this` file');
|
||||
warn.mockReset();
|
||||
});
|
||||
test('should ignore heading if its not a first text', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
foo
|
||||
# test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
test('should delete only first heading', () => {
|
||||
const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
expect(
|
||||
parseMarkdownString(dedent`
|
||||
# test
|
||||
|
||||
test test test test test test
|
||||
test test test # test bar
|
||||
# test
|
||||
### test
|
||||
`),
|
||||
).toMatchSnapshot();
|
||||
expect(warn).not.toBeCalled();
|
||||
});
|
||||
});
|
||||
});
|
|
@ -7,7 +7,6 @@
|
|||
|
||||
import chalk from 'chalk';
|
||||
import path from 'path';
|
||||
import matter from 'gray-matter';
|
||||
import {createHash} from 'crypto';
|
||||
import {camelCase, kebabCase, mapValues} from 'lodash';
|
||||
import escapeStringRegexp from 'escape-string-regexp';
|
||||
|
@ -23,6 +22,8 @@ import {
|
|||
import resolvePathnameUnsafe from 'resolve-pathname';
|
||||
|
||||
export * from './codeTranslationsUtils';
|
||||
export * from './markdownParser';
|
||||
export * from './markdownLinks';
|
||||
|
||||
const fileHash = new Map();
|
||||
export async function generate(
|
||||
|
@ -206,135 +207,6 @@ export function getSubFolder(file: string, refDir: string): string | null {
|
|||
return match && match[1];
|
||||
}
|
||||
|
||||
/**
 * Builds a one-line, plain-text excerpt from raw Markdown/MDX content.
 *
 * Lines are scanned top to bottom. Blank lines and lines that *start with* an
 * `import` declaration or an `export … { … }` declaration are skipped. The
 * first remaining line that still has text after Markdown markup is stripped
 * is returned.
 *
 * @param fileString Markdown/MDX content (front matter already removed).
 * @returns The cleaned first meaningful line, or `undefined` when every line
 * is empty, an import/export declaration, or pure markup.
 */
export function createExcerpt(fileString: string): string | undefined {
  const fileLines = fileString.trimStart().split('\n');

  /* eslint-disable no-continue */
  // eslint-disable-next-line no-restricted-syntax
  for (const fileLine of fileLines) {
    // Skip empty line.
    if (!fileLine.trim()) {
      continue;
    }

    // Skip import/export declaration.
    // Both alternatives are anchored to the start of the line: previously only
    // the `import` branch was, so prose that merely contained
    // "export … { … }" was dropped from the excerpt.
    if (/^\s*?(?:import\s.*(from.*)?;?|export\s.*\{.*\};?)/.test(fileLine)) {
      continue;
    }

    const cleanedLine = fileLine
      // Remove HTML tags.
      .replace(/<[^>]*>/g, '')
      // Remove ATX-style headers.
      .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
      // Remove emphasis and strikethroughs.
      .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
      // Remove images (keep the alt text).
      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove footnotes.
      .replace(/\[\^.+?\](\: .*?$)?/g, '')
      // Remove inline links (keep the link text).
      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove inline code markers.
      .replace(/`(.+?)`/g, '$1')
      // Remove blockquotes.
      .replace(/^\s{0,3}>\s?/g, '')
      // Remove admonition definition.
      .replace(/(:{3}.*)/, '')
      // Remove Emoji names within colons include preceding whitespace.
      .replace(/\s?(:(::|[^:\n])+:)/g, '')
      .trim();

    // A line made only of markup (e.g. an admonition fence) cleans down to
    // nothing; keep looking in that case.
    if (cleanedLine) {
      return cleanedLine;
    }
  }

  return undefined;
}
|
||||
|
||||
/** Result of parsing a Markdown string with `readFrontMatter`. */
type ParsedMarkdown = {
  /**
   * Front matter data as returned by gray-matter.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  frontMatter: Record<string, any>;
  /** Markdown body, without the front matter block. */
  content: string;
  /** Excerpt of the content, when one was produced. */
  excerpt: string | undefined;
  /** Whether the parsed front matter contained at least one key. */
  hasFrontMatter: boolean;
};
|
||||
|
||||
/**
 * Parses a Markdown string with gray-matter and derives a title from a
 * leading `# ` heading when the front matter has none.
 *
 * @param markdownString Raw Markdown, optionally starting with front matter.
 * @param source Name used in the duplicate-title warning; falls back to
 * `this` when omitted.
 * @param options Options forwarded as-is to gray-matter.
 * @param removeTitleHeading When `true` (default), a heading promoted to title
 * is removed from the content/excerpt, and a heading that competes with a
 * front matter title triggers a `console.warn`.
 * @returns Front matter, trimmed content, excerpt and a `hasFrontMatter` flag.
 * @throws Error wrapping any failure raised while parsing.
 */
export function readFrontMatter(
  markdownString: string,
  source?: string,
  options: Record<string, unknown> = {},
  removeTitleHeading = true,
): ParsedMarkdown {
  try {
    const parsed = matter(markdownString, options);
    parsed.data = parsed.data || {};
    parsed.content = parsed.content.trim();

    // Evaluated before a title may be injected below, so a title coming only
    // from the heading does not count as front matter.
    const hasFrontMatter = Object.keys(parsed.data).length > 0;

    // No `m` flag: only a `# ` heading at the very start of the trimmed
    // content matches.
    const h1Match = /^# (.*)[\n\r]?/gi.exec(parsed.content);
    if (h1Match) {
      const [headingLine, headingText] = h1Match;
      const titleAlreadySet = Boolean(parsed.data.title);

      if (titleAlreadySet && removeTitleHeading) {
        // Front matter title wins; the heading is left in the content.
        console.warn(
          `Duplicate title detected in \`${source || 'this'}\` file`,
        );
      }

      if (!titleAlreadySet) {
        parsed.data.title = headingText.trim();
        if (removeTitleHeading) {
          parsed.content = parsed.content.replace(headingLine, '');
          if (parsed.excerpt) {
            parsed.excerpt = parsed.excerpt.replace(headingText, '');
          }
        }
      }
    }

    return {
      frontMatter: parsed.data,
      content: parsed.content,
      excerpt: parsed.excerpt,
      hasFrontMatter,
    };
  } catch (e) {
    throw new Error(`Error while parsing markdown front matter.
This can happen if you use special characters like : in frontmatter values (try using "" around that value)
${e.message}`);
  }
}
|
||||
|
||||
/**
 * Parses a Markdown string via `readFrontMatter`, supplying a gray-matter
 * `excerpt` callback that fills the excerpt using `createExcerpt`.
 *
 * @param markdownString Raw Markdown, optionally starting with front matter.
 * @param source Name of the originating file, forwarded to `readFrontMatter`.
 * @returns The parsed front matter, content and excerpt.
 */
export function parseMarkdownString(
  markdownString: string,
  source?: string,
): ParsedMarkdown {
  // Hacky way of stripping out import statements from the excerpt
  // TODO: Find a better way to do so, possibly by compiling the Markdown content,
  // stripping out HTML tags and obtaining the first line.
  const excerpt = (file: matter.GrayMatterFile<string>): void => {
    file.excerpt = createExcerpt(file.content);
  };

  return readFrontMatter(markdownString, source, {excerpt});
}
|
||||
|
||||
/**
 * Reads a Markdown file as UTF-8 and parses it with `parseMarkdownString`.
 *
 * @param source Path of the Markdown file to read.
 * @returns The parsed Markdown of that file.
 * @throws Error naming the file and carrying the original message when
 * parsing fails. Errors raised while reading the file are not wrapped.
 */
export async function parseMarkdownFile(
  source: string,
): Promise<ParsedMarkdown> {
  const fileContent = await fs.readFile(source, 'utf-8');

  let parsed: ParsedMarkdown;
  try {
    parsed = parseMarkdownString(fileContent, source);
  } catch (err) {
    const message = `Error while parsing markdown file ${source}
${err.message}`;
    throw new Error(message);
  }
  return parsed;
}
|
||||
|
||||
export function normalizeUrl(rawUrls: string[]): string {
|
||||
const urls = rawUrls;
|
||||
const resultArray = [];
|
||||
|
|
185
packages/docusaurus-utils/src/markdownParser.ts
Normal file
185
packages/docusaurus-utils/src/markdownParser.ts
Normal file
|
@ -0,0 +1,185 @@
|
|||
/**
|
||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||
*
|
||||
* This source code is licensed under the MIT license found in the
|
||||
* LICENSE file in the root directory of this source tree.
|
||||
*/
|
||||
|
||||
import chalk from 'chalk';
|
||||
import fs from 'fs-extra';
|
||||
import matter from 'gray-matter';
|
||||
|
||||
/**
 * Extracts a plain-text excerpt from Markdown/MDX content: the first line
 * that is still non-empty once Markdown syntax has been stripped from it.
 *
 * Blank lines, import/export declarations and title headers (`# Title`, or
 * a leading `Title` + `===` alternate title) are skipped.
 *
 * Hacky way of stripping out import statements from the excerpt
 * TODO: Find a better way to do so, possibly by compiling the Markdown content,
 * stripping out HTML tags and obtaining the first line.
 *
 * @param fileString Markdown content (front matter already removed).
 * @returns The cleaned first meaningful line, or `undefined` if there is none.
 */
export function createExcerpt(fileString: string): string | undefined {
  const fileLines = fileString
    .trimStart()
    // Remove Markdown alternate title
    .replace(/^[^\n]*\n[=]+/g, '')
    .split('\n');

  /* eslint-disable no-continue */
  // eslint-disable-next-line no-restricted-syntax
  for (const fileLine of fileLines) {
    // Skip empty line.
    if (!fileLine.trim()) {
      continue;
    }

    // Skip import/export declaration.
    // Both alternatives are anchored to the start of the line, so a prose
    // sentence that merely mentions "export ... {...}" is not discarded.
    if (/^\s*(?:import\s|export\s.*\{.*\})/.test(fileLine)) {
      continue;
    }

    const cleanedLine = fileLine
      // Remove HTML tags.
      .replace(/<[^>]*>/g, '')
      // Remove Title headers
      .replace(/^#\s*([^#]*)\s*#?/gm, '')
      // Remove Markdown + ATX-style headers
      .replace(/^#{1,6}\s*([^#]*)\s*(#{1,6})?/gm, '$1')
      // Remove emphasis and strikethroughs.
      .replace(/([*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
      // Remove images.
      .replace(/!\[(.*?)\][[(].*?[\])]/g, '$1')
      // Remove footnotes.
      .replace(/\[\^.+?\](: .*?$)?/g, '')
      // Remove inline links.
      .replace(/\[(.*?)\][[(].*?[\])]/g, '$1')
      // Remove inline code.
      .replace(/`(.+?)`/g, '$1')
      // Remove blockquotes.
      .replace(/^\s{0,3}>\s?/g, '')
      // Remove admonition definition.
      .replace(/(:{3}.*)/, '')
      // Remove Emoji names within colons include preceding whitespace.
      .replace(/\s?(:(::|[^:\n])+:)/g, '')
      .trim();

    // A line may become empty after cleaning (e.g. a title); keep looking.
    if (cleanedLine) {
      return cleanedLine;
    }
  }

  return undefined;
}
|
||||
|
||||
/**
 * Splits a Markdown file's text into its front matter and body using
 * gray-matter.
 *
 * @param markdownFileContent Raw file content, optionally starting with a
 * front matter block.
 * @returns The front matter data (an empty object when gray-matter yields
 * none) and the trimmed body (an empty string when gray-matter yields none).
 */
export function parseFrontMatter(
  markdownFileContent: string,
): {
  frontMatter: Record<string, unknown>;
  content: string;
} {
  const parsed = matter(markdownFileContent);
  const frontMatter = parsed.data ?? {};
  const content = parsed.content?.trim() ?? '';
  return {frontMatter, content};
}
|
||||
|
||||
/**
 * Detects a level-1 title at the very top of Markdown content and, unless
 * asked otherwise, removes it from the content.
 *
 * Two title syntaxes are recognized, only on the first line of the trimmed
 * content:
 * - ATX: `# Title`, with an optional closing run of `#` that is preceded by
 *   whitespace (`# Title #`). A `#` inside the text is kept, so
 *   `# C# Guide` yields the title `C# Guide`. `#Title` without a space is
 *   tolerated as well.
 * - Alternate (setext): a first line followed by a line starting with `=`.
 *
 * Content that starts with `##` (or deeper), or with an empty `#` heading,
 * has no content title.
 *
 * @param contentUntrimmed Markdown body (front matter already removed).
 * @param options `keepContentTitle`: when true, the title is reported but
 * left in the returned content. Defaults to false.
 * @returns The trimmed content (without the title line unless kept) and the
 * trimmed title text, or `contentTitle: undefined` when no title was found.
 */
export function parseMarkdownContentTitle(
  contentUntrimmed: string,
  options?: {keepContentTitle?: boolean},
): {content: string; contentTitle: string | undefined} {
  const keepContentTitleOption = options?.keepContentTitle ?? false;

  const content = contentUntrimmed.trim();

  // Single `#` (not `##`), then the rest of the first line only. The title is
  // matched lazily so that an optional whitespace-preceded closing `#` run
  // and trailing blanks are left out of it.
  const regularTitleRegex = /^#(?!#)[ \t]*(?<title>[^\n]*?)(?:[ \t]+#+)?[ \t\r]*(?:\n|$)/;
  // First line followed by a line starting with one or more `=`.
  const alternateTitleRegex = /^(?<title>[^\n]*)\s*\n=+/;

  // Content starting with `#` is only ever considered as an ATX heading; it
  // never falls back to the alternate syntax.
  const titleMatch = content.startsWith('#')
    ? regularTitleRegex.exec(content)
    : alternateTitleRegex.exec(content);

  const contentTitle = titleMatch?.groups?.title?.trim();

  if (!titleMatch || !contentTitle) {
    return {content, contentTitle: undefined};
  }

  // The match is anchored at index 0, so dropping its length removes exactly
  // the title markup.
  const newContent = keepContentTitleOption
    ? content
    : content.slice(titleMatch[0].length);

  return {
    content: newContent.trim(),
    contentTitle,
  };
}
|
||||
|
||||
/** Result of parsing a Markdown string. */
type ParsedMarkdown = {
  /** Front matter data of the document. */
  frontMatter: Record<string, unknown>;

  /** Title found at the top of the Markdown body, if any. */
  contentTitle: string | undefined;

  /** Markdown body with the front matter removed. */
  content: string;

  /** Excerpt derived from the body, or `undefined` when none was found. */
  excerpt: string | undefined;
};
|
||||
|
||||
/**
 * Parses the full text of a Markdown file into front matter, content,
 * content title and excerpt.
 *
 * A warning is printed when both a front matter `title` and a content title
 * exist and the content title is not kept, unless the environment variable
 * `DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING` is set to `false`.
 *
 * @param markdownFileContent Raw file content.
 * @param options `source`: file name used in the warning text;
 * `keepContentTitle`: keep the title in the returned content (default false).
 * @returns The parsed front matter, content, content title and excerpt.
 * @throws Rethrows any error raised while parsing, after logging a hint.
 */
export function parseMarkdownString(
  markdownFileContent: string,
  options?: {
    source?: string;
    keepContentTitle?: boolean;
  },
): ParsedMarkdown {
  try {
    const keepContentTitle = options?.keepContentTitle ?? false;
    const sourceOption = options?.source;

    const parsedFrontMatter = parseFrontMatter(markdownFileContent);
    const {frontMatter} = parsedFrontMatter;

    const {content, contentTitle} = parseMarkdownContentTitle(
      parsedFrontMatter.content,
      {keepContentTitle},
    );

    const excerpt = createExcerpt(content);

    // TODO not sure this is a good place for this warning
    const warningEnabled =
      process.env.DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING !== 'false';
    const hasDuplicateTitle =
      frontMatter.title && contentTitle && !keepContentTitle;

    if (hasDuplicateTitle && warningEnabled) {
      const warning = `Duplicate title found in ${sourceOption ?? 'this'} file.
Use either a frontmatter title or a markdown title, not both.
If this is annoying you, use env DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING=false`;
      console.warn(chalk.yellow(warning));
    }

    return {frontMatter, content, contentTitle, excerpt};
  } catch (e) {
    const hint = `Error while parsing markdown front matter.
This can happen if you use special characters like : in frontmatter values (try using "" around that value)`;
    console.error(chalk.red(hint));
    throw e;
  }
}
|
||||
|
||||
/**
 * Reads a Markdown file as UTF-8 and parses it with `parseMarkdownString`,
 * passing the file path as the `source` option.
 *
 * @param source Path of the Markdown file to read.
 * @returns The parsed Markdown of that file.
 * @throws Error naming the file and carrying the original message when
 * parsing fails. Errors raised while reading the file are not wrapped.
 */
export async function parseMarkdownFile(
  source: string,
): Promise<ParsedMarkdown> {
  const fileContent = await fs.readFile(source, 'utf-8');

  let parsed: ParsedMarkdown;
  try {
    parsed = parseMarkdownString(fileContent, {source});
  } catch (err) {
    const message = `Error while parsing markdown file ${source}
${err.message}`;
    throw new Error(message);
  }
  return parsed;
}
|
Loading…
Add table
Add a link
Reference in a new issue