diff --git a/packages/docusaurus-mdx-loader/src/index.js b/packages/docusaurus-mdx-loader/src/index.js index c41e8544ad..ded71343c1 100644 --- a/packages/docusaurus-mdx-loader/src/index.js +++ b/packages/docusaurus-mdx-loader/src/index.js @@ -9,7 +9,10 @@ const {getOptions} = require('loader-utils'); const {readFile} = require('fs-extra'); const mdx = require('@mdx-js/mdx'); const emoji = require('remark-emoji'); -const {readFrontMatter} = require('@docusaurus/utils'); +const { + parseFrontMatter, + parseMarkdownContentTitle, +} = require('@docusaurus/utils'); const stringifyObject = require('stringify-object'); const headings = require('./remark/headings'); const toc = require('./remark/toc'); @@ -26,12 +29,14 @@ module.exports = async function docusaurusMdxLoader(fileString) { const callback = this.async(); const reqOptions = getOptions(this) || {}; - const {frontMatter, content, hasFrontMatter} = readFrontMatter( - fileString, - this.resourcePath, - {}, - reqOptions.removeTitleHeading, - ); + const {frontMatter, content: contentWithTitle} = parseFrontMatter(fileString); + + // By default, will remove the markdown title from the content + const {content} = parseMarkdownContentTitle(contentWithTitle, { + keepContentTitle: reqOptions.keepContentTitle, + }); + + const hasFrontMatter = Object.keys(frontMatter).length > 0; const options = { ...reqOptions, diff --git a/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts b/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts index 436c1f61c5..898efa421b 100644 --- a/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts +++ b/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts @@ -12,9 +12,26 @@ import path from 'path'; import pluginContentBlog from '../index'; import {DocusaurusConfig, LoadContext, I18n} from '@docusaurus/types'; import {PluginOptionSchema} from '../pluginOptionSchema'; -import {PluginOptions, EditUrlFunction} from '../types'; +import 
{PluginOptions, EditUrlFunction, BlogPost} from '../types'; import {Joi} from '@docusaurus/utils-validation'; +function findByTitle( + blogPosts: BlogPost[], + title: string, +): BlogPost | undefined { + return blogPosts.find((v) => v.metadata.title === title); +} +function getByTitle(blogPosts: BlogPost[], title: string): BlogPost { + const post = findByTitle(blogPosts, title); + if (!post) { + throw new Error(`can't find blog post with title ${title}. +Available blog post titles are:\n- ${blogPosts + .map((p) => p.metadata.title) + .join('\n- ')}`); + } + return post; +} + function getI18n(locale: string): I18n { return { currentLocale: locale, @@ -77,7 +94,7 @@ describe('loadBlog', () => { const blogPosts = await getBlogPosts(siteDir); expect({ - ...blogPosts.find((v) => v.metadata.title === 'date-matter')!.metadata, + ...getByTitle(blogPosts, 'date-matter').metadata, ...{prevItem: undefined}, }).toEqual({ editUrl: `${BaseEditUrl}/blog/date-matter.md`, @@ -98,9 +115,7 @@ describe('loadBlog', () => { }); expect( - blogPosts.find( - (v) => v.metadata.title === 'Happy 1st Birthday Slash! (translated)', - )!.metadata, + getByTitle(blogPosts, 'Happy 1st Birthday Slash! 
(translated)').metadata, ).toEqual({ editUrl: `${BaseEditUrl}/blog/2018-12-14-Happy-First-Birthday-Slash.md`, permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash', @@ -124,7 +139,7 @@ describe('loadBlog', () => { }); expect({ - ...blogPosts.find((v) => v.metadata.title === 'Complex Slug')!.metadata, + ...getByTitle(blogPosts, 'Complex Slug').metadata, ...{prevItem: undefined}, }).toEqual({ editUrl: `${BaseEditUrl}/blog/complex-slug.md`, @@ -145,7 +160,7 @@ describe('loadBlog', () => { }); expect({ - ...blogPosts.find((v) => v.metadata.title === 'Simple Slug')!.metadata, + ...getByTitle(blogPosts, 'Simple Slug').metadata, ...{prevItem: undefined}, }).toEqual({ editUrl: `${BaseEditUrl}/blog/simple-slug.md`, @@ -166,7 +181,7 @@ describe('loadBlog', () => { }); expect({ - ...blogPosts.find((v) => v.metadata.title === 'some heading')!.metadata, + ...getByTitle(blogPosts, 'some heading').metadata, prevItem: undefined, }).toEqual({ editUrl: `${BaseEditUrl}/blog/heading-as-title.md`, @@ -301,7 +316,7 @@ describe('loadBlog', () => { }).format(noDateSourceBirthTime); expect({ - ...blogPosts.find((v) => v.metadata.title === 'no date')!.metadata, + ...getByTitle(blogPosts, 'no date').metadata, ...{prevItem: undefined}, }).toEqual({ editUrl: `${BaseEditUrl}/blog/no date.md`, diff --git a/packages/docusaurus-plugin-content-blog/src/blogFrontMatter.ts b/packages/docusaurus-plugin-content-blog/src/blogFrontMatter.ts new file mode 100644 index 0000000000..660a7874cb --- /dev/null +++ b/packages/docusaurus-plugin-content-blog/src/blogFrontMatter.ts @@ -0,0 +1,43 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +import {Joi} from '@docusaurus/utils-validation'; +import {Tag} from './types'; + +// TODO complete this frontmatter + add unit tests +type BlogPostFrontMatter = { + id?: string; + title?: string; + description?: string; + tags?: (string | Tag)[]; + slug?: string; + draft?: boolean; + date?: string; +}; + +const BlogTagSchema = Joi.alternatives().try( + Joi.string().required(), + Joi.object({ + label: Joi.string().required(), + permalink: Joi.string().required(), + }), +); + +const BlogFrontMatterSchema = Joi.object({ + id: Joi.string(), + title: Joi.string(), + description: Joi.string(), + tags: Joi.array().items(BlogTagSchema), + slug: Joi.string(), + draft: Joi.boolean(), +}).unknown(); + +export function assertBlogPostFrontMatter( + frontMatter: Record, +): asserts frontMatter is BlogPostFrontMatter { + Joi.attempt(frontMatter, BlogFrontMatterSchema); +} diff --git a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts index c0465346eb..eace24dd31 100644 --- a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts +++ b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts @@ -26,9 +26,10 @@ import { getEditUrl, getFolderContainingFile, posixPath, + replaceMarkdownLinks, } from '@docusaurus/utils'; import {LoadContext} from '@docusaurus/types'; -import {replaceMarkdownLinks} from '@docusaurus/utils/lib/markdownLinks'; +import {assertBlogPostFrontMatter} from './blogFrontMatter'; export function truncate(fileString: string, truncateMarker: RegExp): string { return fileString.split(truncateMarker, 1).shift()!; @@ -140,12 +141,18 @@ export async function generateBlogPosts( const source = path.join(blogDirPath, blogSourceFile); + const { + frontMatter, + content, + contentTitle, + excerpt, + } = await parseMarkdownFile(source); + assertBlogPostFrontMatter(frontMatter); + const aliasedSource = aliasedSitePath(source, siteDir); const blogFileName = path.basename(blogSourceFile); - const 
{frontMatter, content, excerpt} = await parseMarkdownFile(source); - if (frontMatter.draft && process.env.NODE_ENV === 'production') { return; } @@ -182,9 +189,11 @@ export async function generateBlogPosts( year: 'numeric', }).format(date); + const title = frontMatter.title ?? contentTitle ?? linkName; + const description = frontMatter.description ?? excerpt ?? ''; + const slug = frontMatter.slug || (match ? toUrl({date, link: linkName}) : linkName); - frontMatter.title = frontMatter.title || linkName; const permalink = normalizeUrl([baseUrl, routeBasePath, slug]); @@ -220,16 +229,16 @@ export async function generateBlogPosts( } blogPosts.push({ - id: frontMatter.slug || frontMatter.title, + id: frontMatter.slug ?? title, metadata: { permalink, editUrl: getBlogEditUrl(), source: aliasedSource, - description: frontMatter.description || excerpt, + title, + description, date, formattedDate, - tags: frontMatter.tags, - title: frontMatter.title, + tags: frontMatter.tags ?? [], readingTime: showReadingTime ? 
readingTime(content).minutes : undefined, diff --git a/packages/docusaurus-plugin-content-docs/src/__tests__/__snapshots__/index.test.ts.snap b/packages/docusaurus-plugin-content-docs/src/__tests__/__snapshots__/index.test.ts.snap index 6ed8d53aa1..95a79b9d53 100644 --- a/packages/docusaurus-plugin-content-docs/src/__tests__/__snapshots__/index.test.ts.snap +++ b/packages/docusaurus-plugin-content-docs/src/__tests__/__snapshots__/index.test.ts.snap @@ -198,9 +198,7 @@ Object { \\"slug\\": \\"/headingAsTitle\\", \\"permalink\\": \\"/docs/headingAsTitle\\", \\"version\\": \\"current\\", - \\"frontMatter\\": { - \\"title\\": \\"My heading as title\\" - } + \\"frontMatter\\": {} }", "site-docs-hello-md-9df.json": "{ \\"unversionedId\\": \\"hello\\", diff --git a/packages/docusaurus-plugin-content-docs/src/docFrontMatter.ts b/packages/docusaurus-plugin-content-docs/src/docFrontMatter.ts new file mode 100644 index 0000000000..b235bb779e --- /dev/null +++ b/packages/docusaurus-plugin-content-docs/src/docFrontMatter.ts @@ -0,0 +1,33 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +import {Joi} from '@docusaurus/utils-validation'; + +// TODO complete this frontmatter + add unit tests +type DocFrontMatter = { + id?: string; + title?: string; + description?: string; + slug?: string; + sidebar_label?: string; + custom_edit_url?: string; +}; + +const DocFrontMatterSchema = Joi.object({ + id: Joi.string(), + title: Joi.string(), + description: Joi.string(), + slug: Joi.string(), + sidebar_label: Joi.string(), + custom_edit_url: Joi.string().allow(null), +}).unknown(); + +export function assertDocFrontMatter( + frontMatter: Record, +): asserts frontMatter is DocFrontMatter { + Joi.attempt(frontMatter, DocFrontMatterSchema); +} diff --git a/packages/docusaurus-plugin-content-docs/src/docs.ts b/packages/docusaurus-plugin-content-docs/src/docs.ts index 592b998439..632113c60d 100644 --- a/packages/docusaurus-plugin-content-docs/src/docs.ts +++ b/packages/docusaurus-plugin-content-docs/src/docs.ts @@ -30,6 +30,7 @@ import getSlug from './slug'; import {CURRENT_VERSION_NAME} from './constants'; import globby from 'globby'; import {getDocsDirPaths} from './versions'; +import {assertDocFrontMatter} from './docFrontMatter'; type LastUpdateOptions = Pick< PluginOptions, @@ -115,11 +116,15 @@ export function processDocMetadata({ const {homePageId} = options; const {siteDir, i18n} = context; + const {frontMatter, contentTitle, excerpt} = parseMarkdownString(content, { + source, + }); + assertDocFrontMatter(frontMatter); + // ex: api/myDoc -> api // ex: myDoc -> . const docsFileDirName = path.dirname(source); - const {frontMatter = {}, excerpt} = parseMarkdownString(content, source); const { sidebar_label: sidebarLabel, custom_edit_url: customEditURL, @@ -165,9 +170,9 @@ export function processDocMetadata({ }); // Default title is the id. - const title: string = frontMatter.title || baseID; + const title: string = frontMatter.title ?? contentTitle ?? 
baseID; - const description: string = frontMatter.description || excerpt; + const description: string = frontMatter.description ?? excerpt ?? ''; const permalink = normalizeUrl([versionMetadata.versionPath, docSlug]); diff --git a/packages/docusaurus-plugin-content-docs/src/markdown/linkify.ts b/packages/docusaurus-plugin-content-docs/src/markdown/linkify.ts index 3c772dd14d..6490739ab7 100644 --- a/packages/docusaurus-plugin-content-docs/src/markdown/linkify.ts +++ b/packages/docusaurus-plugin-content-docs/src/markdown/linkify.ts @@ -7,7 +7,7 @@ import {DocsMarkdownOption} from '../types'; import {getDocsDirPaths} from '../versions'; -import {replaceMarkdownLinks} from '@docusaurus/utils/lib/markdownLinks'; +import {replaceMarkdownLinks} from '@docusaurus/utils'; function getVersion(filePath: string, options: DocsMarkdownOption) { const versionFound = options.versionsMetadata.find((version) => diff --git a/packages/docusaurus-plugin-content-pages/src/index.ts b/packages/docusaurus-plugin-content-pages/src/index.ts index 816ead0ec6..7b72f84381 100644 --- a/packages/docusaurus-plugin-content-pages/src/index.ts +++ b/packages/docusaurus-plugin-content-pages/src/index.ts @@ -223,7 +223,7 @@ export default function pluginContentPages( rehypePlugins, beforeDefaultRehypePlugins, beforeDefaultRemarkPlugins, - removeTitleHeading: false, + keepContentTitle: true, staticDir: path.join(siteDir, STATIC_DIR_NAME), // Note that metadataPath must be the same/in-sync as // the path from createData for each MDX. 
diff --git a/packages/docusaurus-utils/src/__tests__/__snapshots__/parseMarkdown.test.ts.snap b/packages/docusaurus-utils/src/__tests__/__snapshots__/parseMarkdown.test.ts.snap deleted file mode 100644 index 8163fed6fb..0000000000 --- a/packages/docusaurus-utils/src/__tests__/__snapshots__/parseMarkdown.test.ts.snap +++ /dev/null @@ -1,148 +0,0 @@ -// Jest Snapshot v1, https://goo.gl/fbAQLP - -exports[`load utils: parseMarkdown parseMarkdownString should delete only first heading 1`] = ` -Object { - "content": " -test test test test test test -test test test # test bar -# test -### test", - "excerpt": "", - "frontMatter": Object { - "title": "test", - }, - "hasFrontMatter": false, -} -`; - -exports[`load utils: parseMarkdown parseMarkdownString should ignore heading if its not a first text 1`] = ` -Object { - "content": "foo -# test", - "excerpt": "foo", - "frontMatter": Object {}, - "hasFrontMatter": false, -} -`; - -exports[`load utils: parseMarkdown parseMarkdownString should parse first heading as title 1`] = ` -Object { - "content": "", - "excerpt": "", - "frontMatter": Object { - "title": "test", - }, - "hasFrontMatter": false, -} -`; - -exports[`load utils: parseMarkdown parseMarkdownString should preserve front-matter title and warn about duplication 1`] = ` -Object { - "content": "# test", - "excerpt": "test", - "frontMatter": Object { - "title": "title", - }, - "hasFrontMatter": true, -} -`; - -exports[`load utils: parseMarkdown parseMarkdownString should read front matter 1`] = ` -Object { - "content": "", - "excerpt": undefined, - "frontMatter": Object { - "title": "test", - }, - "hasFrontMatter": true, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should delete only first heading 1`] = ` -Object { - "content": "test test test # test bar -# test -### test", - "excerpt": "", - "frontMatter": Object { - "title": "test", - }, - "hasFrontMatter": false, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should ignore heading if its 
not a first text 1`] = ` -Object { - "content": "foo -# test", - "excerpt": "", - "frontMatter": Object {}, - "hasFrontMatter": false, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should not warn about duplicated title 1`] = ` -Object { - "content": "# test", - "excerpt": "", - "frontMatter": Object { - "title": "title", - }, - "hasFrontMatter": true, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title 1`] = ` -Object { - "content": "", - "excerpt": "", - "frontMatter": Object { - "title": "test", - }, - "hasFrontMatter": false, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title and keep it in content 1`] = ` -Object { - "content": "# test", - "excerpt": "", - "frontMatter": Object { - "title": "test", - }, - "hasFrontMatter": false, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should parse front-matter and ignore h2 1`] = ` -Object { - "content": "## test", - "excerpt": "", - "frontMatter": Object { - "title": "title", - }, - "hasFrontMatter": true, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should preserve front-matter title and warn about duplication 1`] = ` -Object { - "content": "# test", - "excerpt": "", - "frontMatter": Object { - "title": "title", - }, - "hasFrontMatter": true, -} -`; - -exports[`load utils: parseMarkdown readFrontMatter should read front matter 1`] = ` -Object { - "content": "", - "excerpt": "", - "frontMatter": Object { - "title": "test", - }, - "hasFrontMatter": true, -} -`; diff --git a/packages/docusaurus-utils/src/__tests__/index.test.ts b/packages/docusaurus-utils/src/__tests__/index.test.ts index 1f9881c2e5..f83e9f6117 100644 --- a/packages/docusaurus-utils/src/__tests__/index.test.ts +++ b/packages/docusaurus-utils/src/__tests__/index.test.ts @@ -18,7 +18,6 @@ import { posixPath, objectWithKeySorted, aliasedSitePath, - createExcerpt, isValidPathname, addTrailingSlash, removeTrailingSlash, @@ 
-372,81 +371,6 @@ describe('load utils', () => { ); }); - test('createExcerpt', () => { - const asserts = [ - // Regular content - { - input: ` - Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo. - - Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. - `, - output: - 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.', - }, - // Content with imports/exports declarations and Markdown markup, as well as Emoji - { - input: ` - import Component from '@site/src/components/Component'; - import Component from '@site/src/components/Component' - import './styles.css'; - - export function ItemCol(props) { return } - - export function ItemCol(props) { return }; - - Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:. - - Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. - `, - output: - 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.', - }, - // Content beginning with admonitions - { - input: ` - import Component from '@site/src/components/Component' - - :::caution - - Lorem ipsum dolor sit amet, consectetur adipiscing elit. - - ::: - - Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. - `, - output: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', - }, - // Content beginning with heading - { - input: ` - ## Lorem ipsum dolor sit amet - - Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. 
Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. - `, - output: 'Lorem ipsum dolor sit amet', - }, - // Content beginning with blockquote - { - input: ` - > Lorem ipsum dolor sit amet - `, - output: 'Lorem ipsum dolor sit amet', - }, - // Content beginning with image (eg. blog post) - { - input: ` - ![Lorem ipsum](/img/lorem-ipsum.svg) - `, - output: 'Lorem ipsum', - }, - ]; - - asserts.forEach((testCase) => { - expect(createExcerpt(testCase.input)).toEqual(testCase.output); - }); - }); - test('isValidPathname', () => { expect(isValidPathname('/')).toBe(true); expect(isValidPathname('/hey')).toBe(true); diff --git a/packages/docusaurus-utils/src/__tests__/markdownParser.test.ts b/packages/docusaurus-utils/src/__tests__/markdownParser.test.ts new file mode 100644 index 0000000000..c4870bec6b --- /dev/null +++ b/packages/docusaurus-utils/src/__tests__/markdownParser.test.ts @@ -0,0 +1,568 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import { + createExcerpt, + parseMarkdownContentTitle, + parseMarkdownString, +} from '../markdownParser'; +import dedent from 'dedent'; + +describe('createExcerpt', () => { + test('should create excerpt for text-only content', () => { + expect( + createExcerpt(dedent` + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo. + + Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. + `), + ).toEqual( + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Vestibulum ex urna, molestie et sagittis ut, varius ac justo.', + ); + }); + + test('should create excerpt for regular content with regular title', () => { + expect( + createExcerpt(dedent` + + # Markdown Regular Title + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo. + + Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. + `), + ).toEqual( + // h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.', + ); + }); + + test('should create excerpt for regular content with alternate title', () => { + expect( + createExcerpt(dedent` + + Markdown Alternate Title + ================ + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo. + + Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. + `), + ).toEqual( + // h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.', + ); + }); + + test('should create excerpt for content with h2 heading', () => { + expect( + createExcerpt(dedent` + ## Lorem ipsum dolor sit amet + + Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. + `), + ).toEqual('Lorem ipsum dolor sit amet'); + }); + + test('should create excerpt for content beginning with blockquote', () => { + expect( + createExcerpt(dedent` + > Lorem ipsum dolor sit amet + + Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. 
Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. + `), + ).toEqual('Lorem ipsum dolor sit amet'); + }); + + test('should create excerpt for content beginning with image (eg. blog post)', () => { + expect( + createExcerpt(dedent` + ![Lorem ipsum](/img/lorem-ipsum.svg) + `), + ).toEqual('Lorem ipsum'); + }); + + test('should create excerpt for content beginning with admonitions', () => { + expect( + createExcerpt(dedent` + import Component from '@site/src/components/Component' + + :::caution + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + ::: + + Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. + `), + ).toEqual('Lorem ipsum dolor sit amet, consectetur adipiscing elit.'); + }); + + test('should create excerpt for content with imports/exports declarations and Markdown markup, as well as Emoji', () => { + expect( + createExcerpt(dedent` + import Component from '@site/src/components/Component'; + import Component from '@site/src/components/Component' + import './styles.css'; + + export function ItemCol(props) { return } + + export function ItemCol(props) { return }; + + Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:. + + Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis. + `), + ).toEqual( + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Vestibulum ex urna, molestie et sagittis ut, varius ac justo.', + ); + }); +}); + +describe('parseMarkdownContentTitle', () => { + test('Should parse markdown h1 title at the top', () => { + const markdown = dedent` + + # Markdown Title + + Lorem Ipsum + + `; + expect(parseMarkdownContentTitle(markdown)).toEqual({ + content: 'Lorem Ipsum', + contentTitle: 'Markdown Title', + }); + }); + + test('Should parse markdown h1 title at the top (atx style with closing #)', () => { + const markdown = dedent` + + # Markdown Title # + + Lorem Ipsum + + `; + expect(parseMarkdownContentTitle(markdown)).toEqual({ + content: 'Lorem Ipsum', + contentTitle: 'Markdown Title', + }); + }); + + test('Should parse markdown h1 alternate title', () => { + const markdown = dedent` + + Markdown Title + ================ + + Lorem Ipsum + + `; + expect(parseMarkdownContentTitle(markdown)).toEqual({ + content: 'Lorem Ipsum', + contentTitle: 'Markdown Title', + }); + }); + + test('Should parse title-only', () => { + const markdown = '# Document With Only A Title '; + expect(parseMarkdownContentTitle(markdown)).toEqual({ + content: '', + contentTitle: 'Document With Only A Title', + }); + }); + + test('Should parse markdown h1 title at the top but keep it in content', () => { + const markdown = dedent` + + # Markdown Title + + Lorem Ipsum + + `; + expect( + parseMarkdownContentTitle(markdown, {keepContentTitle: true}), + ).toEqual({ + content: markdown.trim(), + contentTitle: 'Markdown Title', + }); + }); + + test('Should not parse markdown h1 title in the middle of a doc', () => { + const markdown = dedent` + + Lorem Ipsum + + # Markdown Title + + Lorem Ipsum + + `; + expect(parseMarkdownContentTitle(markdown)).toEqual({ + content: markdown, + contentTitle: undefined, + }); + }); + + test('Should not parse markdown h1 alternate title in the middle of the doc', () => { + const markdown = dedent` + + Lorem Ipsum + + Markdown Title + ================ + + Lorem Ipsum + + `; + 
expect(parseMarkdownContentTitle(markdown)).toEqual({ + content: markdown, + contentTitle: undefined, + }); + }); +}); + +describe('parseMarkdownString', () => { + const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); + beforeEach(() => { + warn.mockReset(); + }); + + function expectDuplicateTitleWarning() { + expect(warn).toBeCalledWith( + expect.stringMatching(/Duplicate title found in this file/), + ); + } + function expectNoWarning() { + expect(warn).not.toBeCalled(); + } + + test('parse markdown with frontmatter', () => { + expect( + parseMarkdownString(dedent` + --- + title: Frontmatter title + --- + + Some text + `), + ).toMatchInlineSnapshot(` + Object { + "content": "Some text", + "contentTitle": undefined, + "excerpt": "Some text", + "frontMatter": Object { + "title": "Frontmatter title", + }, + } + `); + expectNoWarning(); + }); + + test('should parse first heading as contentTitle', () => { + expect( + parseMarkdownString(dedent` + # Markdown Title + + Some text + `), + ).toMatchInlineSnapshot(` + Object { + "content": "Some text", + "contentTitle": "Markdown Title", + "excerpt": "Some text", + "frontMatter": Object {}, + } + `); + expectNoWarning(); + }); + + test('should warn about duplicate titles (frontmatter + markdown)', () => { + expect( + parseMarkdownString(dedent` + --- + title: Frontmatter title + --- + + # Markdown Title + + Some text + `), + ).toMatchInlineSnapshot(` + Object { + "content": "Some text", + "contentTitle": "Markdown Title", + "excerpt": "Some text", + "frontMatter": Object { + "title": "Frontmatter title", + }, + } + `); + expectDuplicateTitleWarning(); + }); + + test('should warn about duplicate titles (frontmatter + markdown alternate)', () => { + expect( + parseMarkdownString(dedent` + --- + title: Frontmatter title + --- + + Markdown Title alternate + ================ + + Some text + `), + ).toMatchInlineSnapshot(` + Object { + "content": "Some text", + "contentTitle": "Markdown Title alternate", + 
"excerpt": "Some text", + "frontMatter": Object { + "title": "Frontmatter title", + }, + } + `); + expectDuplicateTitleWarning(); + }); + + test('should not warn for duplicate title if keepContentTitle=true', () => { + expect( + parseMarkdownString( + dedent` + --- + title: Frontmatter title + --- + + # Markdown Title + + Some text + `, + {keepContentTitle: true}, + ), + ).toMatchInlineSnapshot(` + Object { + "content": "# Markdown Title + + Some text", + "contentTitle": "Markdown Title", + "excerpt": "Some text", + "frontMatter": Object { + "title": "Frontmatter title", + }, + } + `); + expectNoWarning(); + }); + + test('should not warn for duplicate title if markdown title is not at the top', () => { + expect( + parseMarkdownString(dedent` + --- + title: Frontmatter title + --- + + foo + + # Markdown Title + `), + ).toMatchInlineSnapshot(` + Object { + "content": "foo + + # Markdown Title", + "contentTitle": undefined, + "excerpt": "foo", + "frontMatter": Object { + "title": "Frontmatter title", + }, + } + `); + expectNoWarning(); + }); + + test('should parse markdown title and keep it in content', () => { + expect( + parseMarkdownString( + dedent` + # Markdown Title + `, + {keepContentTitle: true}, + ), + ).toMatchInlineSnapshot(` + Object { + "content": "# Markdown Title", + "contentTitle": "Markdown Title", + "excerpt": undefined, + "frontMatter": Object {}, + } + `); + expectNoWarning(); + }); + + test('should delete only first heading', () => { + expect( + parseMarkdownString(dedent` + # Markdown Title + + test test test # test bar + + # Markdown Title 2 + + ### Markdown Title h3 + `), + ).toMatchInlineSnapshot(` + Object { + "content": "test test test # test bar + + # Markdown Title 2 + + ### Markdown Title h3", + "contentTitle": "Markdown Title", + "excerpt": "test test test # test bar", + "frontMatter": Object {}, + } + `); + expectNoWarning(); + }); + + test('should parse front-matter and ignore h2', () => { + expect( + parseMarkdownString( + dedent` + 
--- + title: Frontmatter title + --- + ## test + `, + ), + ).toMatchInlineSnapshot(` + Object { + "content": "## test", + "contentTitle": undefined, + "excerpt": "test", + "frontMatter": Object { + "title": "Frontmatter title", + }, + } + `); + expectNoWarning(); + }); + + test('should read front matter only', () => { + expect( + parseMarkdownString(dedent` + --- + title: test + --- + `), + ).toMatchInlineSnapshot(` + Object { + "content": "", + "contentTitle": undefined, + "excerpt": undefined, + "frontMatter": Object { + "title": "test", + }, + } + `); + expectNoWarning(); + }); + + test('should parse title only', () => { + expect(parseMarkdownString('# test')).toMatchInlineSnapshot(` + Object { + "content": "", + "contentTitle": "test", + "excerpt": undefined, + "frontMatter": Object {}, + } + `); + expectNoWarning(); + }); + + test('should parse title only alternate', () => { + expect( + parseMarkdownString(dedent` + test + === + `), + ).toMatchInlineSnapshot(` + Object { + "content": "", + "contentTitle": "test", + "excerpt": undefined, + "frontMatter": Object {}, + } + `); + expectNoWarning(); + }); + + test('should warn about duplicate titles', () => { + expect( + parseMarkdownString(dedent` + --- + title: Frontmatter title + --- + # test + `), + ).toMatchInlineSnapshot(` + Object { + "content": "", + "contentTitle": "test", + "excerpt": undefined, + "frontMatter": Object { + "title": "Frontmatter title", + }, + } + `); + expectDuplicateTitleWarning(); + }); + + test('should ignore markdown title if its not a first text', () => { + expect( + parseMarkdownString(dedent` + foo + # test + `), + ).toMatchInlineSnapshot(` + Object { + "content": "foo + # test", + "contentTitle": undefined, + "excerpt": "foo", + "frontMatter": Object {}, + } + `); + expectNoWarning(); + }); + + test('should delete only first heading', () => { + expect( + parseMarkdownString(dedent` + # test + + test test test test test test + test test test # test bar + # test2 + ### test + test3 
+ `), + ).toMatchInlineSnapshot(` + Object { + "content": "test test test test test test + test test test # test bar + # test2 + ### test + test3", + "contentTitle": "test", + "excerpt": "test test test test test test", + "frontMatter": Object {}, + } + `); + expectNoWarning(); + }); +}); diff --git a/packages/docusaurus-utils/src/__tests__/parseMarkdown.test.ts b/packages/docusaurus-utils/src/__tests__/parseMarkdown.test.ts deleted file mode 100644 index 5513cfdcff..0000000000 --- a/packages/docusaurus-utils/src/__tests__/parseMarkdown.test.ts +++ /dev/null @@ -1,177 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -import {parseMarkdownString, readFrontMatter} from '../index'; -import dedent from 'dedent'; - -describe('load utils: parseMarkdown', () => { - describe('readFrontMatter', () => { - test('should read front matter', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter(dedent` - --- - title: test - --- - `), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should parse first heading as title', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter(dedent` - # test - `), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should preserve front-matter title and warn about duplication', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter(dedent` - --- - title: title - --- - # test - `), - ).toMatchSnapshot(); - expect(warn).toBeCalledWith('Duplicate title detected in `this` file'); - warn.mockReset(); - }); - test('should ignore heading if its not a first text', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter(dedent` - foo - # test - `), - 
).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should parse first heading as title and keep it in content', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter( - dedent` - # test - `, - undefined, - {}, - false, - ), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should delete only first heading', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter(dedent` - # test - test test test # test bar - # test - ### test - `), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should parse front-matter and ignore h2', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter( - dedent` - --- - title: title - --- - ## test - `, - undefined, - {}, - false, - ), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should not warn about duplicated title', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - readFrontMatter( - dedent` - --- - title: title - --- - # test - `, - undefined, - {}, - false, - ), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - }); - - describe('parseMarkdownString', () => { - test('should read front matter', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - parseMarkdownString(dedent` - --- - title: test - --- - `), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should parse first heading as title', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - parseMarkdownString(dedent` - # test - `), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should preserve front-matter title and warn about duplication', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - parseMarkdownString(dedent` - --- - 
title: title - --- - # test - `), - ).toMatchSnapshot(); - expect(warn).toBeCalledWith('Duplicate title detected in `this` file'); - warn.mockReset(); - }); - test('should ignore heading if its not a first text', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - parseMarkdownString(dedent` - foo - # test - `), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - test('should delete only first heading', () => { - const warn = jest.spyOn(console, 'warn').mockImplementation(() => {}); - expect( - parseMarkdownString(dedent` - # test - - test test test test test test - test test test # test bar - # test - ### test - `), - ).toMatchSnapshot(); - expect(warn).not.toBeCalled(); - }); - }); -}); diff --git a/packages/docusaurus-utils/src/index.ts b/packages/docusaurus-utils/src/index.ts index d2ebb90b84..e5aae3bcae 100644 --- a/packages/docusaurus-utils/src/index.ts +++ b/packages/docusaurus-utils/src/index.ts @@ -7,7 +7,6 @@ import chalk from 'chalk'; import path from 'path'; -import matter from 'gray-matter'; import {createHash} from 'crypto'; import {camelCase, kebabCase, mapValues} from 'lodash'; import escapeStringRegexp from 'escape-string-regexp'; @@ -23,6 +22,8 @@ import { import resolvePathnameUnsafe from 'resolve-pathname'; export * from './codeTranslationsUtils'; +export * from './markdownParser'; +export * from './markdownLinks'; const fileHash = new Map(); export async function generate( @@ -206,135 +207,6 @@ export function getSubFolder(file: string, refDir: string): string | null { return match && match[1]; } -export function createExcerpt(fileString: string): string | undefined { - const fileLines = fileString.trimLeft().split('\n'); - - /* eslint-disable no-continue */ - // eslint-disable-next-line no-restricted-syntax - for (const fileLine of fileLines) { - // Skip empty line. - if (!fileLine.trim()) { - continue; - } - - // Skip import/export declaration. 
- if (/^\s*?import\s.*(from.*)?;?|export\s.*{.*};?/.test(fileLine)) { - continue; - } - - const cleanedLine = fileLine - // Remove HTML tags. - .replace(/<[^>]*>/g, '') - // Remove ATX-style headers. - .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1') - // Remove emphasis and strikethroughs. - .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2') - // Remove images. - .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1') - // Remove footnotes. - .replace(/\[\^.+?\](\: .*?$)?/g, '') - // Remove inline links. - .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1') - // Remove inline code. - .replace(/`(.+?)`/g, '$1') - // Remove blockquotes. - .replace(/^\s{0,3}>\s?/g, '') - // Remove admonition definition. - .replace(/(:{3}.*)/, '') - // Remove Emoji names within colons include preceding whitespace. - .replace(/\s?(:(::|[^:\n])+:)/g, '') - .trim(); - - if (cleanedLine) { - return cleanedLine; - } - } - - return undefined; -} - -type ParsedMarkdown = { - // Returned by gray-matter - // eslint-disable-next-line @typescript-eslint/no-explicit-any - frontMatter: Record; - content: string; - excerpt: string | undefined; - hasFrontMatter: boolean; -}; - -export function readFrontMatter( - markdownString: string, - source?: string, - options: Record = {}, - removeTitleHeading = true, -): ParsedMarkdown { - try { - const result = matter(markdownString, options); - result.data = result.data || {}; - result.content = result.content.trim(); - - const hasFrontMatter = Object.keys(result.data).length > 0; - - const heading = /^# (.*)[\n\r]?/gi.exec(result.content); - if (heading) { - if (result.data.title) { - if (removeTitleHeading) { - console.warn( - `Duplicate title detected in \`${source || 'this'}\` file`, - ); - } - } else { - result.data.title = heading[1].trim(); - if (removeTitleHeading) { - result.content = result.content.replace(heading[0], ''); - if (result.excerpt) { - result.excerpt = result.excerpt.replace(heading[1], ''); - } - } - } - } - - return { - frontMatter: result.data, - 
content: result.content, - excerpt: result.excerpt, - hasFrontMatter, - }; - } catch (e) { - throw new Error(`Error while parsing markdown front matter. -This can happen if you use special characters like : in frontmatter values (try using "" around that value) -${e.message}`); - } -} - -export function parseMarkdownString( - markdownString: string, - source?: string, -): ParsedMarkdown { - return readFrontMatter(markdownString, source, { - excerpt: (file: matter.GrayMatterFile): void => { - // Hacky way of stripping out import statements from the excerpt - // TODO: Find a better way to do so, possibly by compiling the Markdown content, - // stripping out HTML tags and obtaining the first line. - file.excerpt = createExcerpt(file.content); - }, - }); -} - -export async function parseMarkdownFile( - source: string, -): Promise { - const markdownString = await fs.readFile(source, 'utf-8'); - try { - return parseMarkdownString(markdownString, source); - } catch (e) { - throw new Error( - `Error while parsing markdown file ${source} -${e.message}`, - ); - } -} - export function normalizeUrl(rawUrls: string[]): string { const urls = rawUrls; const resultArray = []; diff --git a/packages/docusaurus-utils/src/markdownParser.ts b/packages/docusaurus-utils/src/markdownParser.ts new file mode 100644 index 0000000000..3cb83dc366 --- /dev/null +++ b/packages/docusaurus-utils/src/markdownParser.ts @@ -0,0 +1,185 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import chalk from 'chalk'; +import fs from 'fs-extra'; +import matter from 'gray-matter'; + +// Hacky way of stripping out import statements from the excerpt +// TODO: Find a better way to do so, possibly by compiling the Markdown content, +// stripping out HTML tags and obtaining the first line. 
/**
 * Builds a plain-text excerpt from Markdown content.
 *
 * The content is split into lines and the first line that is still non-empty
 * after stripping Markdown/HTML syntax is returned. Empty lines and lines
 * matching the import/export pattern are skipped.
 *
 * @param fileString Markdown content (front matter already removed).
 * @returns The cleaned first meaningful line, or `undefined` when every line
 * is empty after cleaning.
 */
export function createExcerpt(fileString: string): string | undefined {
  const fileLines = fileString
    .trimLeft()
    // Remove Markdown alternate title
    .replace(/^[^\n]*\n[=]+/g, '')
    .split('\n');

  /* eslint-disable no-continue */
  // eslint-disable-next-line no-restricted-syntax
  for (const fileLine of fileLines) {
    // Skip empty line.
    if (!fileLine.trim()) {
      continue;
    }

    // Skip import/export declaration.
    if (/^\s*?import\s.*(from.*)?;?|export\s.*{.*};?/.test(fileLine)) {
      continue;
    }

    const cleanedLine = fileLine
      // Remove HTML tags.
      .replace(/<[^>]*>/g, '')
      // Remove Title headers
      .replace(/^\#\s*([^#]*)\s*\#?/gm, '')
      // Remove Markdown + ATX-style headers
      .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
      // Remove emphasis and strikethroughs.
      .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
      // Remove images.
      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove footnotes.
      .replace(/\[\^.+?\](\: .*?$)?/g, '')
      // Remove inline links.
      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
      // Remove inline code.
      .replace(/`(.+?)`/g, '$1')
      // Remove blockquotes.
      .replace(/^\s{0,3}>\s?/g, '')
      // Remove admonition definition.
      .replace(/(:{3}.*)/, '')
      // Remove Emoji names within colons include preceding whitespace.
      .replace(/\s?(:(::|[^:\n])+:)/g, '')
      .trim();

    if (cleanedLine) {
      return cleanedLine;
    }
  }

  return undefined;
}

/**
 * Splits a Markdown file into its front matter and its body using
 * `gray-matter`.
 *
 * @param markdownFileContent Raw content of the Markdown file.
 * @returns The parsed front matter (an empty object when `gray-matter`
 * yields none) and the trimmed body (an empty string when there is none).
 */
export function parseFrontMatter(
  markdownFileContent: string,
): {
  frontMatter: Record<string, unknown>;
  content: string;
} {
  const {data, content} = matter(markdownFileContent);
  return {
    frontMatter: data ?? {},
    content: content?.trim() ?? '',
  };
}

/**
 * Extracts a leading Markdown title from the content.
 *
 * Two forms are recognised, and only at the very start of the trimmed
 * content: the regular `# Title` form and the alternate form where the title
 * line is underlined with `=` characters.
 *
 * @param contentUntrimmed Markdown content, front matter already removed.
 * @param options `keepContentTitle` (default `false`): when `true`, the title
 * is reported but left in the returned content.
 * @returns The trimmed content (title removed unless `keepContentTitle`) and
 * the trimmed title, or `contentTitle: undefined` when no non-empty leading
 * title is found.
 */
export function parseMarkdownContentTitle(
  contentUntrimmed: string,
  options?: {keepContentTitle?: boolean},
): {content: string; contentTitle: string | undefined} {
  const keepContentTitleOption = options?.keepContentTitle ?? false;

  const content = contentUntrimmed.trim();

  // `pattern` captures the full text to strip, `title` the title text itself.
  const regularTitleMatch = /^(?<pattern>#\s*(?<title>[^#\n]*)+\s*#*[\s\r]*?\n*?)/g.exec(
    content,
  );
  const alternateTitleMatch = /^(?<pattern>\s*(?<title>[^\n]*)\s*\n[=]+)/g.exec(
    content,
  );

  // The regular form takes precedence over the alternate form.
  const titleMatch = regularTitleMatch ?? alternateTitleMatch;
  const {pattern, title} = titleMatch?.groups ?? {};

  // An empty title (e.g. a lone "#") is treated as "no title".
  if (!pattern || !title) {
    return {content, contentTitle: undefined};
  }

  const newContent = keepContentTitleOption
    ? content
    : content.replace(pattern, '');

  return {
    content: newContent.trim(),
    contentTitle: title.trim(),
  };
}

type ParsedMarkdown = {
  frontMatter: Record<string, unknown>;
  content: string;
  contentTitle: string | undefined;
  excerpt: string | undefined;
};

/**
 * Parses a Markdown string into front matter, content, content title and
 * excerpt.
 *
 * A warning is printed when both a front matter `title` and a Markdown
 * content title are present, unless `keepContentTitle` is set or the env
 * variable `DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING` equals `'false'`.
 *
 * @param markdownFileContent Raw content of the Markdown file.
 * @param options `source`: file name used in the duplicate-title warning;
 * `keepContentTitle` (default `false`): keep the Markdown title in `content`.
 * @returns The parsed pieces of the Markdown document.
 * @throws Rethrows any parsing error after logging a front matter hint.
 */
export function parseMarkdownString(
  markdownFileContent: string,
  options?: {
    source?: string;
    keepContentTitle?: boolean;
  },
): ParsedMarkdown {
  try {
    const sourceOption = options?.source;
    const keepContentTitle = options?.keepContentTitle ?? false;

    const {frontMatter, content: contentWithoutFrontMatter} = parseFrontMatter(
      markdownFileContent,
    );

    const {content, contentTitle} = parseMarkdownContentTitle(
      contentWithoutFrontMatter,
      {
        keepContentTitle,
      },
    );

    const excerpt = createExcerpt(content);

    // TODO not sure this is a good place for this warning
    if (
      frontMatter.title &&
      contentTitle &&
      !keepContentTitle &&
      !(process.env.DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING === 'false')
    ) {
      console.warn(
        chalk.yellow(`Duplicate title found in ${sourceOption ?? 'this'} file.
Use either a frontmatter title or a markdown title, not both.
If this is annoying you, use env DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING=false`),
      );
    }

    return {
      frontMatter,
      content,
      contentTitle,
      excerpt,
    };
  } catch (e) {
    console.error(
      chalk.red(`Error while parsing markdown front matter.
This can happen if you use special characters like : in frontmatter values (try using "" around that value)`),
    );
    throw e;
  }
}

/**
 * Reads a Markdown file from disk (UTF-8) and parses it with
 * `parseMarkdownString`, passing the file path as `source`.
 *
 * @param source Path of the Markdown file to read.
 * @returns The parsed pieces of the Markdown document.
 * @throws Error naming the file when parsing fails; read errors propagate
 * unchanged.
 */
export async function parseMarkdownFile(
  source: string,
): Promise<ParsedMarkdown> {
  const markdownString = await fs.readFile(source, 'utf-8');
  try {
    return parseMarkdownString(markdownString, {source});
  } catch (e) {
    // The caught value is not guaranteed to be an Error instance.
    const message = e instanceof Error ? e.message : String(e);
    throw new Error(
      `Error while parsing markdown file ${source}
${message}`,
    );
  }
}