feat: siteConfig.markdown.parseFrontMatter hook (#9624)

2025-07-14 15:28:08 +02:00 · 2023-12-16 02:50:26 +01:00 · 2023-12-16 02:50:26 +01:00 · affca7a9a2
commit affca7a9a2
parent 28e7298211
27 changed files with 486 additions and 133 deletions
--- a/packages/docusaurus-utils/src/tests/snapshots/markdownUtils.test.ts.snap
+++ b/packages/docusaurus-utils/src/tests/snapshots/markdownUtils.test.ts.snap
@ -1,6 +1,6 @@
 // Jest Snapshot v1, https://goo.gl/fbAQLP

-exports[`parseMarkdownString deletes only first heading 1`] = `
+exports[`parseMarkdownFile deletes only first heading 1`] = `
 {
  "content": "# Markdown Title

@ -15,7 +15,7 @@ test test test # test bar
 }
 `;

-exports[`parseMarkdownString deletes only first heading 2 1`] = `
+exports[`parseMarkdownFile deletes only first heading 2 1`] = `
 {
  "content": "# test

@ -30,7 +30,7 @@ test3",
 }
 `;

-exports[`parseMarkdownString does not warn for duplicate title if markdown title is not at the top 1`] = `
+exports[`parseMarkdownFile does not warn for duplicate title if markdown title is not at the top 1`] = `
 {
  "content": "foo

@ -43,7 +43,7 @@ exports[`parseMarkdownString does not warn for duplicate title if markdown title
 }
 `;

-exports[`parseMarkdownString handles code blocks 1`] = `
+exports[`parseMarkdownFile handles code blocks 1`] = `
 {
  "content": "\`\`\`js
 code
@ -56,7 +56,7 @@ Content",
 }
 `;

-exports[`parseMarkdownString handles code blocks 2`] = `
+exports[`parseMarkdownFile handles code blocks 2`] = `
 {
  "content": "\`\`\`\`js
 Foo
@ -73,7 +73,7 @@ Content",
 }
 `;

-exports[`parseMarkdownString handles code blocks 3`] = `
+exports[`parseMarkdownFile handles code blocks 3`] = `
 {
  "content": "\`\`\`\`js
 Foo
@ -88,7 +88,7 @@ Content",
 }
 `;

-exports[`parseMarkdownString ignores markdown title if its not a first text 1`] = `
+exports[`parseMarkdownFile ignores markdown title if its not a first text 1`] = `
 {
  "content": "foo
 # test",
@ -98,7 +98,21 @@ exports[`parseMarkdownString ignores markdown title if its not a first text 1`]
 }
 `;

-exports[`parseMarkdownString parse markdown with front matter 1`] = `
+exports[`parseMarkdownFile parse markdown with custom front matter parser 1`] = `
+{
+  "content": "Some text",
+  "contentTitle": undefined,
+  "excerpt": "Some text",
+  "frontMatter": {
+    "age": 84,
+    "extra": "value",
+    "great": true,
+    "title": "Frontmatter title",
+  },
+}
+`;
+
+exports[`parseMarkdownFile parse markdown with front matter 1`] = `
 {
  "content": "Some text",
  "contentTitle": undefined,
@ -109,7 +123,7 @@ exports[`parseMarkdownString parse markdown with front matter 1`] = `
 }
 `;

-exports[`parseMarkdownString parses first heading as contentTitle 1`] = `
+exports[`parseMarkdownFile parses first heading as contentTitle 1`] = `
 {
  "content": "# Markdown Title

@ -120,7 +134,7 @@ Some text",
 }
 `;

-exports[`parseMarkdownString parses front-matter and ignore h2 1`] = `
+exports[`parseMarkdownFile parses front-matter and ignore h2 1`] = `
 {
  "content": "## test",
  "contentTitle": undefined,
@ -131,7 +145,7 @@ exports[`parseMarkdownString parses front-matter and ignore h2 1`] = `
 }
 `;

-exports[`parseMarkdownString parses title only 1`] = `
+exports[`parseMarkdownFile parses title only 1`] = `
 {
  "content": "# test",
  "contentTitle": "test",
@ -140,7 +154,7 @@ exports[`parseMarkdownString parses title only 1`] = `
 }
 `;

-exports[`parseMarkdownString parses title only alternate 1`] = `
+exports[`parseMarkdownFile parses title only alternate 1`] = `
 {
  "content": "test
 ===",
@ -150,7 +164,7 @@ exports[`parseMarkdownString parses title only alternate 1`] = `
 }
 `;

-exports[`parseMarkdownString reads front matter only 1`] = `
+exports[`parseMarkdownFile reads front matter only 1`] = `
 {
  "content": "",
  "contentTitle": undefined,
@ -161,7 +175,7 @@ exports[`parseMarkdownString reads front matter only 1`] = `
 }
 `;

-exports[`parseMarkdownString warns about duplicate titles (front matter + markdown alternate) 1`] = `
+exports[`parseMarkdownFile warns about duplicate titles (front matter + markdown alternate) 1`] = `
 {
  "content": "Markdown Title alternate
 ================
@ -175,7 +189,7 @@ Some text",
 }
 `;

-exports[`parseMarkdownString warns about duplicate titles (front matter + markdown) 1`] = `
+exports[`parseMarkdownFile warns about duplicate titles (front matter + markdown) 1`] = `
 {
  "content": "# Markdown Title

@ -188,7 +202,7 @@ Some text",
 }
 `;

-exports[`parseMarkdownString warns about duplicate titles 1`] = `
+exports[`parseMarkdownFile warns about duplicate titles 1`] = `
 {
  "content": "# test",
  "contentTitle": "test",
--- a/packages/docusaurus-utils/src/tests/markdownUtils.test.ts
+++ b/packages/docusaurus-utils/src/tests/markdownUtils.test.ts
@ -9,12 +9,14 @@ import dedent from 'dedent';
 import {
  createExcerpt,
  parseMarkdownContentTitle,
-  parseMarkdownString,
  parseMarkdownHeadingId,
  writeMarkdownHeadingId,
  escapeMarkdownHeadingIds,
  unwrapMdxCodeBlocks,
  admonitionTitleToDirectiveLabel,
+  parseMarkdownFile,
+  DEFAULT_PARSE_FRONT_MATTER,
+  parseFileContentFrontMatter,
 } from '../markdownUtils';

 describe('createExcerpt', () => {
@ -623,32 +625,110 @@ Lorem Ipsum
  });
 });

-describe('parseMarkdownString', () => {
-  it('parse markdown with front matter', () => {
-    expect(
-      parseMarkdownString(dedent`
+describe('parseFileContentFrontMatter', () => {
+  function test(fileContent: string) {
+    return parseFileContentFrontMatter(fileContent);
+  }
+
+  it('can parse front matter', () => {
+    const input = dedent`
+        ---
+        title: Frontmatter title
+        author:
+          age: 42
+          birth: 2000-07-23
+        ---
+
+        Some text
+        `;
+
+    const expectedResult = {
+      content: 'Some text',
+      frontMatter: {
+        title: 'Frontmatter title',
+        author: {age: 42, birth: new Date('2000-07-23')},
+      },
+    };
+
+    const result = test(input) as typeof expectedResult;
+    expect(result).toEqual(expectedResult);
+    expect(result.frontMatter.author.birth).toBeInstanceOf(Date);
+
+    // A regression test, ensure we don't return gray-matter cached objects
+    result.frontMatter.title = 'modified';
+    // Mutate a nested value too: parsing again must not see this change either
+    result.frontMatter.author.age = 53;
+    expect(test(input)).toEqual(expectedResult);
+  });
+});
+
+describe('parseMarkdownFile', () => {
+  // Test helper: calls parseMarkdownFile() with default params (a fixed file
+  // path and the default front matter parser); `options` can override any of
+  // them, including `fileContent`.
+  async function test(
+    fileContent: string,
+    options?: Partial<Parameters<typeof parseMarkdownFile>[0]>,
+  ) {
+    return parseMarkdownFile({
+      fileContent,
+      filePath: 'some-file-path.mdx',
+      parseFrontMatter: DEFAULT_PARSE_FRONT_MATTER,
+      ...options,
+    });
+  }
+
+  it('parse markdown with front matter', async () => {
+    await expect(
+      test(dedent`
        ---
        title: Frontmatter title
        ---

        Some text
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('parses first heading as contentTitle', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('parse markdown with custom front matter parser', async () => {
+    await expect(
+      test(
+        dedent`
+        ---
+        title: Frontmatter title
+        age: 42
+        ---
+
+        Some text
+        `,
+        {
+          parseFrontMatter: async (params) => {
+            const result = await params.defaultParseFrontMatter(params);
+            return {
+              ...result,
+              frontMatter: {
+                ...result.frontMatter,
+                // NOTE(review): front matter values look typed as `unknown`,
+                // so convert to a number explicitly before multiplying
+                age: Number(result.frontMatter.age) * 2,
+                extra: 'value',
+                great: true,
+              },
+            };
+          },
+        },
+      ),
+    ).resolves.toMatchSnapshot();
+  });
+
+  it('parses first heading as contentTitle', async () => {
+    await expect(
+      test(dedent`
        # Markdown Title

        Some text
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('warns about duplicate titles (front matter + markdown)', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('warns about duplicate titles (front matter + markdown)', async () => {
+    await expect(
+      test(dedent`
        ---
        title: Frontmatter title
        ---
@ -657,12 +737,12 @@ describe('parseMarkdownString', () => {

        Some text
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('warns about duplicate titles (front matter + markdown alternate)', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('warns about duplicate titles (front matter + markdown alternate)', async () => {
+    await expect(
+      test(dedent`
        ---
        title: Frontmatter title
        ---
@ -672,12 +752,12 @@ describe('parseMarkdownString', () => {

        Some text
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('does not warn for duplicate title if markdown title is not at the top', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('does not warn for duplicate title if markdown title is not at the top', async () => {
+    await expect(
+      test(dedent`
        ---
        title: Frontmatter title
        ---
@ -686,12 +766,12 @@ describe('parseMarkdownString', () => {

        # Markdown Title
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('deletes only first heading', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('deletes only first heading', async () => {
+    await expect(
+      test(dedent`
        # Markdown Title

        test test test # test bar
@ -700,12 +780,12 @@ describe('parseMarkdownString', () => {

        ### Markdown Title h3
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('parses front-matter and ignore h2', () => {
-    expect(
-      parseMarkdownString(
+  it('parses front-matter and ignore h2', async () => {
+    await expect(
+      test(
        dedent`
          ---
          title: Frontmatter title
@ -713,55 +793,55 @@ describe('parseMarkdownString', () => {
          ## test
          `,
      ),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('reads front matter only', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('reads front matter only', async () => {
+    await expect(
+      test(dedent`
        ---
        title: test
        ---
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('parses title only', () => {
-    expect(parseMarkdownString('# test')).toMatchSnapshot();
+  it('parses title only', async () => {
+    await expect(test('# test')).resolves.toMatchSnapshot();
  });

-  it('parses title only alternate', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('parses title only alternate', async () => {
+    await expect(
+      test(dedent`
        test
        ===
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('warns about duplicate titles', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('warns about duplicate titles', async () => {
+    await expect(
+      test(dedent`
        ---
        title: Frontmatter title
        ---
        # test
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('ignores markdown title if its not a first text', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('ignores markdown title if its not a first text', async () => {
+    await expect(
+      test(dedent`
        foo
        # test
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('deletes only first heading 2', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('deletes only first heading 2', async () => {
+    await expect(
+      test(dedent`
        # test

        test test test test test test
@ -770,21 +850,21 @@ describe('parseMarkdownString', () => {
        ### test
        test3
        `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('handles code blocks', () => {
-    expect(
-      parseMarkdownString(dedent`
+  it('handles code blocks', async () => {
+    await expect(
+      test(dedent`
        \`\`\`js
        code
        \`\`\`

        Content
      `),
-    ).toMatchSnapshot();
-    expect(
-      parseMarkdownString(dedent`
+    ).resolves.toMatchSnapshot();
+    await expect(
+      test(dedent`
        \`\`\`\`js
        Foo
        \`\`\`diff
@ -795,9 +875,9 @@ describe('parseMarkdownString', () => {

        Content
      `),
-    ).toMatchSnapshot();
-    expect(
-      parseMarkdownString(dedent`
+    ).resolves.toMatchSnapshot();
+    await expect(
+      test(dedent`
        \`\`\`\`js
        Foo
        \`\`\`diff
@ -806,17 +886,17 @@ describe('parseMarkdownString', () => {

        Content
      `),
-    ).toMatchSnapshot();
+    ).resolves.toMatchSnapshot();
  });

-  it('throws for invalid front matter', () => {
-    expect(() =>
-      parseMarkdownString(dedent`
+  it('throws for invalid front matter', async () => {
+    await expect(
+      test(dedent`
      ---
      foo: f: a
      ---
      `),
-    ).toThrowErrorMatchingInlineSnapshot(`
+    ).rejects.toThrowErrorMatchingInlineSnapshot(`
      "incomplete explicit mapping pair; a key node is missed; or followed by a non-tabulated empty line at line 2, column 7:
          foo: f: a
                ^"
--- a/packages/docusaurus-utils/src/index.ts
+++ b/packages/docusaurus-utils/src/index.ts
@ -70,9 +70,9 @@ export {
  unwrapMdxCodeBlocks,
  admonitionTitleToDirectiveLabel,
  createExcerpt,
-  parseFrontMatter,
+  DEFAULT_PARSE_FRONT_MATTER,
  parseMarkdownContentTitle,
-  parseMarkdownString,
+  parseMarkdownFile,
  writeMarkdownHeadingId,
  type WriteHeadingIDOptions,
 } from './markdownUtils';
--- a/packages/docusaurus-utils/src/markdownUtils.ts
+++ b/packages/docusaurus-utils/src/markdownUtils.ts
@ -8,6 +8,10 @@
 import logger from '@docusaurus/logger';
 import matter from 'gray-matter';
 import {createSlugger, type Slugger, type SluggerOptions} from './slugger';
+import type {
+  ParseFrontMatter,
+  DefaultParseFrontMatter,
+} from '@docusaurus/types';

 // Some utilities for parsing Markdown content. These things are only used on
 // server-side when we infer metadata like `title` and `description` from the
@ -214,19 +218,40 @@ export function createExcerpt(fileString: string): string | undefined {
 * ---
 * ```
 */
-export function parseFrontMatter(markdownFileContent: string): {
+export function parseFileContentFrontMatter(fileContent: string): {
  /** Front matter as parsed by gray-matter. */
  frontMatter: {[key: string]: unknown};
  /** The remaining content, trimmed. */
  content: string;
 } {
-  const {data, content} = matter(markdownFileContent);
+  // TODO Docusaurus v4: replace gray-matter with a better library
+  // gray-matter is unmaintained, inflexible, and its code is hard to follow
+  const {data, content} = matter(fileContent);
+
+  // gray-matter has an undocumented front matter caching behavior:
+  // https://github.com/jonschlinkert/gray-matter/blob/ce67a86dba419381db0dd01cc84e2d30a1d1e6a5/index.js#L39
+  // This becomes a problem as soon as the returned front matter is mutated,
+  // and we want to allow mutation as part of the parseFrontMatter API.
+  // So we make it safe to mutate by always returning a deep copy.
+  const frontMatter =
+    // structuredClone() doesn't work well with Date under Jest,
+    // see https://github.com/jestjs/jest/issues/2549
+    // so under Jest we parse a second time, passing a {} options object:
+    // this undocumented empty options object disables gray-matter caching.
+    process.env.JEST_WORKER_ID
+      ? matter(fileContent, {}).data
+      : structuredClone(data);
+
  return {
-    frontMatter: data,
+    frontMatter,
    content: content.trim(),
  };
 }

+/**
+ * The default front matter parser. It reads only `fileContent` from its
+ * params and passes it to `parseFileContentFrontMatter`.
+ */
+export const DEFAULT_PARSE_FRONT_MATTER: DefaultParseFrontMatter = async (
+  params,
+) => parseFileContentFrontMatter(params.fileContent);
+
 function toTextContentTitle(contentTitle: string): string {
  return contentTitle.replace(/`(?<text>[^`]*)`/g, '$<text>');
 }
@ -309,10 +334,16 @@ export function parseMarkdownContentTitle(
 * @throws Throws when `parseFrontMatter` throws, usually because of invalid
 * syntax.
 */
-export function parseMarkdownString(
-  markdownFileContent: string,
-  options?: ParseMarkdownContentTitleOptions,
-): {
+export async function parseMarkdownFile({
+  filePath,
+  fileContent,
+  parseFrontMatter,
+  removeContentTitle,
+}: {
+  filePath: string;
+  fileContent: string;
+  parseFrontMatter: ParseFrontMatter;
+} & ParseMarkdownContentTitleOptions): Promise<{
  /** @see {@link parseFrontMatter} */
  frontMatter: {[key: string]: unknown};
  /** @see {@link parseMarkdownContentTitle} */
@ -324,14 +355,18 @@ export function parseMarkdownString(
   * the `removeContentTitle` option.
   */
  content: string;
-} {
+}> {
  try {
    const {frontMatter, content: contentWithoutFrontMatter} =
-      parseFrontMatter(markdownFileContent);
+      await parseFrontMatter({
+        filePath,
+        fileContent,
+        defaultParseFrontMatter: DEFAULT_PARSE_FRONT_MATTER,
+      });

    const {content, contentTitle} = parseMarkdownContentTitle(
      contentWithoutFrontMatter,
-      options,
+      {removeContentTitle},
    );

    const excerpt = createExcerpt(content);