fix(mdx-loader): resolve Markdown/MDX links with Remark instead of RegExp (#10168)

2025-07-25 04:28:07 +02:00 · 2024-05-24 19:03:23 +02:00 · 2024-05-24 19:03:23 +02:00 · e34614963e
commit e34614963e
parent aab332c2ae
36 changed files with 902 additions and 1620 deletions
--- a/packages/docusaurus-plugin-content-blog/src/tests/snapshots/blogUtils.test.ts.snap
+++ b/packages/docusaurus-plugin-content-blog/src/tests/snapshots/blogUtils.test.ts.snap
@ -1,28 +1,5 @@
 // Jest Snapshot v1, https://goo.gl/fbAQLP

-exports[`linkify reports broken markdown links 1`] = `
-"---
-title: This post links to another one!
---
-
-[Good link 1](/blog/2018/12/14/Happy-First-Birthday-Slash)
-
-[Good link 2](/blog/2018/12/14/Happy-First-Birthday-Slash)
-
-[Bad link 1](postNotExist1.md)
-
-[Bad link 1](./postNotExist2.mdx)
-"
-`;
-
-exports[`linkify transforms to correct link 1`] = `
-"---
-title: This post links to another one!
---
-
-[Linked post](/blog/2018/12/14/Happy-First-Birthday-Slash)"
-`;
-
 exports[`paginateBlogPosts generates a single page 1`] = `
 [
  {
--- a/packages/docusaurus-plugin-content-blog/src/tests/blogUtils.test.ts
+++ b/packages/docusaurus-plugin-content-blog/src/tests/blogUtils.test.ts
@ -5,20 +5,13 @@
 * LICENSE file in the root directory of this source tree.
 */

-import {jest} from '@jest/globals';
-import fs from 'fs-extra';
-import path from 'path';
 import {fromPartial} from '@total-typescript/shoehorn';
 import {
  truncate,
  parseBlogFileName,
-  linkify,
-  getSourceToPermalink,
  paginateBlogPosts,
  applyProcessBlogPosts,
-  type LinkifyParams,
 } from '../blogUtils';
-import type {BlogBrokenMarkdownLink, BlogContentPaths} from '../types';
 import type {BlogPost} from '@docusaurus/plugin-content-blog';

 describe('truncate', () => {
@ -209,95 +202,6 @@ describe('parseBlogFileName', () => {
  });
 });

-describe('linkify', () => {
-  const siteDir = path.join(__dirname, '__fixtures__', 'website');
-  const contentPaths: BlogContentPaths = {
-    contentPath: path.join(siteDir, 'blog-with-ref'),
-    contentPathLocalized: path.join(siteDir, 'blog-with-ref-localized'),
-  };
-  const pluginDir = 'blog-with-ref';
-
-  const blogPosts: BlogPost[] = [
-    {
-      id: 'Happy 1st Birthday Slash!',
-      metadata: {
-        permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash',
-        source: path.posix.join(
-          '@site',
-          pluginDir,
-          '2018-12-14-Happy-First-Birthday-Slash.md',
-        ),
-        title: 'Happy 1st Birthday Slash!',
-        description: `pattern name`,
-        date: new Date('2018-12-14'),
-        tags: [],
-        prevItem: {
-          permalink: '/blog/2019/01/01/date-matter',
-          title: 'date-matter',
-        },
-        hasTruncateMarker: false,
-        frontMatter: {},
-        authors: [],
-        unlisted: false,
-      },
-      content: '',
-    },
-  ];
-
-  async function transform(filePath: string, options?: Partial<LinkifyParams>) {
-    const fileContent = await fs.readFile(filePath, 'utf-8');
-    const transformedContent = linkify({
-      filePath,
-      fileString: fileContent,
-      siteDir,
-      contentPaths,
-      sourceToPermalink: getSourceToPermalink(blogPosts),
-      onBrokenMarkdownLink: (brokenMarkdownLink) => {
-        throw new Error(
-          `Broken markdown link found: ${JSON.stringify(brokenMarkdownLink)}`,
-        );
-      },
-      ...options,
-    });
-    return [fileContent, transformedContent];
-  }
-
-  it('transforms to correct link', async () => {
-    const post = path.join(contentPaths.contentPath, 'post.md');
-    const [content, transformedContent] = await transform(post);
-    expect(transformedContent).toMatchSnapshot();
-    expect(transformedContent).toContain(
-      '](/blog/2018/12/14/Happy-First-Birthday-Slash',
-    );
-    expect(transformedContent).not.toContain(
-      '](2018-12-14-Happy-First-Birthday-Slash.md)',
-    );
-    expect(content).not.toEqual(transformedContent);
-  });
-
-  it('reports broken markdown links', async () => {
-    const filePath = 'post-with-broken-links.md';
-    const folderPath = contentPaths.contentPath;
-    const postWithBrokenLinks = path.join(folderPath, filePath);
-    const onBrokenMarkdownLink = jest.fn();
-    const [, transformedContent] = await transform(postWithBrokenLinks, {
-      onBrokenMarkdownLink,
-    });
-    expect(transformedContent).toMatchSnapshot();
-    expect(onBrokenMarkdownLink).toHaveBeenCalledTimes(2);
-    expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(1, {
-      filePath: path.resolve(folderPath, filePath),
-      contentPaths,
-      link: 'postNotExist1.md',
-    } as BlogBrokenMarkdownLink);
-    expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(2, {
-      filePath: path.resolve(folderPath, filePath),
-      contentPaths,
-      link: './postNotExist2.mdx',
-    } as BlogBrokenMarkdownLink);
-  });
-});
-
 describe('processBlogPosts', () => {
  const blogPost2022: BlogPost = fromPartial({
    metadata: {date: new Date('2022-01-01')},
--- a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
+++ b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
@ -17,7 +17,6 @@ import {
  getEditUrl,
  getFolderContainingFile,
  posixPath,
-  replaceMarkdownLinks,
  Globby,
  normalizeFrontMatterTags,
  groupTaggedItems,
@ -38,7 +37,7 @@ import type {
  BlogTags,
  BlogPaginated,
 } from '@docusaurus/plugin-content-blog';
-import type {BlogContentPaths, BlogMarkdownLoaderOptions} from './types';
+import type {BlogContentPaths} from './types';

 export function truncate(fileString: string, truncateMarker: RegExp): string {
  return fileString.split(truncateMarker, 1).shift()!;
@ -403,35 +402,6 @@ export async function generateBlogPosts(
  return blogPosts;
 }

-export type LinkifyParams = {
-  filePath: string;
-  fileString: string;
-} & Pick<
-  BlogMarkdownLoaderOptions,
-  'sourceToPermalink' | 'siteDir' | 'contentPaths' | 'onBrokenMarkdownLink'
->;
-
-export function linkify({
-  filePath,
-  contentPaths,
-  fileString,
-  siteDir,
-  sourceToPermalink,
-  onBrokenMarkdownLink,
-}: LinkifyParams): string {
-  const {newContent, brokenMarkdownLinks} = replaceMarkdownLinks({
-    siteDir,
-    fileString,
-    filePath,
-    contentPaths,
-    sourceToPermalink,
-  });
-
-  brokenMarkdownLinks.forEach((l) => onBrokenMarkdownLink(l));
-
-  return newContent;
-}
-
 export async function applyProcessBlogPosts({
  blogPosts,
  processBlogPosts,
--- a/packages/docusaurus-plugin-content-blog/src/index.ts
+++ b/packages/docusaurus-plugin-content-blog/src/index.ts
@ -18,6 +18,7 @@ import {
  getContentPathList,
  getDataFilePath,
  DEFAULT_PLUGIN_ID,
+  resolveMarkdownLinkPathname,
 } from '@docusaurus/utils';
 import {
  getSourceToPermalink,
@ -43,6 +44,8 @@ import type {
  BlogContent,
  BlogPaginated,
 } from '@docusaurus/plugin-content-blog';
+import type {Options as MDXLoaderOptions} from '@docusaurus/mdx-loader/lib/loader';
+import type {RuleSetUseItem} from 'webpack';

 const PluginName = 'docusaurus-plugin-content-blog';

@ -213,22 +216,81 @@ export default async function pluginContentBlog(
        beforeDefaultRehypePlugins,
      } = options;

-      const markdownLoaderOptions: BlogMarkdownLoaderOptions = {
-        siteDir,
-        contentPaths,
-        truncateMarker,
-        sourceToPermalink: getSourceToPermalink(content.blogPosts),
-        onBrokenMarkdownLink: (brokenMarkdownLink) => {
-          if (onBrokenMarkdownLinks === 'ignore') {
-            return;
-          }
-          logger.report(
-            onBrokenMarkdownLinks,
-          )`Blog markdown link couldn't be resolved: (url=${brokenMarkdownLink.link}) in path=${brokenMarkdownLink.filePath}`;
-        },
-      };
-
+      const sourceToPermalink = getSourceToPermalink(content.blogPosts);
      const contentDirs = getContentPathList(contentPaths);
+
+      function createMDXLoader(): RuleSetUseItem {
+        const loaderOptions: MDXLoaderOptions = {
+          admonitions,
+          remarkPlugins,
+          rehypePlugins,
+          beforeDefaultRemarkPlugins: [
+            footnoteIDFixer,
+            ...beforeDefaultRemarkPlugins,
+          ],
+          beforeDefaultRehypePlugins,
+          staticDirs: siteConfig.staticDirectories.map((dir) =>
+            path.resolve(siteDir, dir),
+          ),
+          siteDir,
+          isMDXPartial: createAbsoluteFilePathMatcher(
+            options.exclude,
+            contentDirs,
+          ),
+          metadataPath: (mdxPath: string) => {
+            // Note that metadataPath must be the same/in-sync as
+            // the path from createData for each MDX.
+            const aliasedPath = aliasedSitePath(mdxPath, siteDir);
+            return path.join(dataDir, `${docuHash(aliasedPath)}.json`);
+          },
+          // For blog posts a title in markdown is always removed
+          // Blog post titles are rendered separately
+          removeContentTitle: true,
+          // Assets allow converting some relative image paths to
+          // require() calls
+          // @ts-expect-error: TODO fix typing issue
+          createAssets: ({
+            frontMatter,
+            metadata,
+          }: {
+            frontMatter: BlogPostFrontMatter;
+            metadata: BlogPostMetadata;
+          }): Assets => ({
+            image: frontMatter.image,
+            authorsImageUrls: metadata.authors.map((author) => author.imageURL),
+          }),
+          markdownConfig: siteConfig.markdown,
+          resolveMarkdownLink: ({linkPathname, sourceFilePath}) => {
+            const permalink = resolveMarkdownLinkPathname(linkPathname, {
+              sourceFilePath,
+              sourceToPermalink,
+              siteDir,
+              contentPaths,
+            });
+            if (permalink === null) {
+              logger.report(
+                onBrokenMarkdownLinks,
+              )`Blog markdown link couldn't be resolved: (url=${linkPathname}) in source file path=${sourceFilePath}`;
+            }
+            return permalink;
+          },
+        };
+        return {
+          loader: require.resolve('@docusaurus/mdx-loader'),
+          options: loaderOptions,
+        };
+      }
+
+      function createBlogMarkdownLoader(): RuleSetUseItem {
+        const loaderOptions: BlogMarkdownLoaderOptions = {
+          truncateMarker,
+        };
+        return {
+          loader: path.resolve(__dirname, './markdownLoader.js'),
+          options: loaderOptions,
+        };
+      }
+
      return {
        resolve: {
          alias: {
@ -242,61 +304,7 @@ export default async function pluginContentBlog(
              include: contentDirs
                // Trailing slash is important, see https://github.com/facebook/docusaurus/pull/3970
                .map(addTrailingPathSeparator),
-              use: [
-                {
-                  loader: require.resolve('@docusaurus/mdx-loader'),
-                  options: {
-                    admonitions,
-                    remarkPlugins,
-                    rehypePlugins,
-                    beforeDefaultRemarkPlugins: [
-                      footnoteIDFixer,
-                      ...beforeDefaultRemarkPlugins,
-                    ],
-                    beforeDefaultRehypePlugins,
-                    staticDirs: siteConfig.staticDirectories.map((dir) =>
-                      path.resolve(siteDir, dir),
-                    ),
-                    siteDir,
-                    isMDXPartial: createAbsoluteFilePathMatcher(
-                      options.exclude,
-                      contentDirs,
-                    ),
-                    metadataPath: (mdxPath: string) => {
-                      // Note that metadataPath must be the same/in-sync as
-                      // the path from createData for each MDX.
-                      const aliasedPath = aliasedSitePath(mdxPath, siteDir);
-                      return path.join(
-                        dataDir,
-                        `${docuHash(aliasedPath)}.json`,
-                      );
-                    },
-                    // For blog posts a title in markdown is always removed
-                    // Blog posts title are rendered separately
-                    removeContentTitle: true,
-
-                    // Assets allow to convert some relative images paths to
-                    // require() calls
-                    createAssets: ({
-                      frontMatter,
-                      metadata,
-                    }: {
-                      frontMatter: BlogPostFrontMatter;
-                      metadata: BlogPostMetadata;
-                    }): Assets => ({
-                      image: frontMatter.image,
-                      authorsImageUrls: metadata.authors.map(
-                        (author) => author.imageURL,
-                      ),
-                    }),
-                    markdownConfig: siteConfig.markdown,
-                  },
-                },
-                {
-                  loader: path.resolve(__dirname, './markdownLoader.js'),
-                  options: markdownLoaderOptions,
-                },
-              ].filter(Boolean),
+              use: [createMDXLoader(), createBlogMarkdownLoader()],
            },
          ],
        },
--- a/packages/docusaurus-plugin-content-blog/src/markdownLoader.ts
+++ b/packages/docusaurus-plugin-content-blog/src/markdownLoader.ts
@ -5,7 +5,7 @@
 * LICENSE file in the root directory of this source tree.
 */

-import {truncate, linkify} from './blogUtils';
+import {truncate} from './blogUtils';
 import type {BlogMarkdownLoaderOptions} from './types';
 import type {LoaderContext} from 'webpack';

@ -13,23 +13,19 @@ export default function markdownLoader(
  this: LoaderContext<BlogMarkdownLoaderOptions>,
  source: string,
 ): void {
-  const filePath = this.resourcePath;
  const fileString = source;
  const callback = this.async();
  const markdownLoaderOptions = this.getOptions();

  // Linkify blog posts
-  let finalContent = linkify({
-    fileString,
-    filePath,
-    ...markdownLoaderOptions,
-  });
+  let finalContent = fileString;

  // Truncate content if requested (e.g: file.md?truncated=true).
  const truncated: boolean | undefined = this.resourceQuery
    ? !!new URLSearchParams(this.resourceQuery.slice(1)).get('truncated')
    : undefined;

+  // TODO: truncate using the AST instead of the raw string?
  if (truncated) {
    finalContent = truncate(finalContent, markdownLoaderOptions.truncateMarker);
  }
--- a/packages/docusaurus-plugin-content-blog/src/types.ts
+++ b/packages/docusaurus-plugin-content-blog/src/types.ts
@ -5,15 +5,10 @@
 * LICENSE file in the root directory of this source tree.
 */

-import type {BrokenMarkdownLink, ContentPaths} from '@docusaurus/utils';
+import type {ContentPaths} from '@docusaurus/utils';

 export type BlogContentPaths = ContentPaths;

-export type BlogBrokenMarkdownLink = BrokenMarkdownLink<BlogContentPaths>;
 export type BlogMarkdownLoaderOptions = {
-  siteDir: string;
-  contentPaths: BlogContentPaths;
  truncateMarker: RegExp;
-  sourceToPermalink: {[aliasedPath: string]: string};
-  onBrokenMarkdownLink: (brokenMarkdownLink: BlogBrokenMarkdownLink) => void;
 };