fix(mdx-loader): resolve Markdown/MDX links with Remark instead of RegExp (#10168)

2025-07-12 14:28:09 +02:00 · 2024-05-24 19:03:23 +02:00 · 2024-05-24 19:03:23 +02:00 · e34614963e
commit e34614963e
parent aab332c2ae
36 changed files with 902 additions and 1620 deletions
--- a/packages/docusaurus-utils/src/tests/snapshots/markdownLinks.test.ts.snap
+++ b/packages/docusaurus-utils/src/tests/snapshots/markdownLinks.test.ts.snap
@ -1,250 +0,0 @@
-// Jest Snapshot v1, https://goo.gl/fbAQLP
-
-exports[`replaceMarkdownLinks does basic replace 1`] = `
-{
-  "brokenMarkdownLinks": [
-    {
-      "contentPaths": {
-        "contentPath": "docs",
-        "contentPathLocalized": "i18n/docs-localized",
-      },
-      "filePath": "docs/intro.md",
-      "link": "hmmm.md",
-    },
-  ],
-  "newContent": "
-[foo](/doc/foo)
-[baz](/doc/baz)
-[foo](/doc/foo)
-[http](http://github.com/facebook/docusaurus/README.md)
-[https](https://github.com/facebook/docusaurus/README.md)
-[asset](./foo.js)
-[asset as well](@site/docs/_partial.md)
-[looks like http...](/doc/http)
-[nonexistent](hmmm.md)
-",
-}
-`;
-
-exports[`replaceMarkdownLinks handles link titles 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-[URL](/docs/file "title")
-[URL](/docs/file 'title')
-[URL](/docs/file (title))
-",
-}
-`;
-
-exports[`replaceMarkdownLinks handles stray spaces 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-[URL]( /docs/file )
-[ref]:  /docs/file
-",
-}
-`;
-
-exports[`replaceMarkdownLinks handles unpaired fences 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-\`\`\`foo
-hello
-
-\`\`\`foo
-hello
-\`\`\`
-
-A [link](/docs/file)
-",
-}
-`;
-
-exports[`replaceMarkdownLinks ignores links in HTML comments 1`] = `
-{
-  "brokenMarkdownLinks": [
-    {
-      "contentPaths": {
-        "contentPath": "docs",
-        "contentPathLocalized": "i18n/docs-localized",
-      },
-      "filePath": "docs/intro.md",
-      "link": "./foo.md",
-    },
-    {
-      "contentPaths": {
-        "contentPath": "docs",
-        "contentPathLocalized": "i18n/docs-localized",
-      },
-      "filePath": "docs/intro.md",
-      "link": "./foo.md",
-    },
-  ],
-  "newContent": "
-<!-- [foo](./foo.md) -->
-<!--
-[foo](./foo.md)
-->
-",
-}
-`;
-
-exports[`replaceMarkdownLinks ignores links in fenced blocks 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-\`\`\`
-[foo](foo.md)
-\`\`\`
-
-\`\`\`\`js
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`\`
-
-\`\`\`\`js
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`\`
-
-~~~js
-[foo](foo.md)
-~~~
-
-~~~js
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-~~~
-",
-}
-`;
-
-exports[`replaceMarkdownLinks ignores links in inline code 1`] = `
-{
-  "brokenMarkdownLinks": [
-    {
-      "contentPaths": {
-        "contentPath": "docs",
-        "contentPathLocalized": "i18n/docs-localized",
-      },
-      "filePath": "docs/intro.md",
-      "link": "foo.md",
-    },
-  ],
-  "newContent": "
-\`[foo](foo.md)\`
-",
-}
-`;
-
-exports[`replaceMarkdownLinks preserves query/hash 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-[URL](/docs/file?foo=bar#baz)
-[URL](/docs/file#a)
-[URL](/docs/file?c)
-",
-}
-`;
-
-exports[`replaceMarkdownLinks replaces Markdown links with spaces 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-[doc a](/docs/doc%20a)
-[doc a](</docs/doc%20a>)
-[doc b](/docs/my%20docs/doc%20b)
-[doc b](</docs/my%20docs/doc%20b>)
-[doc]: </docs/my%20docs/doc%20b>
-",
-}
-`;
-
-exports[`replaceMarkdownLinks replaces links with same title as URL 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-[foo.md](/docs/foo)
-[./foo.md](</docs/foo>)
-[./foo.md](/docs/foo)
-[foo.md](/docs/foo)
-[./foo.md](/docs/foo)
-",
-}
-`;
-
-exports[`replaceMarkdownLinks replaces multiple links on same line 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-[a](/docs/a), [a](/docs/a), [b](/docs/b), [c](/docs/c)
-",
-}
-`;
-
-exports[`replaceMarkdownLinks replaces reference style Markdown links 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "
-The following operations are defined for [URI]s:
-
-* [info]: Returns metadata about the resource,
-* [list]: Returns metadata about the resource's children (like getting the content of a local directory).
-
-[URI]:    /docs/api/classes/uri
-[info]:   /docs/api/classes/uri#info
-[list]:   /docs/api/classes/uri#list
-      ",
-}
-`;
-
-exports[`replaceMarkdownLinks replaces two links on the same line 1`] = `
-{
-  "brokenMarkdownLinks": [],
-  "newContent": "[TypeScript](/programming-languages/typescript/) and [Go](/programming-languages/go/)",
-}
-`;
-
-exports[`replaceMarkdownLinks resolves absolute and relative links differently 1`] = `
-{
-  "brokenMarkdownLinks": [
-    {
-      "contentPaths": {
-        "contentPath": "docs",
-        "contentPathLocalized": "i18n/docs-localized",
-      },
-      "filePath": "docs/intro/intro.md",
-      "link": "./api/classes/divine_uri.URI.md",
-    },
-    {
-      "contentPaths": {
-        "contentPath": "docs",
-        "contentPathLocalized": "i18n/docs-localized",
-      },
-      "filePath": "docs/intro/intro.md",
-      "link": "/another.md",
-    },
-  ],
-  "newContent": "
-[Relative link](/docs/another)
-[Relative link 2](/docs/api/classes/uri)
-[Relative link that should be absolute](./api/classes/divine_uri.URI.md)
-[Absolute link](/docs/api/classes/uri)
-[Absolute link from site dir](/docs/api/classes/uri)
-[Absolute link that should be relative](/another.md)
-[Relative link that acts as absolute](/docs/api/classes/uri)
-[Relative link that acts as relative](/docs/another)
-",
-}
-`;
--- a/packages/docusaurus-utils/src/tests/markdownLinks.test.ts
+++ b/packages/docusaurus-utils/src/tests/markdownLinks.test.ts
@ -5,401 +5,70 @@
 * LICENSE file in the root directory of this source tree.
 */

-import {replaceMarkdownLinks} from '../markdownLinks';
+import {resolveMarkdownLinkPathname} from '../markdownLinks';
+
+describe('resolveMarkdownLinkPathname', () => {
+  type Context = Parameters<typeof resolveMarkdownLinkPathname>[1];

-describe('replaceMarkdownLinks', () => {
  it('does basic replace', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/intro.md': '/docs/intro',
-          '@site/docs/foo.md': '/doc/foo',
-          '@site/docs/bar/baz.md': '/doc/baz',
-          '@site/docs/http.foo.md': '/doc/http',
-        },
-        fileString: `
-[foo](./foo.md)
-[baz](./bar/baz.md)
-[foo](foo.md)
-[http](http://github.com/facebook/docusaurus/README.md)
-[https](https://github.com/facebook/docusaurus/README.md)
-[asset](./foo.js)
-[asset as well](@site/docs/_partial.md)
-[looks like http...](http.foo.md)
-[nonexistent](hmmm.md)
-`,
-      }),
-    ).toMatchSnapshot();
-  });
+    const context: Context = {
+      siteDir: '.',
+      sourceFilePath: 'docs/intro.md',
+      contentPaths: {
+        contentPath: 'docs',
+        contentPathLocalized: 'i18n/docs-localized',
+      },
+      sourceToPermalink: {
+        '@site/docs/intro.md': '/docs/intro',
+        '@site/docs/foo.md': '/doc/foo',
+        '@site/docs/bar/baz.md': '/doc/baz',
+        '@site/docs/http.foo.md': '/doc/http',
+      },
+    };

-  it('replaces two links on the same line', () => {
-    // cSpell:ignore Goooooooooo
-    // This is a very arcane bug: if we continue matching using the previous
-    // matching index (as is the behavior of RegExp#exec), it will go right over
-    // the next Markdown link and fail to match the "Go" link. This only happens
-    // when: (1) the replaced link is much shorter than the Markdown path, (2)
-    // the next link is very close to the current one (e.g. here if it's not
-    // "Go" but "Goooooooooo", or if every link has the /docs/ prefix, the bug
-    // will not trigger because it won't overshoot)
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/intro.md': '/',
-          '@site/docs/programming-languages/typescript/typescript.md':
-            '/programming-languages/typescript/',
-          '@site/docs/programming-languages/go/go.md':
-            '/programming-languages/go/',
-        },
-        fileString: `[TypeScript](programming-languages/typescript/typescript.md) and [Go](programming-languages/go/go.md)`,
-      }),
-    ).toMatchSnapshot();
-  });
+    // `expectedOutput` is null when the link is expected to stay unresolved.
+    function test(linkPathname: string, expectedOutput: string | null) {
+      const output = resolveMarkdownLinkPathname(linkPathname, context);
+      expect(output).toEqual(expectedOutput);
+    }

-  it('replaces reference style Markdown links', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-
-        sourceToPermalink: {
-          '@site/docs/intro/intro.md': '/docs/intro',
-          '@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
-        },
-
-        fileString: `
-The following operations are defined for [URI]s:
-
-* [info]: Returns metadata about the resource,
-* [list]: Returns metadata about the resource's children (like getting the content of a local directory).
-
-[URI]:    ../api/classes/divine_uri.URI.md
-[info]:   ../api/classes/divine_uri.URI.md#info
-[list]:   ../api/classes/divine_uri.URI.md#list
-      `,
-      }),
-    ).toMatchSnapshot();
+    test('./foo.md', '/doc/foo');
+    test('foo.md', '/doc/foo');
+    test('./bar/baz.md', '/doc/baz');
+    test('http.foo.md', '/doc/http');
+    test('@site/docs/_partial.md', null);
+    test('foo.js', null);
+    test('nonexistent.md', null);
+    test('https://github.com/facebook/docusaurus/README.md', null);
  });

  it('resolves absolute and relative links differently', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
+    const context: Context = {
+      siteDir: '.',
+      sourceFilePath: 'docs/intro/intro.md',
+      contentPaths: {
+        contentPath: 'docs',
+        contentPathLocalized: 'i18n/docs-localized',
+      },

-        sourceToPermalink: {
-          '@site/docs/intro/intro.md': '/docs/intro',
-          '@site/docs/intro/another.md': '/docs/another',
-          '@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
-        },
+      sourceToPermalink: {
+        '@site/docs/intro/intro.md': '/docs/intro',
+        '@site/docs/intro/another.md': '/docs/another',
+        '@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
+      },
+    };

-        fileString: `
-[Relative link](./another.md)
-[Relative link 2](../api/classes/divine_uri.URI.md)
-[Relative link that should be absolute](./api/classes/divine_uri.URI.md)
-[Absolute link](/api/classes/divine_uri.URI.md)
-[Absolute link from site dir](/docs/api/classes/divine_uri.URI.md)
-[Absolute link that should be relative](/another.md)
-[Relative link that acts as absolute](api/classes/divine_uri.URI.md)
-[Relative link that acts as relative](another.md)
-`,
-      }),
-    ).toMatchSnapshot();
-  });
+    // `expectedOutput` is null when the link is expected to stay unresolved.
+    function test(linkPathname: string, expectedOutput: string | null) {
+      const output = resolveMarkdownLinkPathname(linkPathname, context);
+      expect(output).toEqual(expectedOutput);
+    }

-  // TODO bad
-  it('ignores links in HTML comments', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/intro.md': '/docs/intro',
-        },
-        fileString: `
-<!-- [foo](./foo.md) -->
-<!--
-[foo](./foo.md)
-->
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('ignores links in fenced blocks', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/intro.md': '/docs/intro',
-        },
-        fileString: `
-\`\`\`
-[foo](foo.md)
-\`\`\`
-
-\`\`\`\`js
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`\`
-
-\`\`\`\`js
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`\`
-
-~~~js
-[foo](foo.md)
-~~~
-
-~~~js
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-\`\`\`
-[foo](foo.md)
-~~~
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  // FIXME
-  it('ignores links in inline code', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/intro.md': '/docs/intro',
-        },
-        fileString: `
-\`[foo](foo.md)\`
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('replaces links with same title as URL', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/intro.md': '/docs/intro',
-          '@site/docs/foo.md': '/docs/foo',
-        },
-        fileString: `
-[foo.md](foo.md)
-[./foo.md](<./foo.md>)
-[./foo.md](./foo.md)
-[foo.md](./foo.md)
-[./foo.md](foo.md)
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('replaces multiple links on same line', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/intro.md': '/docs/intro',
-          '@site/docs/a.md': '/docs/a',
-          '@site/docs/b.md': '/docs/b',
-          '@site/docs/c.md': '/docs/c',
-        },
-        fileString: `
-[a](a.md), [a](a.md), [b](b.md), [c](c.md)
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('replaces Markdown links with spaces', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/intro.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/doc a.md': '/docs/doc%20a',
-          '@site/docs/my docs/doc b.md': '/docs/my%20docs/doc%20b',
-        },
-        fileString: `
-[doc a](./doc%20a.md)
-[doc a](<./doc a.md>)
-[doc b](./my%20docs/doc%20b.md)
-[doc b](<./my docs/doc b.md>)
-[doc]: <./my docs/doc b.md>
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('does not replace non-Markdown links', () => {
-    const input = `
-[asset](./file.md_asset/1.png)
-[URL](<https://example.com/file_(1).md>)
-[not a link]((foo)
-[not a link](foo bar)
-[not a link]: foo bar
-[not a link]: (foo
-[not a link]: bar)
-`;
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/file.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/file.md': '/docs/file',
-        },
-        fileString: input,
-      }),
-    ).toEqual({
-      newContent: input,
-      brokenMarkdownLinks: [],
-    });
-  });
-
-  it('handles stray spaces', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/file.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/file.md': '/docs/file',
-        },
-        fileString: `
-[URL]( ./file.md )
-[ref]:  ./file.md
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('handles link titles', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/file.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/file.md': '/docs/file',
-        },
-        fileString: `
-[URL](./file.md "title")
-[URL](./file.md 'title')
-[URL](./file.md (title))
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('preserves query/hash', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/file.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/file.md': '/docs/file',
-        },
-        fileString: `
-[URL](./file.md?foo=bar#baz)
-[URL](./file.md#a)
-[URL](./file.md?c)
-`,
-      }),
-    ).toMatchSnapshot();
-  });
-
-  it('handles unpaired fences', () => {
-    expect(
-      replaceMarkdownLinks({
-        siteDir: '.',
-        filePath: 'docs/file.md',
-        contentPaths: {
-          contentPath: 'docs',
-          contentPathLocalized: 'i18n/docs-localized',
-        },
-        sourceToPermalink: {
-          '@site/docs/file.md': '/docs/file',
-        },
-        fileString: `
-\`\`\`foo
-hello
-
-\`\`\`foo
-hello
-\`\`\`
-
-A [link](./file.md)
-`,
-      }),
-    ).toMatchSnapshot();
+    test('./another.md', '/docs/another');
+    test('../api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
+    test('./api/classes/divine_uri.URI.md', null);
+    test('/api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
+    test('/docs/api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
+    test('/another.md', null);
+    test('api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
+    test('another.md', '/docs/another');
  });
 });
--- a/packages/docusaurus-utils/src/tests/urlUtils.test.ts
+++ b/packages/docusaurus-utils/src/tests/urlUtils.test.ts
@ -17,6 +17,9 @@ import {
  hasSSHProtocol,
  parseURLPath,
  serializeURLPath,
+  parseURLOrPath,
+  toURLPath,
+  parseLocalURLPath,
 } from '../urlUtils';

 describe('normalizeUrl', () => {
@ -228,6 +231,166 @@ describe('isValidPathname', () => {
  });
 });

+describe('toURLPath', () => {
+  it('url', () => {
+    const url = new URL('https://example.com/pathname?qs#hash');
+    expect(toURLPath(url)).toEqual({
+      pathname: '/pathname',
+      search: 'qs',
+      hash: 'hash',
+    });
+  });
+
+  it('pathname + qs', () => {
+    const url = parseURLOrPath('/pathname?qs');
+    expect(toURLPath(url)).toEqual({
+      pathname: '/pathname',
+      search: 'qs',
+      hash: undefined,
+    });
+  });
+
+  it('pathname + hash', () => {
+    const url = parseURLOrPath('/pathname#hash');
+    expect(toURLPath(url)).toEqual({
+      pathname: '/pathname',
+      search: undefined,
+      hash: 'hash',
+    });
+  });
+
+  it('pathname + qs + hash', () => {
+    const url = parseURLOrPath('/pathname?qs#hash');
+    expect(toURLPath(url)).toEqual({
+      pathname: '/pathname',
+      search: 'qs',
+      hash: 'hash',
+    });
+  });
+
+  it('pathname + empty qs + empty hash', () => {
+    const url = parseURLOrPath('/pathname?#');
+    expect(toURLPath(url)).toEqual({
+      pathname: '/pathname',
+      search: '',
+      hash: '',
+    });
+  });
+});
+
+describe('parseLocalURLPath', () => {
+  it('returns null for non-local URLs', () => {
+    expect(parseLocalURLPath('https://example')).toBeNull();
+    expect(parseLocalURLPath('https://example:80')).toBeNull();
+    expect(parseLocalURLPath('https://example.com/xyz')).toBeNull();
+    expect(parseLocalURLPath('https://example.com/xyz?qs#hash')).toBeNull();
+    expect(parseLocalURLPath('https://example.com:80/xyz?qs#hash')).toBeNull();
+    expect(parseLocalURLPath('https://u:p@example:80/xyz?qs#hash')).toBeNull();
+  });
+
+  it('parses pathname', () => {
+    expect(parseLocalURLPath('/pathname')).toEqual({
+      pathname: '/pathname',
+      search: undefined,
+      hash: undefined,
+    });
+    expect(parseLocalURLPath('pathname.md')).toEqual({
+      pathname: 'pathname.md',
+      search: undefined,
+      hash: undefined,
+    });
+    expect(parseLocalURLPath('./pathname')).toEqual({
+      pathname: './pathname',
+      search: undefined,
+      hash: undefined,
+    });
+    expect(parseLocalURLPath('../../pathname.mdx')).toEqual({
+      pathname: '../../pathname.mdx',
+      search: undefined,
+      hash: undefined,
+    });
+  });
+
+  it('parses qs', () => {
+    expect(parseLocalURLPath('?')).toEqual({
+      pathname: '',
+      search: '',
+      hash: undefined,
+    });
+    expect(parseLocalURLPath('?qs')).toEqual({
+      pathname: '',
+      search: 'qs',
+      hash: undefined,
+    });
+    expect(parseLocalURLPath('?age=42')).toEqual({
+      pathname: '',
+      search: 'age=42',
+      hash: undefined,
+    });
+  });
+
+  it('parses hash', () => {
+    expect(parseLocalURLPath('#')).toEqual({
+      pathname: '',
+      search: undefined,
+      hash: '',
+    });
+    expect(parseLocalURLPath('#hash')).toEqual({
+      pathname: '',
+      search: undefined,
+      hash: 'hash',
+    });
+  });
+
+  it('parses complex local paths', () => {
+    expect(
+      parseLocalURLPath('../../great/path name/doc.mdx?age=42#hash'),
+    ).toEqual({
+      pathname: '../../great/path name/doc.mdx',
+      search: 'age=42',
+      hash: 'hash',
+    });
+    expect(parseLocalURLPath('my great path?=42#hash?qsInHash')).toEqual({
+      pathname: 'my great path',
+      search: '=42',
+      hash: 'hash?qsInHash',
+    });
+    expect(parseLocalURLPath('?qs1#hash1?qs2#hash2')).toEqual({
+      pathname: '',
+      search: 'qs1',
+      hash: 'hash1?qs2#hash2',
+    });
+    expect(parseLocalURLPath('../swizzling.mdx#wrapping')).toEqual({
+      pathname: '../swizzling.mdx',
+      search: undefined,
+      hash: 'wrapping',
+    });
+  });
+
+  it('parses is isomorphic with serialize', () => {
+    const testLocalPath = (url: string) => {
+      expect(serializeURLPath(parseLocalURLPath(url)!)).toBe(url);
+    };
+    [
+      '',
+      'doc',
+      'doc.mdx',
+      './doc.mdx',
+      '.././doc.mdx',
+      '/some pathname/.././doc.mdx',
+      '?',
+      '?qs',
+      '#',
+      '#hash',
+      '?qs#hash',
+      '?qs#hash',
+      'doc.mdx?qs#hash',
+      '/some pathname/.././doc.mdx?qs#hash',
+      '/some pathname/.././doc.mdx?qs#hash?qs2#hash2',
+    ].forEach(testLocalPath);
+  });
+});
+
 describe('parseURLPath', () => {
  it('parse and resolve pathname', () => {
    expect(parseURLPath('')).toEqual({
--- a/packages/docusaurus-utils/src/index.ts
+++ b/packages/docusaurus-utils/src/index.ts
@ -44,6 +44,9 @@ export {
  isValidPathname,
  resolvePathname,
  parseURLPath,
+  parseLocalURLPath,
+  parseURLOrPath,
+  toURLPath,
  serializeURLPath,
  hasSSHProtocol,
  buildHttpsUrl,
@ -71,11 +74,7 @@ export {
  writeMarkdownHeadingId,
  type WriteHeadingIDOptions,
 } from './markdownUtils';
-export {
-  type ContentPaths,
-  type BrokenMarkdownLink,
-  replaceMarkdownLinks,
-} from './markdownLinks';
+export {type ContentPaths, resolveMarkdownLinkPathname} from './markdownLinks';
 export {type SluggerOptions, type Slugger, createSlugger} from './slugger';
 export {
  isNameTooLong,
--- a/packages/docusaurus-utils/src/markdownLinks.ts
+++ b/packages/docusaurus-utils/src/markdownLinks.ts
@ -40,159 +40,35 @@ export type BrokenMarkdownLink<T extends ContentPaths> = {
  link: string;
 };

-type CodeFence = {
-  type: '`' | '~';
-  definitelyOpen: boolean;
-  count: number;
-};
-
-function parseCodeFence(line: string): CodeFence | null {
-  const match = line.trim().match(/^(?<fence>`{3,}|~{3,})(?<rest>.*)/);
-  if (!match) {
-    return null;
+// Note: this is historical logic extracted during a 2024 refactor.
+// The algorithm has been kept exactly as before for backward compatibility.
+// See also https://github.com/facebook/docusaurus/pull/10168
+export function resolveMarkdownLinkPathname(
+  linkPathname: string,
+  context: {
+    sourceFilePath: string;
+    sourceToPermalink: {[aliasedFilePath: string]: string};
+    contentPaths: ContentPaths;
+    siteDir: string;
+  },
+): string | null {
+  const {sourceFilePath, sourceToPermalink, contentPaths, siteDir} = context;
+  const sourceDirsToTry: string[] = [];
+  // ./file.md and ../file.md are always relative to the current file
+  if (!linkPathname.startsWith('./') && !linkPathname.startsWith('../')) {
+    sourceDirsToTry.push(...getContentPathList(contentPaths), siteDir);
  }
-  return {
-    type: match.groups!.fence![0]! as '`' | '~',
-    definitelyOpen: !!match.groups!.rest!,
-    count: match.groups!.fence!.length,
-  };
-}
-
-/**
- * Takes a Markdown file and replaces relative file references with their URL
- * counterparts, e.g. `[link](./intro.md)` => `[link](/docs/intro)`, preserving
- * everything else.
- *
- * This method uses best effort to find a matching file. The file reference can
- * be relative to the directory of the current file (most likely) or any of the
- * content paths (so `/tutorials/intro.md` can be resolved as
- * `<siteDir>/docs/tutorials/intro.md`). Links that contain the `http(s):` or
- * `@site/` prefix will always be ignored.
- */
-export function replaceMarkdownLinks<T extends ContentPaths>({
-  siteDir,
-  fileString,
-  filePath,
-  contentPaths,
-  sourceToPermalink,
-}: {
-  /** Absolute path to the site directory, used to resolve aliased paths. */
-  siteDir: string;
-  /** The Markdown file content to be processed. */
-  fileString: string;
-  /** Absolute path to the current file containing `fileString`. */
-  filePath: string;
-  /** The content paths which the file reference may live in. */
-  contentPaths: T;
-  /**
-   * A map from source paths to their URLs. Source paths are `@site` aliased.
-   */
-  sourceToPermalink: {[aliasedPath: string]: string};
-}): {
-  /**
-   * The content with all Markdown file references replaced with their URLs.
-   * Unresolved links are left as-is.
-   */
-  newContent: string;
-  /** The list of broken links,  */
-  brokenMarkdownLinks: BrokenMarkdownLink<T>[];
-} {
-  const brokenMarkdownLinks: BrokenMarkdownLink<T>[] = [];
-
-  // Replace internal markdown linking (except in fenced blocks).
-  let lastOpenCodeFence: CodeFence | null = null;
-  const lines = fileString.split('\n').map((line) => {
-    const codeFence = parseCodeFence(line);
-    if (codeFence) {
-      if (!lastOpenCodeFence) {
-        lastOpenCodeFence = codeFence;
-      } else if (
-        !codeFence.definitelyOpen &&
-        lastOpenCodeFence.type === codeFence.type &&
-        lastOpenCodeFence.count <= codeFence.count
-      ) {
-        // All three conditions must be met in order for this to be considered
-        // a closing fence.
-        lastOpenCodeFence = null;
-      }
-    }
-    if (lastOpenCodeFence) {
-      return line;
-    }
-
-    let modifiedLine = line;
-    // Replace inline-style links or reference-style links e.g:
-    // This is [Document 1](doc1.md)
-    // [doc1]: doc1.md
-    const linkTitlePattern = '(?:\\s+(?:\'.*?\'|".*?"|\\(.*?\\)))?';
-    const linkSuffixPattern = '(?:\\?[^#>\\s]+)?(?:#[^>\\s]+)?';
-    const linkCapture = (forbidden: string) =>
-      `((?!https?://|@site/)[^${forbidden}#?]+)`;
-    const linkURLPattern = `(?:(?!<)${linkCapture(
-      '()\\s',
-    )}${linkSuffixPattern}|<${linkCapture('>')}${linkSuffixPattern}>)`;
-    const linkPattern = new RegExp(
-      `\\[(?:(?!\\]\\().)*\\]\\(\\s*${linkURLPattern}${linkTitlePattern}\\s*\\)|^\\s*\\[[^[\\]]*[^[\\]\\s][^[\\]]*\\]:\\s*${linkURLPattern}${linkTitlePattern}$`,
-      'dgm',
-    );
-    let mdMatch = linkPattern.exec(modifiedLine);
-    while (mdMatch !== null) {
-      // Replace it to correct html link.
-      const mdLink = mdMatch.slice(1, 5).find(Boolean)!;
-      const mdLinkRange = mdMatch.indices!.slice(1, 5).find(Boolean)!;
-      if (!/\.mdx?$/.test(mdLink)) {
-        mdMatch = linkPattern.exec(modifiedLine);
-        continue;
-      }
-
-      const sourcesToTry: string[] = [];
-      // ./file.md and ../file.md are always relative to the current file
-      if (!mdLink.startsWith('./') && !mdLink.startsWith('../')) {
-        sourcesToTry.push(...getContentPathList(contentPaths), siteDir);
-      }
-      // /file.md is always relative to the content path
-      if (!mdLink.startsWith('/')) {
-        sourcesToTry.push(path.dirname(filePath));
-      }
-
-      const aliasedSourceMatch = sourcesToTry
-        .map((p) => path.join(p, decodeURIComponent(mdLink)))
-        .map((source) => aliasedSitePath(source, siteDir))
-        .find((source) => sourceToPermalink[source]);
-
-      const permalink: string | undefined = aliasedSourceMatch
-        ? sourceToPermalink[aliasedSourceMatch]
-        : undefined;
-
-      if (permalink) {
-        // MDX won't be happy if the permalink contains a space, we need to
-        // convert it to %20
-        const encodedPermalink = permalink
-          .split('/')
-          .map((part) => part.replace(/\s/g, '%20'))
-          .join('/');
-        modifiedLine = `${modifiedLine.slice(
-          0,
-          mdLinkRange[0],
-        )}${encodedPermalink}${modifiedLine.slice(mdLinkRange[1])}`;
-        // Adjust the lastIndex to avoid passing over the next link if the
-        // newly replaced URL is shorter.
-        linkPattern.lastIndex += encodedPermalink.length - mdLink.length;
-      } else {
-        const brokenMarkdownLink: BrokenMarkdownLink<T> = {
-          contentPaths,
-          filePath,
-          link: mdLink,
-        };
-
-        brokenMarkdownLinks.push(brokenMarkdownLink);
-      }
-      mdMatch = linkPattern.exec(modifiedLine);
-    }
-    return modifiedLine;
-  });
-
-  const newContent = lines.join('\n');
-
-  return {newContent, brokenMarkdownLinks};
+  // /file.md is never relative to the source file path
+  if (!linkPathname.startsWith('/')) {
+    sourceDirsToTry.push(path.dirname(sourceFilePath));
+  }
+
+  const aliasedSourceMatch = sourceDirsToTry
+    .map((sourceDir) => path.join(sourceDir, decodeURIComponent(linkPathname)))
+    .map((source) => aliasedSitePath(source, siteDir))
+    .find((source) => sourceToPermalink[source]);
+
+  return aliasedSourceMatch
+    ? sourceToPermalink[aliasedSourceMatch] ?? null
+    : null;
 }
--- a/packages/docusaurus-utils/src/urlUtils.ts
+++ b/packages/docusaurus-utils/src/urlUtils.ts
@ -164,27 +164,22 @@ export function isValidPathname(str: string): boolean {
  }
 }

+export function parseURLOrPath(url: string, base?: string | URL): URL {
+  try {
+    // TODO: once Node supports it, using URL.parse() might be faster
+    //  see https://kilianvalkhof.com/2024/javascript/the-problem-with-new-url-and-how-url-parse-fixes-that/
+    return new URL(url, base ?? 'https://example.com');
+  } catch (e) {
+    throw new Error(
+      `Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
+      {cause: e},
+    );
+  }
+}
+
 export type URLPath = {pathname: string; search?: string; hash?: string};

-// Let's name the concept of (pathname + search + hash) as URLPath
-// See also https://twitter.com/kettanaito/status/1741768992866308120
-// Note: this function also resolves relative pathnames while parsing!
-export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
-  function parseURL(url: string, base?: string | URL): URL {
-    try {
-      // A possible alternative? https://github.com/unjs/ufo#url
-      return new URL(url, base ?? 'https://example.com');
-    } catch (e) {
-      throw new Error(
-        `Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
-        {cause: e},
-      );
-    }
-  }
-
-  const base = fromPath ? parseURL(fromPath) : undefined;
-  const url = parseURL(urlPath, base);
-
+export function toURLPath(url: URL): URLPath {
  const {pathname} = url;

  // Fixes annoying url.search behavior
@ -193,17 +188,17 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
  // "?param => "param"
  const search = url.search
    ? url.search.slice(1)
-    : urlPath.includes('?')
+    : url.href.includes('?')
    ? ''
    : undefined;

  // Fixes annoying url.hash behavior
  // "" => undefined
  // "#" => ""
-  // "?param => "param"
+  // "#param" => "param"
  const hash = url.hash
    ? url.hash.slice(1)
-    : urlPath.includes('#')
+    : url.href.includes('#')
    ? ''
    : undefined;

@ -214,6 +209,65 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
  };
 }

+/**
+ * Let's name the concept of (pathname + search + hash) as URLPath
+ * See also https://twitter.com/kettanaito/status/1741768992866308120
+ * Note: this function also resolves relative pathnames while parsing!
+ */
+export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
+  const base = fromPath ? parseURLOrPath(fromPath) : undefined;
+  const url = parseURLOrPath(urlPath, base);
+  return toURLPath(url);
+}
+
+/**
+ * This returns results for strings like "foo", "../foo", "./foo.mdx?qs#hash"
+ * Unlike "parseURLPath()" above, this will not resolve the pathnames
+ * The returned pathname of "../../foo.mdx" will be "../../foo.mdx", not "/foo"
+ * This returns null if the url is not "local" (contains domain/protocol etc)
+ */
+export function parseLocalURLPath(urlPath: string): URLPath | null {
+  // Workaround because URL("") requires a protocol
+  const unspecifiedProtocol = 'unspecified:';
+
+  const url = parseURLOrPath(urlPath, `${unspecifiedProtocol}//`);
+  // Ignore links with specified protocol / host
+  // (usually fully qualified links starting with https://)
+  if (
+    url.protocol !== unspecifiedProtocol ||
+    url.host !== '' ||
+    url.username !== '' ||
+    url.password !== ''
+  ) {
+    return null;
+  }
+
+  // We can't use the "new URL()" result because it always resolves URLs,
+  // i.e. it removes any "./" or "../" in the pathname, which we don't want.
+  // We have to parse it manually...
+  let localUrlPath = urlPath;
+
+  // Extract and remove the #hash part
+  const hashIndex = localUrlPath.indexOf('#');
+  const hash =
+    hashIndex !== -1 ? localUrlPath.substring(hashIndex + 1) : undefined;
+  localUrlPath =
+    hashIndex !== -1 ? localUrlPath.substring(0, hashIndex) : localUrlPath;
+
+  // Extract and remove ?search part
+  const searchIndex = localUrlPath.indexOf('?');
+  const search =
+    searchIndex !== -1 ? localUrlPath.substring(searchIndex + 1) : undefined;
+  localUrlPath =
+    searchIndex !== -1 ? localUrlPath.substring(0, searchIndex) : localUrlPath;
+
+  return {
+    pathname: localUrlPath,
+    search,
+    hash,
+  };
+}
+
 export function serializeURLPath(urlPath: URLPath): string {
  const search = urlPath.search === undefined ? '' : `?${urlPath.search}`;
  const hash = urlPath.hash === undefined ? '' : `#${urlPath.hash}`;