fix(mdx-loader): resolve Markdown/MDX links with Remark instead of RegExp (#10168)

2025-08-06 02:08:55 +02:00 · 2024-05-24 19:03:23 +02:00 · 2024-05-24 19:03:23 +02:00 · e34614963e
commit e34614963e
parent aab332c2ae
36 changed files with 902 additions and 1620 deletions
--- a/packages/docusaurus-utils/src/urlUtils.ts
+++ b/packages/docusaurus-utils/src/urlUtils.ts
@ -164,27 +164,22 @@ export function isValidPathname(str: string): boolean {
  }
 }

+export function parseURLOrPath(url: string, base?: string | URL): URL {
+  try {
+    // TODO: once Node supports it, consider URL.parse(), which could be faster
+    //  see https://kilianvalkhof.com/2024/javascript/the-problem-with-new-url-and-how-url-parse-fixes-that/
+    return new URL(url, base ?? 'https://example.com');
+  } catch (e) {
+    throw new Error(
+      `Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
+      {cause: e},
+    );
+  }
+}
+
 export type URLPath = {pathname: string; search?: string; hash?: string};

-// Let's name the concept of (pathname + search + hash) as URLPath
-// See also https://twitter.com/kettanaito/status/1741768992866308120
-// Note: this function also resolves relative pathnames while parsing!
-export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
-  function parseURL(url: string, base?: string | URL): URL {
-    try {
-      // A possible alternative? https://github.com/unjs/ufo#url
-      return new URL(url, base ?? 'https://example.com');
-    } catch (e) {
-      throw new Error(
-        `Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
-        {cause: e},
-      );
-    }
-  }
-
-  const base = fromPath ? parseURL(fromPath) : undefined;
-  const url = parseURL(urlPath, base);
-
+export function toURLPath(url: URL): URLPath {
  const {pathname} = url;

  // Fixes annoying url.search behavior
@ -193,17 +188,17 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
  // "?param => "param"
  const search = url.search
    ? url.search.slice(1)
-    : urlPath.includes('?')
+    : url.href.includes('?')
    ? ''
    : undefined;

  // Fixes annoying url.hash behavior
  // "" => undefined
  // "#" => ""
-  // "?param => "param"
+  // "#param" => "param"
  const hash = url.hash
    ? url.hash.slice(1)
-    : urlPath.includes('#')
+    : url.href.includes('#')
    ? ''
    : undefined;

@ -214,6 +209,65 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
  };
 }

+/**
+ * Let's name the concept of (pathname + search + hash) as URLPath
+ * See also https://twitter.com/kettanaito/status/1741768992866308120
+ * Note: this function also resolves relative pathnames while parsing!
+ */
+export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
+  const base = fromPath ? parseURLOrPath(fromPath) : undefined;
+  const url = parseURLOrPath(urlPath, base);
+  return toURLPath(url);
+}
+
+/**
+ * Parses local strings like "foo", "../foo" or "./foo.mdx?qs#hash".
+ * Unlike "parseURLPath()" above, this does not resolve the pathname:
+ * the returned pathname of "../../foo.mdx" is "../../foo.mdx", not "/foo.mdx".
+ * Returns null if the url is not "local" (has a protocol, host or credentials).
+ */
+export function parseLocalURLPath(urlPath: string): URLPath | null {
+  // Workaround because URL("") requires a protocol
+  const unspecifiedProtocol = 'unspecified:';
+
+  const url = parseURLOrPath(urlPath, `${unspecifiedProtocol}//`);
+  // Ignore links with specified protocol / host
+  // (usually fully qualified links starting with https://)
+  if (
+    url.protocol !== unspecifiedProtocol ||
+    url.host !== '' ||
+    url.username !== '' ||
+    url.password !== ''
+  ) {
+    return null;
+  }
+
+  // We can't use the "new URL()" result because it always resolves the url,
+  // i.e. it removes any "./" or "../" from the pathname, which we don't want.
+  // We have to parse it manually...
+  let localUrlPath = urlPath;
+
+  // Extract and remove the #hash part
+  const hashIndex = localUrlPath.indexOf('#');
+  const hash =
+    hashIndex !== -1 ? localUrlPath.substring(hashIndex + 1) : undefined;
+  localUrlPath =
+    hashIndex !== -1 ? localUrlPath.substring(0, hashIndex) : localUrlPath;
+
+  // Extract and remove ?search part
+  const searchIndex = localUrlPath.indexOf('?');
+  const search =
+    searchIndex !== -1 ? localUrlPath.substring(searchIndex + 1) : undefined;
+  localUrlPath =
+    searchIndex !== -1 ? localUrlPath.substring(0, searchIndex) : localUrlPath;
+
+  return {
+    pathname: localUrlPath,
+    search,
+    hash,
+  };
+}
+
 export function serializeURLPath(urlPath: URLPath): string {
  const search = urlPath.search === undefined ? '' : `?${urlPath.search}`;
  const hash = urlPath.hash === undefined ? '' : `#${urlPath.hash}`;