fix(mdx-loader): resolve Markdown/MDX links with Remark instead of RegExp (#10168)

Sébastien Lorber 2024-05-24 19:03:23 +02:00 committed by GitHub
parent aab332c2ae
commit e34614963e
36 changed files with 902 additions and 1620 deletions
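In short: link resolution for Markdown/MDX files moves from a RegExp-based string replacement (`replaceMarkdownLinks` / `linkify`, run in separate webpack loaders) to a new `resolveMarkdownLinks` remark plugin that rewrites link and definition nodes on the MDAST. The mdx-loader gains an optional `resolveMarkdownLink` callback, and the historical resolution logic is kept in `@docusaurus/utils` as `resolveMarkdownLinkPathname`. Below is a rough sketch of how a content plugin is expected to wire the new option, using illustrative values (real plugins compute `sourceToPermalink`, `siteDir` and `contentPaths` from their loaded content):

```ts
import type {Options as MDXLoaderOptions} from '@docusaurus/mdx-loader';
import {resolveMarkdownLinkPathname, type ContentPaths} from '@docusaurus/utils';

// Illustrative values only; real plugins derive these from their loaded content
const siteDir = '/my/site';
const contentPaths: ContentPaths = {
  contentPath: '/my/site/docs',
  contentPathLocalized: '/my/site/i18n/en/docs',
};
const sourceToPermalink = {'@site/docs/foo.md': '/docs/foo'};

const mdxLoaderOptions: Partial<MDXLoaderOptions> = {
  resolveMarkdownLink: ({linkPathname, sourceFilePath}) =>
    // Same resolution algorithm as before, now exposed by @docusaurus/utils
    resolveMarkdownLinkPathname(linkPathname, {
      sourceFilePath,
      sourceToPermalink,
      siteDir,
      contentPaths,
    }),
};
```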

View file

@ -17,6 +17,7 @@ import stringifyObject from 'stringify-object';
import preprocessor from './preprocessor';
import {validateMDXFrontMatter} from './frontMatter';
import {createProcessorCached} from './processor';
import type {ResolveMarkdownLink} from './remark/resolveMarkdownLinks';
import type {MDXOptions} from './processor';
import type {MarkdownConfig} from '@docusaurus/types';
@ -45,6 +46,7 @@ export type Options = Partial<MDXOptions> & {
frontMatter: {[key: string]: unknown};
metadata: {[key: string]: unknown};
}) => {[key: string]: unknown};
resolveMarkdownLink?: ResolveMarkdownLink;
};
/**

View file

@ -10,6 +10,7 @@ import contentTitle from './remark/contentTitle';
import toc from './remark/toc';
import transformImage from './remark/transformImage';
import transformLinks from './remark/transformLinks';
import resolveMarkdownLinks from './remark/resolveMarkdownLinks';
import details from './remark/details';
import head from './remark/head';
import mermaid from './remark/mermaid';
@ -120,6 +121,13 @@ async function createProcessorFactory() {
siteDir: options.siteDir,
},
],
// TODO merge this with transformLinks?
options.resolveMarkdownLink
? [
resolveMarkdownLinks,
{resolveMarkdownLink: options.resolveMarkdownLink},
]
: undefined,
[
transformLinks,
{

View file

@ -0,0 +1,160 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import plugin from '..';
import type {PluginOptions} from '../index';
async function process(content: string) {
const {remark} = await import('remark');
const options: PluginOptions = {
resolveMarkdownLink: ({linkPathname}) => `/RESOLVED---${linkPathname}`,
};
const result = await remark().use(plugin, options).process(content);
return result.value;
}
describe('resolveMarkdownLinks remark plugin', () => {
it('resolves Markdown and MDX links', async () => {
/* language=markdown */
const content = `[link1](link1.mdx)
[link2](../myLink2.md) [link3](myLink3.md)
[link4](../myLink4.mdx?qs#hash) [link5](./../my/great/link5.md?#)
[link6](../myLink6.mdx?qs#hash)
[link7](<link with spaces 7.md?qs#hash>)
<b>[link8](/link8.md)</b>
[**link** \`9\`](/link9.md)
`;
const result = await process(content);
expect(result).toMatchInlineSnapshot(`
"[link1](/RESOLVED---link1.mdx)
[link2](/RESOLVED---../myLink2.md) [link3](/RESOLVED---myLink3.md)
[link4](/RESOLVED---../myLink4.mdx?qs#hash) [link5](/RESOLVED---./../my/great/link5.md?#)
[link6](/RESOLVED---../myLink6.mdx?qs#hash)
[link7](</RESOLVED---link with spaces 7.md?qs#hash>)
<b>[link8](/RESOLVED---/link8.md)</b>
[**link** \`9\`](/RESOLVED---/link9.md)
"
`);
});
it('skips non-Markdown links', async () => {
/* language=markdown */
const content = `[link1](./myLink1.m)
[link2](../myLink2mdx)
[link3](https://github.com/facebook/docusaurus/blob/main/README.md)
[link4](ftp:///README.mdx)
[link5](../link5.js)
[link6](../link6.jsx)
[link7](../link7.tsx)
<!--
[link8](link8.mdx)
-->
\`\`\`md
[link9](link9.md)
\`\`\`
`;
const result = await process(content);
expect(result).toMatchInlineSnapshot(`
"[link1](./myLink1.m)
[link2](../myLink2mdx)
[link3](https://github.com/facebook/docusaurus/blob/main/README.md)
[link4](ftp:///README.mdx)
[link5](../link5.js)
[link6](../link6.jsx)
[link7](../link7.tsx)
<!--
[link8](link8.mdx)
-->
\`\`\`md
[link9](link9.md)
\`\`\`
"
`);
});
it('keeps regular Markdown unmodified', async () => {
/* language=markdown */
const content = `# Title
Simple link
\`\`\`js
this is a code block
\`\`\`
`;
const result = await process(content);
expect(result).toEqual(content);
});
it('supports link references', async () => {
/* language=markdown */
const content = `Testing some link refs:
* [link-ref1]
* [link-ref2]
* [link-ref3]
[link-ref1]: target.mdx
[link-ref2]: ./target.mdx
[link-ref3]: ../links/target.mdx?qs#target-heading
`;
const result = await process(content);
expect(result).toMatchInlineSnapshot(`
"Testing some link refs:
* [link-ref1]
* [link-ref2]
* [link-ref3]
[link-ref1]: /RESOLVED---target.mdx
[link-ref2]: /RESOLVED---./target.mdx
[link-ref3]: /RESOLVED---../links/target.mdx?qs#target-heading
"
`);
});
});

View file

@ -0,0 +1,96 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {
parseLocalURLPath,
serializeURLPath,
type URLPath,
} from '@docusaurus/utils';
// @ts-expect-error: TODO see https://github.com/microsoft/TypeScript/issues/49721
import type {Transformer} from 'unified';
import type {Definition, Link} from 'mdast';
type ResolveMarkdownLinkParams = {
/**
* Absolute path to the source file containing this Markdown link.
*/
sourceFilePath: string;
/**
* The Markdown link pathname to resolve, as found in the source file.
* If the link is "./myFile.mdx?qs#hash", this will be "./myFile.mdx"
*/
linkPathname: string;
};
export type ResolveMarkdownLink = (
params: ResolveMarkdownLinkParams,
) => string | null;
export interface PluginOptions {
resolveMarkdownLink: ResolveMarkdownLink;
}
// TODO as of April 2023, no way to import/re-export this ESM type easily :/
// TODO upgrade to TS 5.3
// See https://github.com/microsoft/TypeScript/issues/49721#issuecomment-1517839391
// import type {Plugin} from 'unified';
type Plugin = any; // TODO fix this asap
const HAS_MARKDOWN_EXTENSION = /\.mdx?$/i;
function parseMarkdownLinkURLPath(link: string): URLPath | null {
const urlPath = parseLocalURLPath(link);
// If it's not local, we don't resolve it even if it's a Markdown file
// Example, we don't resolve https://github.com/project/README.md
if (!urlPath) {
return null;
}
// Ignore links without a Markdown file extension (ignoring qs/hash)
if (!HAS_MARKDOWN_EXTENSION.test(urlPath.pathname)) {
return null;
}
return urlPath;
}
/**
* A remark plugin to resolve Markdown/MDX link pathnames to their final URL.
* The actual resolution is delegated to the "resolveMarkdownLink" callback
* received through the plugin options.
*/
const plugin: Plugin = function plugin(options: PluginOptions): Transformer {
const {resolveMarkdownLink} = options;
return async (root, file) => {
const {visit} = await import('unist-util-visit');
visit(root, ['link', 'definition'], (node) => {
const link = node as unknown as Link | Definition;
const linkURLPath = parseMarkdownLinkURLPath(link.url);
if (!linkURLPath) {
return;
}
const permalink = resolveMarkdownLink({
sourceFilePath: file.path,
linkPathname: linkURLPath.pathname,
});
if (permalink) {
// This reapplies the link ?qs#hash part to the resolved pathname
const resolvedUrl = serializeURLPath({
...linkURLPath,
pathname: permalink,
});
link.url = resolvedUrl;
}
});
};
};
export default plugin;
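For reference, a minimal standalone run of the plugin, mirroring the unit test earlier in this diff (the deep import path below is an assumption, adjust to wherever the plugin is exposed). The callback only receives the bare pathname; any `?query`/`#hash` is re-applied by `serializeURLPath` after resolution:

```ts
import {remark} from 'remark';
// Assumed import path, for illustration only; the plugin lives in
// packages/docusaurus-mdx-loader/src/remark/resolveMarkdownLinks
import resolveMarkdownLinks from '@docusaurus/mdx-loader/lib/remark/resolveMarkdownLinks';

const input = '[link](./other-doc.md?qs#section)';

const output = await remark()
  .use(resolveMarkdownLinks, {
    // Returning null leaves the link untouched (broken-link handling is up to the caller)
    resolveMarkdownLink: ({linkPathname}) =>
      linkPathname.endsWith('other-doc.md') ? '/docs/other-doc' : null,
  })
  .process(input);

// Expected: [link](/docs/other-doc?qs#section)
console.log(String(output));
```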

View file

@ -1,28 +1,5 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`linkify reports broken markdown links 1`] = `
"---
title: This post links to another one!
---
[Good link 1](/blog/2018/12/14/Happy-First-Birthday-Slash)
[Good link 2](/blog/2018/12/14/Happy-First-Birthday-Slash)
[Bad link 1](postNotExist1.md)
[Bad link 1](./postNotExist2.mdx)
"
`;
exports[`linkify transforms to correct link 1`] = `
"---
title: This post links to another one!
---
[Linked post](/blog/2018/12/14/Happy-First-Birthday-Slash)"
`;
exports[`paginateBlogPosts generates a single page 1`] = `
[
{

View file

@ -5,20 +5,13 @@
* LICENSE file in the root directory of this source tree.
*/
import {jest} from '@jest/globals';
import fs from 'fs-extra';
import path from 'path';
import {fromPartial} from '@total-typescript/shoehorn';
import {
truncate,
parseBlogFileName,
linkify,
getSourceToPermalink,
paginateBlogPosts,
applyProcessBlogPosts,
type LinkifyParams,
} from '../blogUtils';
import type {BlogBrokenMarkdownLink, BlogContentPaths} from '../types';
import type {BlogPost} from '@docusaurus/plugin-content-blog';
describe('truncate', () => {
@ -209,95 +202,6 @@ describe('parseBlogFileName', () => {
});
});
describe('linkify', () => {
const siteDir = path.join(__dirname, '__fixtures__', 'website');
const contentPaths: BlogContentPaths = {
contentPath: path.join(siteDir, 'blog-with-ref'),
contentPathLocalized: path.join(siteDir, 'blog-with-ref-localized'),
};
const pluginDir = 'blog-with-ref';
const blogPosts: BlogPost[] = [
{
id: 'Happy 1st Birthday Slash!',
metadata: {
permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash',
source: path.posix.join(
'@site',
pluginDir,
'2018-12-14-Happy-First-Birthday-Slash.md',
),
title: 'Happy 1st Birthday Slash!',
description: `pattern name`,
date: new Date('2018-12-14'),
tags: [],
prevItem: {
permalink: '/blog/2019/01/01/date-matter',
title: 'date-matter',
},
hasTruncateMarker: false,
frontMatter: {},
authors: [],
unlisted: false,
},
content: '',
},
];
async function transform(filePath: string, options?: Partial<LinkifyParams>) {
const fileContent = await fs.readFile(filePath, 'utf-8');
const transformedContent = linkify({
filePath,
fileString: fileContent,
siteDir,
contentPaths,
sourceToPermalink: getSourceToPermalink(blogPosts),
onBrokenMarkdownLink: (brokenMarkdownLink) => {
throw new Error(
`Broken markdown link found: ${JSON.stringify(brokenMarkdownLink)}`,
);
},
...options,
});
return [fileContent, transformedContent];
}
it('transforms to correct link', async () => {
const post = path.join(contentPaths.contentPath, 'post.md');
const [content, transformedContent] = await transform(post);
expect(transformedContent).toMatchSnapshot();
expect(transformedContent).toContain(
'](/blog/2018/12/14/Happy-First-Birthday-Slash',
);
expect(transformedContent).not.toContain(
'](2018-12-14-Happy-First-Birthday-Slash.md)',
);
expect(content).not.toEqual(transformedContent);
});
it('reports broken markdown links', async () => {
const filePath = 'post-with-broken-links.md';
const folderPath = contentPaths.contentPath;
const postWithBrokenLinks = path.join(folderPath, filePath);
const onBrokenMarkdownLink = jest.fn();
const [, transformedContent] = await transform(postWithBrokenLinks, {
onBrokenMarkdownLink,
});
expect(transformedContent).toMatchSnapshot();
expect(onBrokenMarkdownLink).toHaveBeenCalledTimes(2);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(1, {
filePath: path.resolve(folderPath, filePath),
contentPaths,
link: 'postNotExist1.md',
} as BlogBrokenMarkdownLink);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(2, {
filePath: path.resolve(folderPath, filePath),
contentPaths,
link: './postNotExist2.mdx',
} as BlogBrokenMarkdownLink);
});
});
describe('processBlogPosts', () => {
const blogPost2022: BlogPost = fromPartial({
metadata: {date: new Date('2022-01-01')},

View file

@ -17,7 +17,6 @@ import {
getEditUrl,
getFolderContainingFile,
posixPath,
replaceMarkdownLinks,
Globby,
normalizeFrontMatterTags,
groupTaggedItems,
@ -38,7 +37,7 @@ import type {
BlogTags,
BlogPaginated,
} from '@docusaurus/plugin-content-blog';
import type {BlogContentPaths, BlogMarkdownLoaderOptions} from './types';
import type {BlogContentPaths} from './types';
export function truncate(fileString: string, truncateMarker: RegExp): string {
return fileString.split(truncateMarker, 1).shift()!;
@ -403,35 +402,6 @@ export async function generateBlogPosts(
return blogPosts;
}
export type LinkifyParams = {
filePath: string;
fileString: string;
} & Pick<
BlogMarkdownLoaderOptions,
'sourceToPermalink' | 'siteDir' | 'contentPaths' | 'onBrokenMarkdownLink'
>;
export function linkify({
filePath,
contentPaths,
fileString,
siteDir,
sourceToPermalink,
onBrokenMarkdownLink,
}: LinkifyParams): string {
const {newContent, brokenMarkdownLinks} = replaceMarkdownLinks({
siteDir,
fileString,
filePath,
contentPaths,
sourceToPermalink,
});
brokenMarkdownLinks.forEach((l) => onBrokenMarkdownLink(l));
return newContent;
}
export async function applyProcessBlogPosts({
blogPosts,
processBlogPosts,

View file

@ -18,6 +18,7 @@ import {
getContentPathList,
getDataFilePath,
DEFAULT_PLUGIN_ID,
resolveMarkdownLinkPathname,
} from '@docusaurus/utils';
import {
getSourceToPermalink,
@ -43,6 +44,8 @@ import type {
BlogContent,
BlogPaginated,
} from '@docusaurus/plugin-content-blog';
import type {Options as MDXLoaderOptions} from '@docusaurus/mdx-loader/lib/loader';
import type {RuleSetUseItem} from 'webpack';
const PluginName = 'docusaurus-plugin-content-blog';
@ -213,22 +216,81 @@ export default async function pluginContentBlog(
beforeDefaultRehypePlugins,
} = options;
const markdownLoaderOptions: BlogMarkdownLoaderOptions = {
siteDir,
contentPaths,
truncateMarker,
sourceToPermalink: getSourceToPermalink(content.blogPosts),
onBrokenMarkdownLink: (brokenMarkdownLink) => {
if (onBrokenMarkdownLinks === 'ignore') {
return;
}
logger.report(
onBrokenMarkdownLinks,
)`Blog markdown link couldn't be resolved: (url=${brokenMarkdownLink.link}) in path=${brokenMarkdownLink.filePath}`;
},
};
const sourceToPermalink = getSourceToPermalink(content.blogPosts);
const contentDirs = getContentPathList(contentPaths);
function createMDXLoader(): RuleSetUseItem {
const loaderOptions: MDXLoaderOptions = {
admonitions,
remarkPlugins,
rehypePlugins,
beforeDefaultRemarkPlugins: [
footnoteIDFixer,
...beforeDefaultRemarkPlugins,
],
beforeDefaultRehypePlugins,
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.
const aliasedPath = aliasedSitePath(mdxPath, siteDir);
return path.join(dataDir, `${docuHash(aliasedPath)}.json`);
},
// For blog posts a title in markdown is always removed
// Blog posts title are rendered separately
removeContentTitle: true,
// Assets allow to convert some relative images paths to
// require() calls
// @ts-expect-error: TODO fix typing issue
createAssets: ({
frontMatter,
metadata,
}: {
frontMatter: BlogPostFrontMatter;
metadata: BlogPostMetadata;
}): Assets => ({
image: frontMatter.image,
authorsImageUrls: metadata.authors.map((author) => author.imageURL),
}),
markdownConfig: siteConfig.markdown,
resolveMarkdownLink: ({linkPathname, sourceFilePath}) => {
const permalink = resolveMarkdownLinkPathname(linkPathname, {
sourceFilePath,
sourceToPermalink,
siteDir,
contentPaths,
});
if (permalink === null) {
logger.report(
onBrokenMarkdownLinks,
)`Blog markdown link couldn't be resolved: (url=${linkPathname}) in source file path=${sourceFilePath}`;
}
return permalink;
},
};
return {
loader: require.resolve('@docusaurus/mdx-loader'),
options: loaderOptions,
};
}
function createBlogMarkdownLoader(): RuleSetUseItem {
const loaderOptions: BlogMarkdownLoaderOptions = {
truncateMarker,
};
return {
loader: path.resolve(__dirname, './markdownLoader.js'),
options: loaderOptions,
};
}
return {
resolve: {
alias: {
@ -242,61 +304,7 @@ export default async function pluginContentBlog(
include: contentDirs
// Trailing slash is important, see https://github.com/facebook/docusaurus/pull/3970
.map(addTrailingPathSeparator),
use: [
{
loader: require.resolve('@docusaurus/mdx-loader'),
options: {
admonitions,
remarkPlugins,
rehypePlugins,
beforeDefaultRemarkPlugins: [
footnoteIDFixer,
...beforeDefaultRemarkPlugins,
],
beforeDefaultRehypePlugins,
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.
const aliasedPath = aliasedSitePath(mdxPath, siteDir);
return path.join(
dataDir,
`${docuHash(aliasedPath)}.json`,
);
},
// For blog posts a title in markdown is always removed
// Blog posts title are rendered separately
removeContentTitle: true,
// Assets allow to convert some relative images paths to
// require() calls
createAssets: ({
frontMatter,
metadata,
}: {
frontMatter: BlogPostFrontMatter;
metadata: BlogPostMetadata;
}): Assets => ({
image: frontMatter.image,
authorsImageUrls: metadata.authors.map(
(author) => author.imageURL,
),
}),
markdownConfig: siteConfig.markdown,
},
},
{
loader: path.resolve(__dirname, './markdownLoader.js'),
options: markdownLoaderOptions,
},
].filter(Boolean),
use: [createMDXLoader(), createBlogMarkdownLoader()],
},
],
},

View file

@ -5,7 +5,7 @@
* LICENSE file in the root directory of this source tree.
*/
import {truncate, linkify} from './blogUtils';
import {truncate} from './blogUtils';
import type {BlogMarkdownLoaderOptions} from './types';
import type {LoaderContext} from 'webpack';
@ -13,23 +13,19 @@ export default function markdownLoader(
this: LoaderContext<BlogMarkdownLoaderOptions>,
source: string,
): void {
const filePath = this.resourcePath;
const fileString = source;
const callback = this.async();
const markdownLoaderOptions = this.getOptions();
// Linkify blog posts
let finalContent = linkify({
fileString,
filePath,
...markdownLoaderOptions,
});
let finalContent = fileString;
// Truncate content if requested (e.g: file.md?truncated=true).
const truncated: boolean | undefined = this.resourceQuery
? !!new URLSearchParams(this.resourceQuery.slice(1)).get('truncated')
: undefined;
// TODO truncate with the AST instead of the string ?
if (truncated) {
finalContent = truncate(finalContent, markdownLoaderOptions.truncateMarker);
}

View file

@ -5,15 +5,10 @@
* LICENSE file in the root directory of this source tree.
*/
import type {BrokenMarkdownLink, ContentPaths} from '@docusaurus/utils';
import type {ContentPaths} from '@docusaurus/utils';
export type BlogContentPaths = ContentPaths;
export type BlogBrokenMarkdownLink = BrokenMarkdownLink<BlogContentPaths>;
export type BlogMarkdownLoaderOptions = {
siteDir: string;
contentPaths: BlogContentPaths;
truncateMarker: RegExp;
sourceToPermalink: {[aliasedPath: string]: string};
onBrokenMarkdownLink: (brokenMarkdownLink: BlogBrokenMarkdownLink) => void;
};

View file

@ -17,6 +17,7 @@ import {
addTrailingPathSeparator,
createAbsoluteFilePathMatcher,
createSlugger,
resolveMarkdownLinkPathname,
DEFAULT_PLUGIN_ID,
} from '@docusaurus/utils';
import {loadSidebars, resolveSidebarPathOption} from './sidebars';
@ -28,7 +29,11 @@ import {
type DocEnv,
createDocsByIdIndex,
} from './docs';
import {readVersionsMetadata, toFullVersion} from './versions';
import {
getVersionFromSourceFilePath,
readVersionsMetadata,
toFullVersion,
} from './versions';
import {cliDocsVersionCommand} from './cli';
import {VERSIONS_JSON_FILE} from './constants';
import {toGlobalDataVersion} from './globalData';
@ -38,6 +43,7 @@ import {
} from './translations';
import {createAllRoutes} from './routes';
import {createSidebarsUtils} from './sidebars/utils';
import type {Options as MDXLoaderOptions} from '@docusaurus/mdx-loader';
import type {
PluginOptions,
@ -48,13 +54,8 @@ import type {
LoadedVersion,
} from '@docusaurus/plugin-content-docs';
import type {LoadContext, Plugin} from '@docusaurus/types';
import type {
SourceToPermalink,
DocFile,
DocsMarkdownOption,
FullVersion,
} from './types';
import type {RuleSetRule} from 'webpack';
import type {SourceToPermalink, DocFile, FullVersion} from './types';
import type {RuleSetUseItem} from 'webpack';
export default async function pluginContentDocs(
context: LoadContext,
@ -251,72 +252,71 @@ export default async function pluginContentDocs(
beforeDefaultRemarkPlugins,
} = options;
const contentDirs = versionsMetadata
.flatMap(getContentPathList)
// Trailing slash is important, see https://github.com/facebook/docusaurus/pull/3970
.map(addTrailingPathSeparator);
// TODO this does not re-run when content gets updated in dev!
// it's probably better to restore a mutable cache in the plugin
function getSourceToPermalink(): SourceToPermalink {
const allDocs = content.loadedVersions.flatMap((v) => v.docs);
return Object.fromEntries(
allDocs.map(({source, permalink}) => [source, permalink]),
);
}
const sourceToPermalink = getSourceToPermalink();
const docsMarkdownOptions: DocsMarkdownOption = {
siteDir,
sourceToPermalink: getSourceToPermalink(),
versionsMetadata,
onBrokenMarkdownLink: (brokenMarkdownLink) => {
logger.report(
siteConfig.onBrokenMarkdownLinks,
)`Docs markdown link couldn't be resolved: (url=${brokenMarkdownLink.link}) in path=${brokenMarkdownLink.filePath} for version number=${brokenMarkdownLink.contentPaths.versionName}`;
},
};
function createMDXLoader(): RuleSetUseItem {
const loaderOptions: MDXLoaderOptions = {
admonitions: options.admonitions,
remarkPlugins,
rehypePlugins,
beforeDefaultRehypePlugins,
beforeDefaultRemarkPlugins,
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.
const aliasedPath = aliasedSitePath(mdxPath, siteDir);
return path.join(dataDir, `${docuHash(aliasedPath)}.json`);
},
// Assets allow to convert some relative images paths to
// require(...) calls
createAssets: ({frontMatter}: {frontMatter: DocFrontMatter}) => ({
image: frontMatter.image,
}),
markdownConfig: siteConfig.markdown,
resolveMarkdownLink: ({linkPathname, sourceFilePath}) => {
const version = getVersionFromSourceFilePath(
sourceFilePath,
content.loadedVersions,
);
const permalink = resolveMarkdownLinkPathname(linkPathname, {
sourceFilePath,
sourceToPermalink,
siteDir,
contentPaths: version,
});
if (permalink === null) {
logger.report(
siteConfig.onBrokenMarkdownLinks,
)`Docs markdown link couldn't be resolved: (url=${linkPathname}) in source file path=${sourceFilePath} for version number=${version.versionName}`;
}
return permalink;
},
};
function createMDXLoaderRule(): RuleSetRule {
const contentDirs = versionsMetadata
.flatMap(getContentPathList)
// Trailing slash is important, see https://github.com/facebook/docusaurus/pull/3970
.map(addTrailingPathSeparator);
return {
test: /\.mdx?$/i,
include: contentDirs,
use: [
{
loader: require.resolve('@docusaurus/mdx-loader'),
options: {
admonitions: options.admonitions,
remarkPlugins,
rehypePlugins,
beforeDefaultRehypePlugins,
beforeDefaultRemarkPlugins,
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.
const aliasedPath = aliasedSitePath(mdxPath, siteDir);
return path.join(dataDir, `${docuHash(aliasedPath)}.json`);
},
// Assets allow to convert some relative images paths to
// require(...) calls
createAssets: ({
frontMatter,
}: {
frontMatter: DocFrontMatter;
}) => ({
image: frontMatter.image,
}),
markdownConfig: siteConfig.markdown,
},
},
{
loader: path.resolve(__dirname, './markdown/index.js'),
options: docsMarkdownOptions,
},
].filter(Boolean),
loader: require.resolve('@docusaurus/mdx-loader'),
options: loaderOptions,
};
}
@ -333,7 +333,13 @@ export default async function pluginContentDocs(
},
},
module: {
rules: [createMDXLoaderRule()],
rules: [
{
test: /\.mdx?$/i,
include: contentDirs,
use: [createMDXLoader()],
},
],
},
};
},

View file

@ -1,13 +0,0 @@
# Don't transform any link here
![image1](assets/image1.png)
# Don't replace inside fenced codeblock
```md
![doc4](doc4.md)
```
### Non-existing Docs
- [hahaha](hahaha.md)

View file

@ -1,12 +0,0 @@
### Existing Docs
- [doc1](doc1.md)
- [doc2](./doc2.md)
- [doc3](subdir/doc3.md)
## Repeating Docs
- [doc1](doc1.md)
- [doc2](./doc2.md)
- [doc-localized](/doc-localized.md)

View file

@ -1,19 +0,0 @@
### Existing Docs
- [doc1][doc1]
- [doc2][doc2]
## Repeating Docs
- [doc1][doc1]
- [doc2][doc2]
## Do not replace this
```md
![image1][image1]
```
[doc1]: doc1.md
[doc2]: ./doc2.md
[image1]: assets/image1.png

View file

@ -1,6 +0,0 @@
### Not Existing Docs
- [docNotExist1](docNotExist1.md)
- [docNotExist2](./docNotExist2.mdx)
- [docNotExist3](../docNotExist3.mdx)
- [docNotExist4](./subdir/docNotExist4.md)

View file

@ -1,3 +0,0 @@
### Relative linking
- [doc1](../doc2.md)

View file

@ -1 +0,0 @@
[link](../docs/doc1.md)

View file

@ -1,7 +0,0 @@
### Existing Docs
- [doc1](subdir/doc1.md)
### With hash
- [doc2](doc2.md#existing-docs)

View file

@ -1,3 +0,0 @@
### Relative linking
- [doc1](../doc2.md)

View file

@ -1,82 +0,0 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`linkify transforms absolute links in versioned docs 1`] = `
"### Existing Docs
- [doc1](/docs/1.0.0/subdir/doc1)
### With hash
- [doc2](/docs/1.0.0/doc2#existing-docs)
"
`;
exports[`linkify transforms nothing with no links 1`] = `
"# Don't transform any link here
![image1](assets/image1.png)
# Don't replace inside fenced codeblock
\`\`\`md
![doc4](doc4.md)
\`\`\`
### Non-existing Docs
- [hahaha](hahaha.md)
"
`;
exports[`linkify transforms reference links 1`] = `
"### Existing Docs
- [doc1][doc1]
- [doc2][doc2]
## Repeating Docs
- [doc1][doc1]
- [doc2][doc2]
## Do not replace this
\`\`\`md
![image1][image1]
\`\`\`
[doc1]: /docs/doc1
[doc2]: /docs/doc2
[image1]: assets/image1.png
"
`;
exports[`linkify transforms relative links 1`] = `
"### Relative linking
- [doc1](/docs/doc2)
"
`;
exports[`linkify transforms relative links in versioned docs 1`] = `
"### Relative linking
- [doc1](/docs/1.0.0/doc2)
"
`;
exports[`linkify transforms to correct links 1`] = `
"### Existing Docs
- [doc1](/docs/doc1)
- [doc2](/docs/doc2)
- [doc3](/docs/subdir/doc3)
## Repeating Docs
- [doc1](/docs/doc1)
- [doc2](/docs/doc2)
- [doc-localized](/fr/doc-localized)
"
`;

View file

@ -1,210 +0,0 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {jest} from '@jest/globals';
import fs from 'fs-extra';
import path from 'path';
import {linkify} from '../linkify';
import {VERSIONED_DOCS_DIR, CURRENT_VERSION_NAME} from '../../constants';
import type {
DocsMarkdownOption,
SourceToPermalink,
DocBrokenMarkdownLink,
} from '../../types';
import type {VersionMetadata} from '@docusaurus/plugin-content-docs';
function createFakeVersion({
versionName,
contentPath,
contentPathLocalized,
}: {
versionName: string;
contentPath: string;
contentPathLocalized: string;
}): VersionMetadata {
return {
versionName,
label: 'Any',
path: 'any',
badge: true,
banner: null,
tagsPath: '/tags/',
className: '',
contentPath,
contentPathLocalized,
sidebarFilePath: 'any',
routePriority: undefined,
isLast: false,
};
}
const siteDir = path.join(__dirname, '__fixtures__');
const versionCurrent = createFakeVersion({
versionName: CURRENT_VERSION_NAME,
contentPath: path.join(siteDir, 'docs'),
contentPathLocalized: path.join(
siteDir,
'i18n',
'fr',
'docusaurus-plugin-content-docs',
CURRENT_VERSION_NAME,
),
});
const version100 = createFakeVersion({
versionName: '1.0.0',
contentPath: path.join(siteDir, VERSIONED_DOCS_DIR, 'version-1.0.0'),
contentPathLocalized: path.join(
siteDir,
'i18n',
'fr',
'docusaurus-plugin-content-docs',
'version-1.0.0',
),
});
const sourceToPermalink: SourceToPermalink = {
'@site/docs/doc1.md': '/docs/doc1',
'@site/docs/doc2.md': '/docs/doc2',
'@site/docs/subdir/doc3.md': '/docs/subdir/doc3',
'@site/docs/doc4.md': '/docs/doc4',
'@site/versioned_docs/version-1.0.0/doc2.md': '/docs/1.0.0/doc2',
'@site/versioned_docs/version-1.0.0/subdir/doc1.md':
'/docs/1.0.0/subdir/doc1',
'@site/i18n/fr/docusaurus-plugin-content-docs/current/doc-localized.md':
'/fr/doc-localized',
'@site/docs/doc-localized.md': '/doc-localized',
};
function createMarkdownOptions(
options?: Partial<DocsMarkdownOption>,
): DocsMarkdownOption {
return {
sourceToPermalink,
onBrokenMarkdownLink: () => {},
versionsMetadata: [versionCurrent, version100],
siteDir,
...options,
};
}
const transform = async (
filepath: string,
options?: Partial<DocsMarkdownOption>,
) => {
const markdownOptions = createMarkdownOptions(options);
const content = await fs.readFile(filepath, 'utf-8');
const transformedContent = linkify(content, filepath, markdownOptions);
return [content, transformedContent];
};
describe('linkify', () => {
it('transforms nothing with no links', async () => {
const doc1 = path.join(versionCurrent.contentPath, 'doc1.md');
const [content, transformedContent] = await transform(doc1);
expect(transformedContent).toMatchSnapshot();
expect(content).toEqual(transformedContent);
});
it('transforms to correct links', async () => {
const doc2 = path.join(versionCurrent.contentPath, 'doc2.md');
const [content, transformedContent] = await transform(doc2);
expect(transformedContent).toMatchSnapshot();
expect(transformedContent).toContain('](/docs/doc1');
expect(transformedContent).toContain('](/docs/doc2');
expect(transformedContent).toContain('](/docs/subdir/doc3');
expect(transformedContent).toContain('](/fr/doc-localized');
expect(transformedContent).not.toContain('](doc1.md)');
expect(transformedContent).not.toContain('](./doc2.md)');
expect(transformedContent).not.toContain('](subdir/doc3.md)');
expect(transformedContent).not.toContain('](/doc-localized');
expect(content).not.toEqual(transformedContent);
});
it('transforms relative links', async () => {
const doc3 = path.join(versionCurrent.contentPath, 'subdir', 'doc3.md');
const [content, transformedContent] = await transform(doc3);
expect(transformedContent).toMatchSnapshot();
expect(transformedContent).toContain('](/docs/doc2');
expect(transformedContent).not.toContain('](../doc2.md)');
expect(content).not.toEqual(transformedContent);
});
it('transforms reference links', async () => {
const doc4 = path.join(versionCurrent.contentPath, 'doc4.md');
const [content, transformedContent] = await transform(doc4);
expect(transformedContent).toMatchSnapshot();
expect(transformedContent).toContain('[doc1]: /docs/doc1');
expect(transformedContent).toContain('[doc2]: /docs/doc2');
expect(transformedContent).not.toContain('[doc1]: doc1.md');
expect(transformedContent).not.toContain('[doc2]: ./doc2.md');
expect(content).not.toEqual(transformedContent);
});
it('reports broken markdown links', async () => {
const doc5 = path.join(versionCurrent.contentPath, 'doc5.md');
const onBrokenMarkdownLink = jest.fn();
const [content, transformedContent] = await transform(doc5, {
onBrokenMarkdownLink,
});
expect(transformedContent).toEqual(content);
expect(onBrokenMarkdownLink).toHaveBeenCalledTimes(4);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(1, {
filePath: doc5,
link: 'docNotExist1.md',
contentPaths: versionCurrent,
} as DocBrokenMarkdownLink);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(2, {
filePath: doc5,
link: './docNotExist2.mdx',
contentPaths: versionCurrent,
} as DocBrokenMarkdownLink);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(3, {
filePath: doc5,
link: '../docNotExist3.mdx',
contentPaths: versionCurrent,
} as DocBrokenMarkdownLink);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(4, {
filePath: doc5,
link: './subdir/docNotExist4.md',
contentPaths: versionCurrent,
} as DocBrokenMarkdownLink);
});
it('transforms absolute links in versioned docs', async () => {
const doc2 = path.join(version100.contentPath, 'doc2.md');
const [content, transformedContent] = await transform(doc2);
expect(transformedContent).toMatchSnapshot();
expect(transformedContent).toContain('](/docs/1.0.0/subdir/doc1');
expect(transformedContent).toContain('](/docs/1.0.0/doc2#existing-docs');
expect(transformedContent).not.toContain('](subdir/doc1.md)');
expect(transformedContent).not.toContain('](doc2.md#existing-docs)');
expect(content).not.toEqual(transformedContent);
});
it('transforms relative links in versioned docs', async () => {
const doc1 = path.join(version100.contentPath, 'subdir', 'doc1.md');
const [content, transformedContent] = await transform(doc1);
expect(transformedContent).toMatchSnapshot();
expect(transformedContent).toContain('](/docs/1.0.0/doc2');
expect(transformedContent).not.toContain('](../doc2.md)');
expect(content).not.toEqual(transformedContent);
});
// See comment in linkify.ts
it('throws for file outside version', async () => {
const doc1 = path.join(__dirname, '__fixtures__/outside/doc1.md');
await expect(() =>
transform(doc1),
).rejects.toThrowErrorMatchingInlineSnapshot(
`"Unexpected error: Markdown file at "<PROJECT_ROOT>/packages/docusaurus-plugin-content-docs/src/markdown/__tests__/__fixtures__/outside/doc1.md" does not belong to any docs version!"`,
);
});
});

View file

@ -1,20 +0,0 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {linkify} from './linkify';
import type {DocsMarkdownOption} from '../types';
import type {LoaderContext} from 'webpack';
export default function markdownLoader(
this: LoaderContext<DocsMarkdownOption>,
source: string,
): void {
const fileString = source;
const callback = this.async();
const options = this.getOptions();
return callback(null, linkify(fileString, this.resourcePath, options));
}

View file

@ -1,47 +0,0 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {replaceMarkdownLinks, getContentPathList} from '@docusaurus/utils';
import type {DocsMarkdownOption} from '../types';
function getVersion(filePath: string, options: DocsMarkdownOption) {
const versionFound = options.versionsMetadata.find((version) =>
getContentPathList(version).some((docsDirPath) =>
filePath.startsWith(docsDirPath),
),
);
// At this point, this should never happen, because the MDX loaders' paths are
// literally using the version content paths; but if we allow sourcing content
// from outside the docs directory (through the `include` option, for example;
// is there a compelling use-case?), this would actually be testable
if (!versionFound) {
throw new Error(
`Unexpected error: Markdown file at "${filePath}" does not belong to any docs version!`,
);
}
return versionFound;
}
export function linkify(
fileString: string,
filePath: string,
options: DocsMarkdownOption,
): string {
const {siteDir, sourceToPermalink, onBrokenMarkdownLink} = options;
const {newContent, brokenMarkdownLinks} = replaceMarkdownLinks({
siteDir,
fileString,
filePath,
contentPaths: getVersion(filePath, options),
sourceToPermalink,
});
brokenMarkdownLinks.forEach((l) => onBrokenMarkdownLink(l));
return newContent;
}

View file

@ -5,9 +5,8 @@
* LICENSE file in the root directory of this source tree.
*/
import type {BrokenMarkdownLink, Tag} from '@docusaurus/utils';
import type {Tag} from '@docusaurus/utils';
import type {
VersionMetadata,
LoadedVersion,
CategoryGeneratedIndexMetadata,
} from '@docusaurus/plugin-content-docs';
@ -37,12 +36,3 @@ export type FullVersion = LoadedVersion & {
sidebarsUtils: SidebarsUtils;
categoryGeneratedIndices: CategoryGeneratedIndexMetadata[];
};
export type DocBrokenMarkdownLink = BrokenMarkdownLink<VersionMetadata>;
export type DocsMarkdownOption = {
versionsMetadata: VersionMetadata[];
siteDir: string;
sourceToPermalink: SourceToPermalink;
onBrokenMarkdownLink: (brokenMarkdownLink: DocBrokenMarkdownLink) => void;
};

View file

@ -6,7 +6,7 @@
*/
import path from 'path';
import {normalizeUrl, posixPath} from '@docusaurus/utils';
import {getContentPathList, normalizeUrl, posixPath} from '@docusaurus/utils';
import {CURRENT_VERSION_NAME} from '../constants';
import {validateVersionsOptions} from './validation';
import {
@ -268,3 +268,20 @@ export function toFullVersion(version: LoadedVersion): FullVersion {
}),
};
}
export function getVersionFromSourceFilePath(
filePath: string,
versionsMetadata: VersionMetadata[],
): VersionMetadata {
const versionFound = versionsMetadata.find((version) =>
getContentPathList(version).some((docsDirPath) =>
filePath.startsWith(docsDirPath),
),
);
if (!versionFound) {
throw new Error(
`Unexpected error: file at "${filePath}" does not belong to any docs version!`,
);
}
return versionFound;
}

View file

@ -26,6 +26,8 @@ import type {
LoadedContent,
PageFrontMatter,
} from '@docusaurus/plugin-content-pages';
import type {RuleSetUseItem} from 'webpack';
import type {Options as MDXLoaderOptions} from '@docusaurus/mdx-loader/lib/loader';
export default function pluginContentPages(
context: LoadContext,
@ -74,6 +76,42 @@ export default function pluginContentPages(
beforeDefaultRemarkPlugins,
} = options;
const contentDirs = getContentPathList(contentPaths);
function createMDXLoader(): RuleSetUseItem {
const loaderOptions: MDXLoaderOptions = {
admonitions,
remarkPlugins,
rehypePlugins,
beforeDefaultRehypePlugins,
beforeDefaultRemarkPlugins,
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.
const aliasedSource = aliasedSitePath(mdxPath, siteDir);
return path.join(dataDir, `${docuHash(aliasedSource)}.json`);
},
// Assets allow to convert some relative images paths to
// require(...) calls
createAssets: ({frontMatter}: {frontMatter: PageFrontMatter}) => ({
image: frontMatter.image,
}),
markdownConfig: siteConfig.markdown,
};
return {
loader: require.resolve('@docusaurus/mdx-loader'),
options: loaderOptions,
};
}
return {
module: {
rules: [
@ -82,52 +120,7 @@ export default function pluginContentPages(
include: contentDirs
// Trailing slash is important, see https://github.com/facebook/docusaurus/pull/3970
.map(addTrailingPathSeparator),
use: [
{
loader: require.resolve('@docusaurus/mdx-loader'),
options: {
admonitions,
remarkPlugins,
rehypePlugins,
beforeDefaultRehypePlugins,
beforeDefaultRemarkPlugins,
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.
const aliasedSource = aliasedSitePath(mdxPath, siteDir);
return path.join(
dataDir,
`${docuHash(aliasedSource)}.json`,
);
},
// Assets allow to convert some relative images paths to
// require(...) calls
createAssets: ({
frontMatter,
}: {
frontMatter: PageFrontMatter;
}) => ({
image: frontMatter.image,
}),
markdownConfig: siteConfig.markdown,
},
},
{
loader: path.resolve(__dirname, './markdownLoader.js'),
options: {
// siteDir,
// contentPath,
},
},
].filter(Boolean),
use: [createMDXLoader()],
},
],
},

View file

@ -1,22 +0,0 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import type {LoaderContext} from 'webpack';
export default function markdownLoader(
this: LoaderContext<undefined>,
fileString: string,
): void {
const callback = this.async();
// const options = this.getOptions();
// TODO provide additional md processing here? like interlinking pages?
// fileString = linkify(fileString)
return callback(null, fileString);
}

View file

@ -1,250 +0,0 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`replaceMarkdownLinks does basic replace 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "hmmm.md",
},
],
"newContent": "
[foo](/doc/foo)
[baz](/doc/baz)
[foo](/doc/foo)
[http](http://github.com/facebook/docusaurus/README.md)
[https](https://github.com/facebook/docusaurus/README.md)
[asset](./foo.js)
[asset as well](@site/docs/_partial.md)
[looks like http...](/doc/http)
[nonexistent](hmmm.md)
",
}
`;
exports[`replaceMarkdownLinks handles link titles 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[URL](/docs/file "title")
[URL](/docs/file 'title')
[URL](/docs/file (title))
",
}
`;
exports[`replaceMarkdownLinks handles stray spaces 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[URL]( /docs/file )
[ref]: /docs/file
",
}
`;
exports[`replaceMarkdownLinks handles unpaired fences 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
\`\`\`foo
hello
\`\`\`foo
hello
\`\`\`
A [link](/docs/file)
",
}
`;
exports[`replaceMarkdownLinks ignores links in HTML comments 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "./foo.md",
},
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "./foo.md",
},
],
"newContent": "
<!-- [foo](./foo.md) -->
<!--
[foo](./foo.md)
-->
",
}
`;
exports[`replaceMarkdownLinks ignores links in fenced blocks 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
\`\`\`
[foo](foo.md)
\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
~~~js
[foo](foo.md)
~~~
~~~js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
~~~
",
}
`;
exports[`replaceMarkdownLinks ignores links in inline code 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "foo.md",
},
],
"newContent": "
\`[foo](foo.md)\`
",
}
`;
exports[`replaceMarkdownLinks preserves query/hash 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[URL](/docs/file?foo=bar#baz)
[URL](/docs/file#a)
[URL](/docs/file?c)
",
}
`;
exports[`replaceMarkdownLinks replaces Markdown links with spaces 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[doc a](/docs/doc%20a)
[doc a](</docs/doc%20a>)
[doc b](/docs/my%20docs/doc%20b)
[doc b](</docs/my%20docs/doc%20b>)
[doc]: </docs/my%20docs/doc%20b>
",
}
`;
exports[`replaceMarkdownLinks replaces links with same title as URL 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[foo.md](/docs/foo)
[./foo.md](</docs/foo>)
[./foo.md](/docs/foo)
[foo.md](/docs/foo)
[./foo.md](/docs/foo)
",
}
`;
exports[`replaceMarkdownLinks replaces multiple links on same line 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[a](/docs/a), [a](/docs/a), [b](/docs/b), [c](/docs/c)
",
}
`;
exports[`replaceMarkdownLinks replaces reference style Markdown links 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
The following operations are defined for [URI]s:
* [info]: Returns metadata about the resource,
* [list]: Returns metadata about the resource's children (like getting the content of a local directory).
[URI]: /docs/api/classes/uri
[info]: /docs/api/classes/uri#info
[list]: /docs/api/classes/uri#list
",
}
`;
exports[`replaceMarkdownLinks replaces two links on the same line 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "[TypeScript](/programming-languages/typescript/) and [Go](/programming-languages/go/)",
}
`;
exports[`replaceMarkdownLinks resolves absolute and relative links differently 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro/intro.md",
"link": "./api/classes/divine_uri.URI.md",
},
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro/intro.md",
"link": "/another.md",
},
],
"newContent": "
[Relative link](/docs/another)
[Relative link 2](/docs/api/classes/uri)
[Relative link that should be absolute](./api/classes/divine_uri.URI.md)
[Absolute link](/docs/api/classes/uri)
[Absolute link from site dir](/docs/api/classes/uri)
[Absolute link that should be relative](/another.md)
[Relative link that acts as absolute](/docs/api/classes/uri)
[Relative link that acts as relative](/docs/another)
",
}
`;

View file

@ -5,401 +5,70 @@
* LICENSE file in the root directory of this source tree.
*/
import {replaceMarkdownLinks} from '../markdownLinks';
import {resolveMarkdownLinkPathname} from '../markdownLinks';
describe('resolveMarkdownLinkPathname', () => {
type Context = Parameters<typeof resolveMarkdownLinkPathname>[1];
describe('replaceMarkdownLinks', () => {
it('does basic replace', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/foo.md': '/doc/foo',
'@site/docs/bar/baz.md': '/doc/baz',
'@site/docs/http.foo.md': '/doc/http',
},
fileString: `
[foo](./foo.md)
[baz](./bar/baz.md)
[foo](foo.md)
[http](http://github.com/facebook/docusaurus/README.md)
[https](https://github.com/facebook/docusaurus/README.md)
[asset](./foo.js)
[asset as well](@site/docs/_partial.md)
[looks like http...](http.foo.md)
[nonexistent](hmmm.md)
`,
}),
).toMatchSnapshot();
});
const context: Context = {
siteDir: '.',
sourceFilePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/foo.md': '/doc/foo',
'@site/docs/bar/baz.md': '/doc/baz',
'@site/docs/http.foo.md': '/doc/http',
},
};
it('replaces two links on the same line', () => {
// cSpell:ignore Goooooooooo
// This is a very arcane bug: if we continue matching using the previous
// matching index (as is the behavior of RegExp#exec), it will go right over
// the next Markdown link and fail to match the "Go" link. This only happens
// when: (1) the replaced link is much shorter than the Markdown path, (2)
// the next link is very close to the current one (e.g. here if it's not
// "Go" but "Goooooooooo", or if every link has the /docs/ prefix, the bug
// will not trigger because it won't overshoot)
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/',
'@site/docs/programming-languages/typescript/typescript.md':
'/programming-languages/typescript/',
'@site/docs/programming-languages/go/go.md':
'/programming-languages/go/',
},
fileString: `[TypeScript](programming-languages/typescript/typescript.md) and [Go](programming-languages/go/go.md)`,
}),
).toMatchSnapshot();
});
function test(linkPathname: string, expectedOutput: string | null) {
const output = resolveMarkdownLinkPathname(linkPathname, context);
expect(output).toEqual(expectedOutput);
}
it('replaces reference style Markdown links', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro/intro.md': '/docs/intro',
'@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
},
fileString: `
The following operations are defined for [URI]s:
* [info]: Returns metadata about the resource,
* [list]: Returns metadata about the resource's children (like getting the content of a local directory).
[URI]: ../api/classes/divine_uri.URI.md
[info]: ../api/classes/divine_uri.URI.md#info
[list]: ../api/classes/divine_uri.URI.md#list
`,
}),
).toMatchSnapshot();
test('./foo.md', '/doc/foo');
test('foo.md', '/doc/foo');
test('./bar/baz.md', '/doc/baz');
test('http.foo.md', '/doc/http');
test('@site/docs/_partial.md', null);
test('foo.js', null);
test('nonexistent.md', null);
test('https://github.com/facebook/docusaurus/README.md', null);
});
it('resolves absolute and relative links differently', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
const context: Context = {
siteDir: '.',
sourceFilePath: 'docs/intro/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro/intro.md': '/docs/intro',
'@site/docs/intro/another.md': '/docs/another',
'@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
},
sourceToPermalink: {
'@site/docs/intro/intro.md': '/docs/intro',
'@site/docs/intro/another.md': '/docs/another',
'@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
},
};
fileString: `
[Relative link](./another.md)
[Relative link 2](../api/classes/divine_uri.URI.md)
[Relative link that should be absolute](./api/classes/divine_uri.URI.md)
[Absolute link](/api/classes/divine_uri.URI.md)
[Absolute link from site dir](/docs/api/classes/divine_uri.URI.md)
[Absolute link that should be relative](/another.md)
[Relative link that acts as absolute](api/classes/divine_uri.URI.md)
[Relative link that acts as relative](another.md)
`,
}),
).toMatchSnapshot();
});
function test(linkPathname: string, expectedOutput: string | null) {
const output = resolveMarkdownLinkPathname(linkPathname, context);
expect(output).toEqual(expectedOutput);
}
// TODO bad
it('ignores links in HTML comments', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
},
fileString: `
<!-- [foo](./foo.md) -->
<!--
[foo](./foo.md)
-->
`,
}),
).toMatchSnapshot();
});
it('ignores links in fenced blocks', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
},
fileString: `
\`\`\`
[foo](foo.md)
\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
~~~js
[foo](foo.md)
~~~
~~~js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
~~~
`,
}),
).toMatchSnapshot();
});
// FIXME
it('ignores links in inline code', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
},
fileString: `
\`[foo](foo.md)\`
`,
}),
).toMatchSnapshot();
});
it('replaces links with same title as URL', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/foo.md': '/docs/foo',
},
fileString: `
[foo.md](foo.md)
[./foo.md](<./foo.md>)
[./foo.md](./foo.md)
[foo.md](./foo.md)
[./foo.md](foo.md)
`,
}),
).toMatchSnapshot();
});
it('replaces multiple links on same line', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/a.md': '/docs/a',
'@site/docs/b.md': '/docs/b',
'@site/docs/c.md': '/docs/c',
},
fileString: `
[a](a.md), [a](a.md), [b](b.md), [c](c.md)
`,
}),
).toMatchSnapshot();
});
it('replaces Markdown links with spaces', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/doc a.md': '/docs/doc%20a',
'@site/docs/my docs/doc b.md': '/docs/my%20docs/doc%20b',
},
fileString: `
[doc a](./doc%20a.md)
[doc a](<./doc a.md>)
[doc b](./my%20docs/doc%20b.md)
[doc b](<./my docs/doc b.md>)
[doc]: <./my docs/doc b.md>
`,
}),
).toMatchSnapshot();
});
it('does not replace non-Markdown links', () => {
const input = `
[asset](./file.md_asset/1.png)
[URL](<https://example.com/file_(1).md>)
[not a link]((foo)
[not a link](foo bar)
[not a link]: foo bar
[not a link]: (foo
[not a link]: bar)
`;
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: input,
}),
).toEqual({
newContent: input,
brokenMarkdownLinks: [],
});
});
it('handles stray spaces', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
[URL]( ./file.md )
[ref]: ./file.md
`,
}),
).toMatchSnapshot();
});
it('handles link titles', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
[URL](./file.md "title")
[URL](./file.md 'title')
[URL](./file.md (title))
`,
}),
).toMatchSnapshot();
});
it('preserves query/hash', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
[URL](./file.md?foo=bar#baz)
[URL](./file.md#a)
[URL](./file.md?c)
`,
}),
).toMatchSnapshot();
});
it('handles unpaired fences', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
\`\`\`foo
hello
\`\`\`foo
hello
\`\`\`
A [link](./file.md)
`,
}),
).toMatchSnapshot();
test('./another.md', '/docs/another');
test('../api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('./api/classes/divine_uri.URI.md', null);
test('/api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('/docs/api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('/another.md', null);
test('api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('another.md', '/docs/another');
});
});

View file

@ -17,6 +17,9 @@ import {
hasSSHProtocol,
parseURLPath,
serializeURLPath,
parseURLOrPath,
toURLPath,
parseLocalURLPath,
} from '../urlUtils';
describe('normalizeUrl', () => {
@ -228,6 +231,166 @@ describe('isValidPathname', () => {
});
});
describe('toURLPath', () => {
it('url', () => {
const url = new URL('https://example.com/pathname?qs#hash');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: 'qs',
hash: 'hash',
});
});
it('pathname + qs', () => {
const url = parseURLOrPath('/pathname?qs');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: 'qs',
hash: undefined,
});
});
it('pathname + hash', () => {
const url = parseURLOrPath('/pathname#hash');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: undefined,
hash: 'hash',
});
});
it('pathname + qs + hash', () => {
const url = parseURLOrPath('/pathname?qs#hash');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: 'qs',
hash: 'hash',
});
});
it('pathname + empty qs + empty hash', () => {
const url = parseURLOrPath('/pathname?#');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: '',
hash: '',
});
});
});
describe('parseLocalURLPath', () => {
it('returns null for non-local URLs', () => {
expect(parseLocalURLPath('https://example')).toBeNull();
expect(parseLocalURLPath('https://example:80')).toBeNull();
expect(parseLocalURLPath('https://example.com/xyz')).toBeNull();
expect(parseLocalURLPath('https://example.com/xyz?qs#hash')).toBeNull();
expect(parseLocalURLPath('https://example.com:80/xyz?qs#hash')).toBeNull();
expect(parseLocalURLPath('https://u:p@example:80/xyz?qs#hash')).toBeNull();
});
it('parses pathname', () => {
expect(parseLocalURLPath('/pathname')).toEqual({
pathname: '/pathname',
search: undefined,
hash: undefined,
});
expect(parseLocalURLPath('pathname.md')).toEqual({
pathname: 'pathname.md',
search: undefined,
hash: undefined,
});
expect(parseLocalURLPath('./pathname')).toEqual({
pathname: './pathname',
search: undefined,
hash: undefined,
});
expect(parseLocalURLPath('../../pathname.mdx')).toEqual({
pathname: '../../pathname.mdx',
search: undefined,
hash: undefined,
});
});
it('parses qs', () => {
expect(parseLocalURLPath('?')).toEqual({
pathname: '',
search: '',
hash: undefined,
});
expect(parseLocalURLPath('?qs')).toEqual({
pathname: '',
search: 'qs',
hash: undefined,
});
expect(parseLocalURLPath('?age=42')).toEqual({
pathname: '',
search: 'age=42',
hash: undefined,
});
});
it('parses hash', () => {
expect(parseLocalURLPath('#')).toEqual({
pathname: '',
search: undefined,
hash: '',
});
expect(parseLocalURLPath('#hash')).toEqual({
pathname: '',
search: undefined,
hash: 'hash',
});
});
it('parses complex local paths', () => {
expect(
parseLocalURLPath('../../great/path name/doc.mdx?age=42#hash'),
).toEqual({
pathname: '../../great/path name/doc.mdx',
search: 'age=42',
hash: 'hash',
});
expect(parseLocalURLPath('my great path?=42#hash?qsInHash')).toEqual({
pathname: 'my great path',
search: '=42',
hash: 'hash?qsInHash',
});
expect(parseLocalURLPath('?qs1#hash1?qs2#hash2')).toEqual({
pathname: '',
search: 'qs1',
hash: 'hash1?qs2#hash2',
});
expect(parseLocalURLPath('../swizzling.mdx#wrapping')).toEqual({
pathname: '../swizzling.mdx',
search: undefined,
hash: 'wrapping',
});
});
it('is isomorphic with serialize', () => {
const testLocalPath = (url: string) => {
expect(serializeURLPath(parseLocalURLPath(url)!)).toBe(url);
};
[
'',
'doc',
'doc.mdx',
'./doc.mdx',
'.././doc.mdx',
'/some pathname/.././doc.mdx',
'?',
'?qs',
'#',
'#hash',
'?qs#hash',
'?qs#hash',
'doc.mdx?qs#hash',
'/some pathname/.././doc.mdx?qs#hash',
'/some pathname/.././doc.mdx?qs#hash?qs2#hash2',
].forEach(testLocalPath);
});
});
describe('parseURLPath', () => {
it('parse and resolve pathname', () => {
expect(parseURLPath('')).toEqual({

View file

@ -44,6 +44,9 @@ export {
isValidPathname,
resolvePathname,
parseURLPath,
parseLocalURLPath,
parseURLOrPath,
toURLPath,
serializeURLPath,
hasSSHProtocol,
buildHttpsUrl,
@ -71,11 +74,7 @@ export {
writeMarkdownHeadingId,
type WriteHeadingIDOptions,
} from './markdownUtils';
export {
type ContentPaths,
type BrokenMarkdownLink,
replaceMarkdownLinks,
} from './markdownLinks';
export {type ContentPaths, resolveMarkdownLinkPathname} from './markdownLinks';
export {type SluggerOptions, type Slugger, createSlugger} from './slugger';
export {
isNameTooLong,

View file

@ -40,159 +40,35 @@ export type BrokenMarkdownLink<T extends ContentPaths> = {
link: string;
};
type CodeFence = {
type: '`' | '~';
definitelyOpen: boolean;
count: number;
};
function parseCodeFence(line: string): CodeFence | null {
const match = line.trim().match(/^(?<fence>`{3,}|~{3,})(?<rest>.*)/);
if (!match) {
return null;
// Note: this is historical logic extracted during a 2024 refactor
// The algorithm has been kept exactly as before for backward compatibility
// See also https://github.com/facebook/docusaurus/pull/10168
export function resolveMarkdownLinkPathname(
linkPathname: string,
context: {
sourceFilePath: string;
sourceToPermalink: {[aliasedFilePath: string]: string};
contentPaths: ContentPaths;
siteDir: string;
},
): string | null {
const {sourceFilePath, sourceToPermalink, contentPaths, siteDir} = context;
const sourceDirsToTry: string[] = [];
// ./file.md and ../file.md are always relative to the current file
if (!linkPathname.startsWith('./') && !linkPathname.startsWith('../')) {
sourceDirsToTry.push(...getContentPathList(contentPaths), siteDir);
}
return {
type: match.groups!.fence![0]! as '`' | '~',
definitelyOpen: !!match.groups!.rest!,
count: match.groups!.fence!.length,
};
}
/**
* Takes a Markdown file and replaces relative file references with their URL
* counterparts, e.g. `[link](./intro.md)` => `[link](/docs/intro)`, preserving
* everything else.
*
* This method uses best effort to find a matching file. The file reference can
* be relative to the directory of the current file (most likely) or any of the
* content paths (so `/tutorials/intro.md` can be resolved as
* `<siteDir>/docs/tutorials/intro.md`). Links that contain the `http(s):` or
* `@site/` prefix will always be ignored.
*/
export function replaceMarkdownLinks<T extends ContentPaths>({
siteDir,
fileString,
filePath,
contentPaths,
sourceToPermalink,
}: {
/** Absolute path to the site directory, used to resolve aliased paths. */
siteDir: string;
/** The Markdown file content to be processed. */
fileString: string;
/** Absolute path to the current file containing `fileString`. */
filePath: string;
/** The content paths which the file reference may live in. */
contentPaths: T;
/**
* A map from source paths to their URLs. Source paths are `@site` aliased.
*/
sourceToPermalink: {[aliasedPath: string]: string};
}): {
/**
* The content with all Markdown file references replaced with their URLs.
* Unresolved links are left as-is.
*/
newContent: string;
/** The list of broken links. */
brokenMarkdownLinks: BrokenMarkdownLink<T>[];
} {
const brokenMarkdownLinks: BrokenMarkdownLink<T>[] = [];
// Replace internal markdown linking (except in fenced blocks).
let lastOpenCodeFence: CodeFence | null = null;
const lines = fileString.split('\n').map((line) => {
const codeFence = parseCodeFence(line);
if (codeFence) {
if (!lastOpenCodeFence) {
lastOpenCodeFence = codeFence;
} else if (
!codeFence.definitelyOpen &&
lastOpenCodeFence.type === codeFence.type &&
lastOpenCodeFence.count <= codeFence.count
) {
// All three conditions must be met in order for this to be considered
// a closing fence.
lastOpenCodeFence = null;
}
}
if (lastOpenCodeFence) {
return line;
}
let modifiedLine = line;
// Replace inline-style links or reference-style links e.g:
// This is [Document 1](doc1.md)
// [doc1]: doc1.md
const linkTitlePattern = '(?:\\s+(?:\'.*?\'|".*?"|\\(.*?\\)))?';
const linkSuffixPattern = '(?:\\?[^#>\\s]+)?(?:#[^>\\s]+)?';
const linkCapture = (forbidden: string) =>
`((?!https?://|@site/)[^${forbidden}#?]+)`;
const linkURLPattern = `(?:(?!<)${linkCapture(
'()\\s',
)}${linkSuffixPattern}|<${linkCapture('>')}${linkSuffixPattern}>)`;
const linkPattern = new RegExp(
`\\[(?:(?!\\]\\().)*\\]\\(\\s*${linkURLPattern}${linkTitlePattern}\\s*\\)|^\\s*\\[[^[\\]]*[^[\\]\\s][^[\\]]*\\]:\\s*${linkURLPattern}${linkTitlePattern}$`,
'dgm',
);
let mdMatch = linkPattern.exec(modifiedLine);
while (mdMatch !== null) {
// Replace it to correct html link.
const mdLink = mdMatch.slice(1, 5).find(Boolean)!;
const mdLinkRange = mdMatch.indices!.slice(1, 5).find(Boolean)!;
if (!/\.mdx?$/.test(mdLink)) {
mdMatch = linkPattern.exec(modifiedLine);
continue;
}
const sourcesToTry: string[] = [];
// ./file.md and ../file.md are always relative to the current file
if (!mdLink.startsWith('./') && !mdLink.startsWith('../')) {
sourcesToTry.push(...getContentPathList(contentPaths), siteDir);
}
// /file.md is always relative to the content path
if (!mdLink.startsWith('/')) {
sourcesToTry.push(path.dirname(filePath));
}
const aliasedSourceMatch = sourcesToTry
.map((p) => path.join(p, decodeURIComponent(mdLink)))
.map((source) => aliasedSitePath(source, siteDir))
.find((source) => sourceToPermalink[source]);
const permalink: string | undefined = aliasedSourceMatch
? sourceToPermalink[aliasedSourceMatch]
: undefined;
if (permalink) {
// MDX won't be happy if the permalink contains a space, we need to
// convert it to %20
const encodedPermalink = permalink
.split('/')
.map((part) => part.replace(/\s/g, '%20'))
.join('/');
modifiedLine = `${modifiedLine.slice(
0,
mdLinkRange[0],
)}${encodedPermalink}${modifiedLine.slice(mdLinkRange[1])}`;
// Adjust the lastIndex to avoid passing over the next link if the
// newly replaced URL is shorter.
linkPattern.lastIndex += encodedPermalink.length - mdLink.length;
} else {
const brokenMarkdownLink: BrokenMarkdownLink<T> = {
contentPaths,
filePath,
link: mdLink,
};
brokenMarkdownLinks.push(brokenMarkdownLink);
}
mdMatch = linkPattern.exec(modifiedLine);
}
return modifiedLine;
});
const newContent = lines.join('\n');
return {newContent, brokenMarkdownLinks};
// /file.md is never relative to the source file path
if (!linkPathname.startsWith('/')) {
sourceDirsToTry.push(path.dirname(sourceFilePath));
}
const aliasedSourceMatch = sourceDirsToTry
.map((sourceDir) => path.join(sourceDir, decodeURIComponent(linkPathname)))
.map((source) => aliasedSitePath(source, siteDir))
.find((source) => sourceToPermalink[source]);
return aliasedSourceMatch
? sourceToPermalink[aliasedSourceMatch] ?? null
: null;
}
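For illustration, here is a minimal sketch of calling the extracted helper directly; the file paths and the sourceToPermalink map are hypothetical and simply mirror the test fixtures above.
```ts
import {resolveMarkdownLinkPathname} from './markdownLinks';

// Hypothetical inputs shaped like the test fixtures above.
const permalink = resolveMarkdownLinkPathname('./another.md', {
  sourceFilePath: 'docs/file.md',
  siteDir: '.',
  contentPaths: {
    contentPath: 'docs',
    contentPathLocalized: 'i18n/docs-localized',
  },
  sourceToPermalink: {'@site/docs/another.md': '/docs/another'},
});
// './another.md' is resolved relative to docs/, aliased to
// '@site/docs/another.md', and mapped to '/docs/another'.
// Unknown targets return null instead of a permalink.
```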

View file

@ -164,27 +164,22 @@ export function isValidPathname(str: string): boolean {
}
}
export function parseURLOrPath(url: string, base?: string | URL): URL {
try {
// TODO when Node supports it, using URL.parse could be faster?
// see https://kilianvalkhof.com/2024/javascript/the-problem-with-new-url-and-how-url-parse-fixes-that/
return new URL(url, base ?? 'https://example.com');
} catch (e) {
throw new Error(
`Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
{cause: e},
);
}
}
export type URLPath = {pathname: string; search?: string; hash?: string};
// Let's name the concept of (pathname + search + hash) as URLPath
// See also https://twitter.com/kettanaito/status/1741768992866308120
// Note: this function also resolves relative pathnames while parsing!
export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
function parseURL(url: string, base?: string | URL): URL {
try {
// A possible alternative? https://github.com/unjs/ufo#url
return new URL(url, base ?? 'https://example.com');
} catch (e) {
throw new Error(
`Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
{cause: e},
);
}
}
const base = fromPath ? parseURL(fromPath) : undefined;
const url = parseURL(urlPath, base);
export function toURLPath(url: URL): URLPath {
const {pathname} = url;
// Fixes annoying url.search behavior
@ -193,17 +188,17 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
// "?param => "param"
const search = url.search
? url.search.slice(1)
: urlPath.includes('?')
: url.href.includes('?')
? ''
: undefined;
// Fixes annoying url.hash behavior
// "" => undefined
// "#" => ""
// "?param => "param"
// "#param => "param"
const hash = url.hash
? url.hash.slice(1)
: urlPath.includes('#')
: url.href.includes('#')
? ''
: undefined;
@ -214,6 +209,65 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
};
}
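To make the quirk concrete, here is a small sketch (illustrative values) of the WHATWG URL behavior that the href checks in toURLPath work around:
```ts
// Both URLs report search === '', so .search alone cannot distinguish
// "no query at all" from "an empty ?"; hence the href check.
const noQuery = new URL('https://example.com/doc');
const emptyQuery = new URL('https://example.com/doc?');
console.log(noQuery.search, emptyQuery.search); // '' ''
console.log(noQuery.href.includes('?'), emptyQuery.href.includes('?')); // false true
```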
/**
* Let's name the concept of (pathname + search + hash) as URLPath
* See also https://twitter.com/kettanaito/status/1741768992866308120
* Note: this function also resolves relative pathnames while parsing!
*/
export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
const base = fromPath ? parseURLOrPath(fromPath) : undefined;
const url = parseURLOrPath(urlPath, base);
return toURLPath(url);
}
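For example, a short sketch (hypothetical document paths, importing from the urlUtils module shown here) of how parseURLPath resolves relative segments against the optional fromPath:
```ts
import {parseURLPath} from './urlUtils';

// '../other.md' is resolved against the directory of the fromPath.
parseURLPath('../other.md#intro', '/docs/folder/doc.md');
// => {pathname: '/docs/other.md', search: undefined, hash: 'intro'}
```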
/**
* This returns results for strings like "foo", "../foo", "./foo.mdx?qs#hash"
* Unlike "parseURLPath()" above, this will not resolve the pathnames
* The returned pathname of "../../foo.mdx" will be "../../foo.mdx", not "/foo"
* This returns null if the URL is not "local" (contains a domain/protocol etc.)
*/
export function parseLocalURLPath(urlPath: string): URLPath | null {
// Workaround because URL("") requires a protocol
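// e.g. new URL('./doc.md') alone throws "Invalid URL",
// but parses fine with this fake base (illustration)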
const unspecifiedProtocol = 'unspecified:';
const url = parseURLOrPath(urlPath, `${unspecifiedProtocol}//`);
// Ignore links with specified protocol / host
// (usually fully qualified links starting with https://)
if (
url.protocol !== unspecifiedProtocol ||
url.host !== '' ||
url.username !== '' ||
url.password !== ''
) {
return null;
}
// We can't use the "new URL()" result because it always tries to resolve URLs
// i.e. it will remove any "./" or "../" in the pathname, which we don't want
// We have to parse it manually...
let localUrlPath = urlPath;
// Extract and remove the #hash part
const hashIndex = localUrlPath.indexOf('#');
const hash =
hashIndex !== -1 ? localUrlPath.substring(hashIndex + 1) : undefined;
localUrlPath =
hashIndex !== -1 ? localUrlPath.substring(0, hashIndex) : localUrlPath;
// Extract and remove ?search part
const searchIndex = localUrlPath.indexOf('?');
const search =
searchIndex !== -1 ? localUrlPath.substring(searchIndex + 1) : undefined;
localUrlPath =
searchIndex !== -1 ? localUrlPath.substring(0, searchIndex) : localUrlPath;
return {
pathname: localUrlPath,
search,
hash,
};
}
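A companion sketch (again with hypothetical paths) contrasting the two parsers: parseLocalURLPath keeps relative segments and rejects non-local URLs, whereas parseURLPath would resolve them:
```ts
import {parseLocalURLPath} from './urlUtils';

parseLocalURLPath('../target.mdx?qs#hash');
// => {pathname: '../target.mdx', search: 'qs', hash: 'hash'} ('../' is preserved)

parseLocalURLPath('https://example.com/target.mdx');
// => null (fully qualified URLs are not local)
```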
export function serializeURLPath(urlPath: URLPath): string {
const search = urlPath.search === undefined ? '' : `?${urlPath.search}`;
const hash = urlPath.hash === undefined ? '' : `#${urlPath.hash}`;

View file

@ -0,0 +1,9 @@
---
slug: target-doc-slug
---
# Target doc
This is just a doc meant to be linked to by other docs.
## Target heading {#target-heading}

View file

@ -0,0 +1,86 @@
# Test links
These are dogfood tests showing that Markdown links with md/mdx file references are resolved correctly.
Also proves that [#9048](https://github.com/facebook/docusaurus/issues/9048) linking bugs are solved.
---
## Resolvable links
[target.mdx](target.mdx)
[./target.mdx](./target.mdx)
[../links/target.mdx](../links/target.mdx)
[./target.mdx?age=42#target-heading](./target.mdx?age=42#target-heading)
[\<./target.mdx?qs=value with space>](<./target.mdx?qs=value with space>)
[target.mdx 'link title'](target.mdx 'link title')
## Complex resolvable links
Some of those are edge cases reported in [#9048](https://github.com/facebook/docusaurus/issues/9048)
{/* prettier-ignore */}```inline triple backticks code block, see https://github.com/facebook/docusaurus/issues/9048#issuecomment-1959199829```
<p>
<b>[./target.mdx](./target.mdx) bolded</b>
</p>
[**./target.mdx** with _italic_ and <span style={{'color': 'red'}}>`JSX`</span>](./target.mdx)
[`Type1`](target.mdx#target-heading)\<[`Type2`](target.mdx#target-heading)\>
{/* prettier-ignore */}[./target.mdx link
declared
on
multiple
lines
](./target.mdx)
[![Image with ./target.mdx link](/img/slash-introducing.svg)](./target.mdx)
## Unresolvable links
[https://github.com/facebook/docusaurus/blob/main/README.md](https://github.com/facebook/docusaurus/blob/main/README.md)
[ftp:///README.mdx](ftp:///README.mdx)
```markdown
[target.mdx](target.mdx)
```
## Links in comments
MDX/HTML comments with invalid file references should be neither resolved nor reported by the broken link checker:
```mdx
{/* [doesNotExist.mdx](doesNotExist.mdx) */}
<!-- [doesNotExist.mdx](doesNotExist.mdx) -->
```
{/* [doesNotExist.mdx](doesNotExist.mdx) */}
<!-- [doesNotExist.mdx](doesNotExist.mdx) -->
## Reference-style links
The following should also work:
```md
Testing some link refs: [link-ref1], [link-ref2], [link-ref3]
[link-ref1]: target.mdx
[link-ref2]: ./target.mdx
[link-ref3]: ../links/target.mdx?qs#target-heading
```
Testing some link refs: [link-ref1], [link-ref2], [link-ref3]
[link-ref1]: target.mdx
[link-ref2]: ./target.mdx
[link-ref3]: ../links/target.mdx?qs#target-heading