fix(mdx-loader): resolve Markdown/MDX links with Remark instead of RegExp (#10168)

This commit is contained in:
Sébastien Lorber 2024-05-24 19:03:23 +02:00 committed by GitHub
parent aab332c2ae
commit e34614963e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 902 additions and 1620 deletions

View file

@ -1,28 +1,5 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`linkify reports broken markdown links 1`] = `
"---
title: This post links to another one!
---
[Good link 1](/blog/2018/12/14/Happy-First-Birthday-Slash)
[Good link 2](/blog/2018/12/14/Happy-First-Birthday-Slash)
[Bad link 1](postNotExist1.md)
[Bad link 1](./postNotExist2.mdx)
"
`;
exports[`linkify transforms to correct link 1`] = `
"---
title: This post links to another one!
---
[Linked post](/blog/2018/12/14/Happy-First-Birthday-Slash)"
`;
exports[`paginateBlogPosts generates a single page 1`] = `
[
{

View file

@ -5,20 +5,13 @@
* LICENSE file in the root directory of this source tree.
*/
import {jest} from '@jest/globals';
import fs from 'fs-extra';
import path from 'path';
import {fromPartial} from '@total-typescript/shoehorn';
import {
truncate,
parseBlogFileName,
linkify,
getSourceToPermalink,
paginateBlogPosts,
applyProcessBlogPosts,
type LinkifyParams,
} from '../blogUtils';
import type {BlogBrokenMarkdownLink, BlogContentPaths} from '../types';
import type {BlogPost} from '@docusaurus/plugin-content-blog';
describe('truncate', () => {
@ -209,95 +202,6 @@ describe('parseBlogFileName', () => {
});
});
// Test suite for linkify(): runs it against fixture posts located under
// __fixtures__/website/blog-with-ref and checks both the rewritten content
// and the reporting of links that cannot be resolved.
describe('linkify', () => {
const siteDir = path.join(__dirname, '__fixtures__', 'website');
const contentPaths: BlogContentPaths = {
contentPath: path.join(siteDir, 'blog-with-ref'),
contentPathLocalized: path.join(siteDir, 'blog-with-ref-localized'),
};
const pluginDir = 'blog-with-ref';
// The only known blog post. getSourceToPermalink() (see transform below)
// derives from this list the lookup that is handed to linkify().
const blogPosts: BlogPost[] = [
{
id: 'Happy 1st Birthday Slash!',
metadata: {
permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash',
source: path.posix.join(
'@site',
pluginDir,
'2018-12-14-Happy-First-Birthday-Slash.md',
),
title: 'Happy 1st Birthday Slash!',
description: `pattern name`,
date: new Date('2018-12-14'),
tags: [],
prevItem: {
permalink: '/blog/2019/01/01/date-matter',
title: 'date-matter',
},
hasTruncateMarker: false,
frontMatter: {},
authors: [],
unlisted: false,
},
content: '',
},
];
// Helper: reads the file at `filePath`, runs linkify() on its content and
// returns the pair [original content, transformed content].
// By default a broken link makes the helper throw; `options` is spread last
// so a test can override any argument (e.g. onBrokenMarkdownLink).
async function transform(filePath: string, options?: Partial<LinkifyParams>) {
const fileContent = await fs.readFile(filePath, 'utf-8');
const transformedContent = linkify({
filePath,
fileString: fileContent,
siteDir,
contentPaths,
sourceToPermalink: getSourceToPermalink(blogPosts),
onBrokenMarkdownLink: (brokenMarkdownLink) => {
throw new Error(
`Broken markdown link found: ${JSON.stringify(brokenMarkdownLink)}`,
);
},
...options,
});
return [fileContent, transformedContent];
}
it('transforms to correct link', async () => {
const post = path.join(contentPaths.contentPath, 'post.md');
const [content, transformedContent] = await transform(post);
expect(transformedContent).toMatchSnapshot();
// The file link must have been replaced by the post permalink.
expect(transformedContent).toContain(
'](/blog/2018/12/14/Happy-First-Birthday-Slash',
);
expect(transformedContent).not.toContain(
'](2018-12-14-Happy-First-Birthday-Slash.md)',
);
expect(content).not.toEqual(transformedContent);
});
it('reports broken markdown links', async () => {
const filePath = 'post-with-broken-links.md';
const folderPath = contentPaths.contentPath;
const postWithBrokenLinks = path.join(folderPath, filePath);
// Replace the throwing default callback with a mock so that the reported
// links can be inspected instead of aborting the transform.
const onBrokenMarkdownLink = jest.fn();
const [, transformedContent] = await transform(postWithBrokenLinks, {
onBrokenMarkdownLink,
});
expect(transformedContent).toMatchSnapshot();
// Exactly two unresolved links are expected, reported in this order.
expect(onBrokenMarkdownLink).toHaveBeenCalledTimes(2);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(1, {
filePath: path.resolve(folderPath, filePath),
contentPaths,
link: 'postNotExist1.md',
} as BlogBrokenMarkdownLink);
expect(onBrokenMarkdownLink).toHaveBeenNthCalledWith(2, {
filePath: path.resolve(folderPath, filePath),
contentPaths,
link: './postNotExist2.mdx',
} as BlogBrokenMarkdownLink);
});
});
describe('processBlogPosts', () => {
const blogPost2022: BlogPost = fromPartial({
metadata: {date: new Date('2022-01-01')},

View file

@ -17,7 +17,6 @@ import {
getEditUrl,
getFolderContainingFile,
posixPath,
replaceMarkdownLinks,
Globby,
normalizeFrontMatterTags,
groupTaggedItems,
@ -38,7 +37,7 @@ import type {
BlogTags,
BlogPaginated,
} from '@docusaurus/plugin-content-blog';
import type {BlogContentPaths, BlogMarkdownLoaderOptions} from './types';
import type {BlogContentPaths} from './types';
export function truncate(fileString: string, truncateMarker: RegExp): string {
return fileString.split(truncateMarker, 1).shift()!;
@ -403,35 +402,6 @@ export async function generateBlogPosts(
return blogPosts;
}
/**
 * Arguments accepted by `linkify`: the link-resolution settings taken from
 * the blog Markdown loader options, plus the file being processed.
 */
export type LinkifyParams = Pick<
  BlogMarkdownLoaderOptions,
  'sourceToPermalink' | 'siteDir' | 'contentPaths' | 'onBrokenMarkdownLink'
> & {
  /** Path of the Markdown file whose links are being rewritten. */
  filePath: string;
  /** Content of that file, as a string. */
  fileString: string;
};
/**
 * Rewrites the Markdown links of a blog file through `replaceMarkdownLinks`
 * and notifies the caller about every link that could not be resolved.
 *
 * @param params - File path/content plus the link-resolution settings.
 * @returns The file content produced by `replaceMarkdownLinks`.
 */
export function linkify(params: LinkifyParams): string {
  const {
    filePath,
    contentPaths,
    fileString,
    siteDir,
    sourceToPermalink,
    onBrokenMarkdownLink,
  } = params;

  // Only the resolution inputs are forwarded; the callback stays local.
  const replacement = replaceMarkdownLinks({
    siteDir,
    fileString,
    filePath,
    contentPaths,
    sourceToPermalink,
  });

  // Report each unresolved link, in the order they were collected.
  for (const brokenLink of replacement.brokenMarkdownLinks) {
    onBrokenMarkdownLink(brokenLink);
  }

  return replacement.newContent;
}
export async function applyProcessBlogPosts({
blogPosts,
processBlogPosts,

View file

@ -18,6 +18,7 @@ import {
getContentPathList,
getDataFilePath,
DEFAULT_PLUGIN_ID,
resolveMarkdownLinkPathname,
} from '@docusaurus/utils';
import {
getSourceToPermalink,
@ -43,6 +44,8 @@ import type {
BlogContent,
BlogPaginated,
} from '@docusaurus/plugin-content-blog';
import type {Options as MDXLoaderOptions} from '@docusaurus/mdx-loader/lib/loader';
import type {RuleSetUseItem} from 'webpack';
const PluginName = 'docusaurus-plugin-content-blog';
@ -213,22 +216,81 @@ export default async function pluginContentBlog(
beforeDefaultRehypePlugins,
} = options;
const markdownLoaderOptions: BlogMarkdownLoaderOptions = {
siteDir,
contentPaths,
truncateMarker,
sourceToPermalink: getSourceToPermalink(content.blogPosts),
onBrokenMarkdownLink: (brokenMarkdownLink) => {
if (onBrokenMarkdownLinks === 'ignore') {
return;
}
logger.report(
onBrokenMarkdownLinks,
)`Blog markdown link couldn't be resolved: (url=${brokenMarkdownLink.link}) in path=${brokenMarkdownLink.filePath}`;
},
};
const sourceToPermalink = getSourceToPermalink(content.blogPosts);
const contentDirs = getContentPathList(contentPaths);
/**
 * Builds the webpack `use` entry for `@docusaurus/mdx-loader`, configured
 * with the blog plugin options captured from the enclosing scope.
 *
 * @returns The loader path (resolved with `require.resolve`) and its options.
 */
function createMDXLoader(): RuleSetUseItem {
const loaderOptions: MDXLoaderOptions = {
admonitions,
remarkPlugins,
rehypePlugins,
// footnoteIDFixer is placed ahead of the user-provided plugins.
beforeDefaultRemarkPlugins: [
footnoteIDFixer,
...beforeDefaultRemarkPlugins,
],
beforeDefaultRehypePlugins,
// Static directories are resolved against the site directory.
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same as (in sync with)
// the path used by createData for each MDX file.
const aliasedPath = aliasedSitePath(mdxPath, siteDir);
return path.join(dataDir, `${docuHash(aliasedPath)}.json`);
},
// For blog posts, a title found in the Markdown content is always removed:
// blog post titles are rendered separately.
removeContentTitle: true,
// Assets allow converting some relative image paths to
// require() calls
// @ts-expect-error: TODO fix typing issue
createAssets: ({
frontMatter,
metadata,
}: {
frontMatter: BlogPostFrontMatter;
metadata: BlogPostMetadata;
}): Assets => ({
image: frontMatter.image,
authorsImageUrls: metadata.authors.map((author) => author.imageURL),
}),
markdownConfig: siteConfig.markdown,
// Resolves a Markdown link pathname to a permalink. When the result is
// null, the link is reported through logger.report using the
// onBrokenMarkdownLinks setting, and null is still returned to the loader.
resolveMarkdownLink: ({linkPathname, sourceFilePath}) => {
const permalink = resolveMarkdownLinkPathname(linkPathname, {
sourceFilePath,
sourceToPermalink,
siteDir,
contentPaths,
});
if (permalink === null) {
logger.report(
onBrokenMarkdownLinks,
)`Blog markdown link couldn't be resolved: (url=${linkPathname}) in source file path=${sourceFilePath}`;
}
return permalink;
},
};
return {
loader: require.resolve('@docusaurus/mdx-loader'),
options: loaderOptions,
};
}
/**
 * Builds the webpack `use` entry for the blog's own `markdownLoader.js`,
 * located next to this module. The truncate marker is the only option it
 * receives.
 */
function createBlogMarkdownLoader(): RuleSetUseItem {
  const options: BlogMarkdownLoaderOptions = {truncateMarker};
  const loader = path.resolve(__dirname, './markdownLoader.js');
  return {loader, options};
}
return {
resolve: {
alias: {
@ -242,61 +304,7 @@ export default async function pluginContentBlog(
include: contentDirs
// Trailing slash is important, see https://github.com/facebook/docusaurus/pull/3970
.map(addTrailingPathSeparator),
use: [
{
loader: require.resolve('@docusaurus/mdx-loader'),
options: {
admonitions,
remarkPlugins,
rehypePlugins,
beforeDefaultRemarkPlugins: [
footnoteIDFixer,
...beforeDefaultRemarkPlugins,
],
beforeDefaultRehypePlugins,
staticDirs: siteConfig.staticDirectories.map((dir) =>
path.resolve(siteDir, dir),
),
siteDir,
isMDXPartial: createAbsoluteFilePathMatcher(
options.exclude,
contentDirs,
),
metadataPath: (mdxPath: string) => {
// Note that metadataPath must be the same/in-sync as
// the path from createData for each MDX.
const aliasedPath = aliasedSitePath(mdxPath, siteDir);
return path.join(
dataDir,
`${docuHash(aliasedPath)}.json`,
);
},
// For blog posts a title in markdown is always removed
// Blog posts title are rendered separately
removeContentTitle: true,
// Assets allow to convert some relative images paths to
// require() calls
createAssets: ({
frontMatter,
metadata,
}: {
frontMatter: BlogPostFrontMatter;
metadata: BlogPostMetadata;
}): Assets => ({
image: frontMatter.image,
authorsImageUrls: metadata.authors.map(
(author) => author.imageURL,
),
}),
markdownConfig: siteConfig.markdown,
},
},
{
loader: path.resolve(__dirname, './markdownLoader.js'),
options: markdownLoaderOptions,
},
].filter(Boolean),
use: [createMDXLoader(), createBlogMarkdownLoader()],
},
],
},

View file

@ -5,7 +5,7 @@
* LICENSE file in the root directory of this source tree.
*/
import {truncate, linkify} from './blogUtils';
import {truncate} from './blogUtils';
import type {BlogMarkdownLoaderOptions} from './types';
import type {LoaderContext} from 'webpack';
@ -13,23 +13,19 @@ export default function markdownLoader(
this: LoaderContext<BlogMarkdownLoaderOptions>,
source: string,
): void {
const filePath = this.resourcePath;
const fileString = source;
const callback = this.async();
const markdownLoaderOptions = this.getOptions();
// Linkify blog posts
let finalContent = linkify({
fileString,
filePath,
...markdownLoaderOptions,
});
let finalContent = fileString;
// Truncate content if requested (e.g: file.md?truncated=true).
const truncated: boolean | undefined = this.resourceQuery
? !!new URLSearchParams(this.resourceQuery.slice(1)).get('truncated')
: undefined;
// TODO truncate with the AST instead of the string ?
if (truncated) {
finalContent = truncate(finalContent, markdownLoaderOptions.truncateMarker);
}

View file

@ -5,15 +5,10 @@
* LICENSE file in the root directory of this source tree.
*/
import type {BrokenMarkdownLink, ContentPaths} from '@docusaurus/utils';
import type {ContentPaths} from '@docusaurus/utils';
export type BlogContentPaths = ContentPaths;
export type BlogBrokenMarkdownLink = BrokenMarkdownLink<BlogContentPaths>;
/**
 * Options received by the blog Markdown webpack loader.
 */
export type BlogMarkdownLoaderOptions = {
/** Site directory, forwarded to Markdown link resolution. */
siteDir: string;
/** Blog content paths, forwarded to Markdown link resolution. */
contentPaths: BlogContentPaths;
/** Pattern at which the content is cut when a truncated post is requested. */
truncateMarker: RegExp;
/** Lookup from an aliased source file path to a string (presumably the post permalink). */
sourceToPermalink: {[aliasedPath: string]: string};
/** Invoked once for each Markdown link that could not be resolved. */
onBrokenMarkdownLink: (brokenMarkdownLink: BlogBrokenMarkdownLink) => void;
};