fix(mdx-loader): resolve Markdown/MDX links with Remark instead of RegExp (#10168)

This commit is contained in:
Sébastien Lorber 2024-05-24 19:03:23 +02:00 committed by GitHub
parent aab332c2ae
commit e34614963e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 902 additions and 1620 deletions

View file

@ -1,250 +0,0 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`replaceMarkdownLinks does basic replace 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "hmmm.md",
},
],
"newContent": "
[foo](/doc/foo)
[baz](/doc/baz)
[foo](/doc/foo)
[http](http://github.com/facebook/docusaurus/README.md)
[https](https://github.com/facebook/docusaurus/README.md)
[asset](./foo.js)
[asset as well](@site/docs/_partial.md)
[looks like http...](/doc/http)
[nonexistent](hmmm.md)
",
}
`;
exports[`replaceMarkdownLinks handles link titles 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[URL](/docs/file "title")
[URL](/docs/file 'title')
[URL](/docs/file (title))
",
}
`;
exports[`replaceMarkdownLinks handles stray spaces 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[URL]( /docs/file )
[ref]: /docs/file
",
}
`;
exports[`replaceMarkdownLinks handles unpaired fences 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
\`\`\`foo
hello
\`\`\`foo
hello
\`\`\`
A [link](/docs/file)
",
}
`;
exports[`replaceMarkdownLinks ignores links in HTML comments 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "./foo.md",
},
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "./foo.md",
},
],
"newContent": "
<!-- [foo](./foo.md) -->
<!--
[foo](./foo.md)
-->
",
}
`;
exports[`replaceMarkdownLinks ignores links in fenced blocks 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
\`\`\`
[foo](foo.md)
\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
~~~js
[foo](foo.md)
~~~
~~~js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
~~~
",
}
`;
exports[`replaceMarkdownLinks ignores links in inline code 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro.md",
"link": "foo.md",
},
],
"newContent": "
\`[foo](foo.md)\`
",
}
`;
exports[`replaceMarkdownLinks preserves query/hash 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[URL](/docs/file?foo=bar#baz)
[URL](/docs/file#a)
[URL](/docs/file?c)
",
}
`;
exports[`replaceMarkdownLinks replaces Markdown links with spaces 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[doc a](/docs/doc%20a)
[doc a](</docs/doc%20a>)
[doc b](/docs/my%20docs/doc%20b)
[doc b](</docs/my%20docs/doc%20b>)
[doc]: </docs/my%20docs/doc%20b>
",
}
`;
exports[`replaceMarkdownLinks replaces links with same title as URL 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[foo.md](/docs/foo)
[./foo.md](</docs/foo>)
[./foo.md](/docs/foo)
[foo.md](/docs/foo)
[./foo.md](/docs/foo)
",
}
`;
exports[`replaceMarkdownLinks replaces multiple links on same line 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
[a](/docs/a), [a](/docs/a), [b](/docs/b), [c](/docs/c)
",
}
`;
exports[`replaceMarkdownLinks replaces reference style Markdown links 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "
The following operations are defined for [URI]s:
* [info]: Returns metadata about the resource,
* [list]: Returns metadata about the resource's children (like getting the content of a local directory).
[URI]: /docs/api/classes/uri
[info]: /docs/api/classes/uri#info
[list]: /docs/api/classes/uri#list
",
}
`;
exports[`replaceMarkdownLinks replaces two links on the same line 1`] = `
{
"brokenMarkdownLinks": [],
"newContent": "[TypeScript](/programming-languages/typescript/) and [Go](/programming-languages/go/)",
}
`;
exports[`replaceMarkdownLinks resolves absolute and relative links differently 1`] = `
{
"brokenMarkdownLinks": [
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro/intro.md",
"link": "./api/classes/divine_uri.URI.md",
},
{
"contentPaths": {
"contentPath": "docs",
"contentPathLocalized": "i18n/docs-localized",
},
"filePath": "docs/intro/intro.md",
"link": "/another.md",
},
],
"newContent": "
[Relative link](/docs/another)
[Relative link 2](/docs/api/classes/uri)
[Relative link that should be absolute](./api/classes/divine_uri.URI.md)
[Absolute link](/docs/api/classes/uri)
[Absolute link from site dir](/docs/api/classes/uri)
[Absolute link that should be relative](/another.md)
[Relative link that acts as absolute](/docs/api/classes/uri)
[Relative link that acts as relative](/docs/another)
",
}
`;

View file

@ -5,401 +5,70 @@
* LICENSE file in the root directory of this source tree.
*/
import {replaceMarkdownLinks} from '../markdownLinks';
import {resolveMarkdownLinkPathname} from '../markdownLinks';
describe('resolveMarkdownLinkPathname', () => {
type Context = Parameters<typeof resolveMarkdownLinkPathname>[1];
describe('replaceMarkdownLinks', () => {
it('does basic replace', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/foo.md': '/doc/foo',
'@site/docs/bar/baz.md': '/doc/baz',
'@site/docs/http.foo.md': '/doc/http',
},
fileString: `
[foo](./foo.md)
[baz](./bar/baz.md)
[foo](foo.md)
[http](http://github.com/facebook/docusaurus/README.md)
[https](https://github.com/facebook/docusaurus/README.md)
[asset](./foo.js)
[asset as well](@site/docs/_partial.md)
[looks like http...](http.foo.md)
[nonexistent](hmmm.md)
`,
}),
).toMatchSnapshot();
});
const context: Context = {
siteDir: '.',
sourceFilePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/foo.md': '/doc/foo',
'@site/docs/bar/baz.md': '/doc/baz',
'@site/docs/http.foo.md': '/doc/http',
},
};
it('replaces two links on the same line', () => {
// cSpell:ignore Goooooooooo
// This is a very arcane bug: if we continue matching using the previous
// matching index (as is the behavior of RegExp#exec), it will go right over
// the next Markdown link and fail to match the "Go" link. This only happens
// when: (1) the replaced link is much shorter than the Markdown path, (2)
// the next link is very close to the current one (e.g. here if it's not
// "Go" but "Goooooooooo", or if every link has the /docs/ prefix, the bug
// will not trigger because it won't overshoot)
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/',
'@site/docs/programming-languages/typescript/typescript.md':
'/programming-languages/typescript/',
'@site/docs/programming-languages/go/go.md':
'/programming-languages/go/',
},
fileString: `[TypeScript](programming-languages/typescript/typescript.md) and [Go](programming-languages/go/go.md)`,
}),
).toMatchSnapshot();
});
// Asserts that `linkPathname` resolves to `expectedOutput` when resolved
// against the `context` in scope. Pass `null` as the expectation for links
// that must not resolve, which is why `null` is part of the parameter type.
function test(linkPathname: string, expectedOutput: string | null) {
  const output = resolveMarkdownLinkPathname(linkPathname, context);
  expect(output).toEqual(expectedOutput);
}
it('replaces reference style Markdown links', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro/intro.md': '/docs/intro',
'@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
},
fileString: `
The following operations are defined for [URI]s:
* [info]: Returns metadata about the resource,
* [list]: Returns metadata about the resource's children (like getting the content of a local directory).
[URI]: ../api/classes/divine_uri.URI.md
[info]: ../api/classes/divine_uri.URI.md#info
[list]: ../api/classes/divine_uri.URI.md#list
`,
}),
).toMatchSnapshot();
test('./foo.md', '/doc/foo');
test('foo.md', '/doc/foo');
test('./bar/baz.md', '/doc/baz');
test('http.foo.md', '/doc/http');
test('@site/docs/_partial.md', null);
test('foo.js', null);
test('nonexistent.md', null);
test('https://github.com/facebook/docusaurus/README.md', null);
});
it('resolves absolute and relative links differently', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
const context: Context = {
siteDir: '.',
sourceFilePath: 'docs/intro/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro/intro.md': '/docs/intro',
'@site/docs/intro/another.md': '/docs/another',
'@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
},
sourceToPermalink: {
'@site/docs/intro/intro.md': '/docs/intro',
'@site/docs/intro/another.md': '/docs/another',
'@site/docs/api/classes/divine_uri.URI.md': '/docs/api/classes/uri',
},
};
fileString: `
[Relative link](./another.md)
[Relative link 2](../api/classes/divine_uri.URI.md)
[Relative link that should be absolute](./api/classes/divine_uri.URI.md)
[Absolute link](/api/classes/divine_uri.URI.md)
[Absolute link from site dir](/docs/api/classes/divine_uri.URI.md)
[Absolute link that should be relative](/another.md)
[Relative link that acts as absolute](api/classes/divine_uri.URI.md)
[Relative link that acts as relative](another.md)
`,
}),
).toMatchSnapshot();
});
// Asserts that `linkPathname` resolves to `expectedOutput` when resolved
// against the `context` in scope. Pass `null` as the expectation for links
// that must not resolve, which is why `null` is part of the parameter type.
function test(linkPathname: string, expectedOutput: string | null) {
  const output = resolveMarkdownLinkPathname(linkPathname, context);
  expect(output).toEqual(expectedOutput);
}
// TODO bad
it('ignores links in HTML comments', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
},
fileString: `
<!-- [foo](./foo.md) -->
<!--
[foo](./foo.md)
-->
`,
}),
).toMatchSnapshot();
});
it('ignores links in fenced blocks', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
},
fileString: `
\`\`\`
[foo](foo.md)
\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
\`\`\`\`js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`\`
~~~js
[foo](foo.md)
~~~
~~~js
[foo](foo.md)
\`\`\`
[foo](foo.md)
\`\`\`
[foo](foo.md)
~~~
`,
}),
).toMatchSnapshot();
});
// FIXME
it('ignores links in inline code', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
},
fileString: `
\`[foo](foo.md)\`
`,
}),
).toMatchSnapshot();
});
it('replaces links with same title as URL', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/foo.md': '/docs/foo',
},
fileString: `
[foo.md](foo.md)
[./foo.md](<./foo.md>)
[./foo.md](./foo.md)
[foo.md](./foo.md)
[./foo.md](foo.md)
`,
}),
).toMatchSnapshot();
});
it('replaces multiple links on same line', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/intro.md': '/docs/intro',
'@site/docs/a.md': '/docs/a',
'@site/docs/b.md': '/docs/b',
'@site/docs/c.md': '/docs/c',
},
fileString: `
[a](a.md), [a](a.md), [b](b.md), [c](c.md)
`,
}),
).toMatchSnapshot();
});
it('replaces Markdown links with spaces', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/intro.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/doc a.md': '/docs/doc%20a',
'@site/docs/my docs/doc b.md': '/docs/my%20docs/doc%20b',
},
fileString: `
[doc a](./doc%20a.md)
[doc a](<./doc a.md>)
[doc b](./my%20docs/doc%20b.md)
[doc b](<./my docs/doc b.md>)
[doc]: <./my docs/doc b.md>
`,
}),
).toMatchSnapshot();
});
it('does not replace non-Markdown links', () => {
const input = `
[asset](./file.md_asset/1.png)
[URL](<https://example.com/file_(1).md>)
[not a link]((foo)
[not a link](foo bar)
[not a link]: foo bar
[not a link]: (foo
[not a link]: bar)
`;
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: input,
}),
).toEqual({
newContent: input,
brokenMarkdownLinks: [],
});
});
it('handles stray spaces', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
[URL]( ./file.md )
[ref]: ./file.md
`,
}),
).toMatchSnapshot();
});
it('handles link titles', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
[URL](./file.md "title")
[URL](./file.md 'title')
[URL](./file.md (title))
`,
}),
).toMatchSnapshot();
});
it('preserves query/hash', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
[URL](./file.md?foo=bar#baz)
[URL](./file.md#a)
[URL](./file.md?c)
`,
}),
).toMatchSnapshot();
});
it('handles unpaired fences', () => {
expect(
replaceMarkdownLinks({
siteDir: '.',
filePath: 'docs/file.md',
contentPaths: {
contentPath: 'docs',
contentPathLocalized: 'i18n/docs-localized',
},
sourceToPermalink: {
'@site/docs/file.md': '/docs/file',
},
fileString: `
\`\`\`foo
hello
\`\`\`foo
hello
\`\`\`
A [link](./file.md)
`,
}),
).toMatchSnapshot();
test('./another.md', '/docs/another');
test('../api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('./api/classes/divine_uri.URI.md', null);
test('/api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('/docs/api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('/another.md', null);
test('api/classes/divine_uri.URI.md', '/docs/api/classes/uri');
test('another.md', '/docs/another');
});
});

View file

@ -17,6 +17,9 @@ import {
hasSSHProtocol,
parseURLPath,
serializeURLPath,
parseURLOrPath,
toURLPath,
parseLocalURLPath,
} from '../urlUtils';
describe('normalizeUrl', () => {
@ -228,6 +231,166 @@ describe('isValidPathname', () => {
});
});
// Checks that toURLPath() converts a URL object into {pathname, search, hash}.
// The expectations below pin two conventions:
// - search/hash are reported without their leading "?" / "#"
// - a missing part is `undefined`, while a present-but-empty part is ''
describe('toURLPath', () => {
// A URL built directly with the URL constructor
it('url', () => {
const url = new URL('https://example.com/pathname?qs#hash');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: 'qs',
hash: 'hash',
});
});
// No "#" in the input: hash is expected to be undefined
it('pathname + qs', () => {
const url = parseURLOrPath('/pathname?qs');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: 'qs',
hash: undefined,
});
});
// No "?" in the input: search is expected to be undefined
it('pathname + hash', () => {
const url = parseURLOrPath('/pathname#hash');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: undefined,
hash: 'hash',
});
});
it('pathname + qs + hash', () => {
const url = parseURLOrPath('/pathname?qs#hash');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: 'qs',
hash: 'hash',
});
});
// "?" and "#" are present but empty: both are expected to be '' (not undefined)
it('pathname + empty qs + empty hash', () => {
const url = parseURLOrPath('/pathname?#');
expect(toURLPath(url)).toEqual({
pathname: '/pathname',
search: '',
hash: '',
});
});
});
// Checks parseLocalURLPath(): it is expected to return null for URLs that
// carry a protocol/host, and otherwise to split the input string into
// {pathname, search, hash} without resolving "./" or "../" segments.
describe('parseLocalURLPath', () => {
// Fully qualified URLs (with optional port / credentials) are not local
it('returns null for non-local URLs', () => {
expect(parseLocalURLPath('https://example')).toBeNull();
expect(parseLocalURLPath('https://example:80')).toBeNull();
expect(parseLocalURLPath('https://example.com/xyz')).toBeNull();
expect(parseLocalURLPath('https://example.com/xyz?qs#hash')).toBeNull();
expect(parseLocalURLPath('https://example.com:80/xyz?qs#hash')).toBeNull();
expect(parseLocalURLPath('https://u:p@example:80/xyz?qs#hash')).toBeNull();
});
// The pathname is expected back exactly as written, relative segments included
it('parses pathname', () => {
expect(parseLocalURLPath('/pathname')).toEqual({
pathname: '/pathname',
search: undefined,
hash: undefined,
});
expect(parseLocalURLPath('pathname.md')).toEqual({
pathname: 'pathname.md',
search: undefined,
hash: undefined,
});
expect(parseLocalURLPath('./pathname')).toEqual({
pathname: './pathname',
search: undefined,
hash: undefined,
});
expect(parseLocalURLPath('../../pathname.mdx')).toEqual({
pathname: '../../pathname.mdx',
search: undefined,
hash: undefined,
});
});
// Query-string-only inputs: pathname is '' and a bare "?" gives search ''
it('parses qs', () => {
expect(parseLocalURLPath('?')).toEqual({
pathname: '',
search: '',
hash: undefined,
});
expect(parseLocalURLPath('?qs')).toEqual({
pathname: '',
search: 'qs',
hash: undefined,
});
expect(parseLocalURLPath('?age=42')).toEqual({
pathname: '',
search: 'age=42',
hash: undefined,
});
});
// Hash-only inputs: pathname is '' and a bare "#" gives hash ''
it('parses hash', () => {
expect(parseLocalURLPath('#')).toEqual({
pathname: '',
search: undefined,
hash: '',
});
expect(parseLocalURLPath('#hash')).toEqual({
pathname: '',
search: undefined,
hash: 'hash',
});
});
// Spaces in the pathname are kept as-is, and everything after the first "#"
// (including later "?" or "#" characters) is expected to belong to the hash
it('parses complex local paths', () => {
expect(
parseLocalURLPath('../../great/path name/doc.mdx?age=42#hash'),
).toEqual({
pathname: '../../great/path name/doc.mdx',
search: 'age=42',
hash: 'hash',
});
expect(parseLocalURLPath('my great path?=42#hash?qsInHash')).toEqual({
pathname: 'my great path',
search: '=42',
hash: 'hash?qsInHash',
});
expect(parseLocalURLPath('?qs1#hash1?qs2#hash2')).toEqual({
pathname: '',
search: 'qs1',
hash: 'hash1?qs2#hash2',
});
expect(parseLocalURLPath('../swizzling.mdx#wrapping')).toEqual({
pathname: '../swizzling.mdx',
search: undefined,
hash: 'wrapping',
});
});
// Round-trip check: serializeURLPath(parseLocalURLPath(x)) must give back x
it('parses is isomorphic with serialize', () => {
const testLocalPath = (url: string) => {
// Non-null assertion: every fixture below is expected to parse as local
expect(serializeURLPath(parseLocalURLPath(url)!)).toBe(url);
};
[
'',
'doc',
'doc.mdx',
'./doc.mdx',
'.././doc.mdx',
'/some pathname/.././doc.mdx',
'?',
'?qs',
'#',
'#hash',
// NOTE(review): '?qs#hash' is listed twice - presumably unintentional; confirm
'?qs#hash',
'?qs#hash',
'doc.mdx?qs#hash',
'/some pathname/.././doc.mdx?qs#hash',
'/some pathname/.././doc.mdx?qs#hash?qs2#hash2',
].forEach(testLocalPath);
});
});
describe('parseURLPath', () => {
it('parse and resolve pathname', () => {
expect(parseURLPath('')).toEqual({

View file

@ -44,6 +44,9 @@ export {
isValidPathname,
resolvePathname,
parseURLPath,
parseLocalURLPath,
parseURLOrPath,
toURLPath,
serializeURLPath,
hasSSHProtocol,
buildHttpsUrl,
@ -71,11 +74,7 @@ export {
writeMarkdownHeadingId,
type WriteHeadingIDOptions,
} from './markdownUtils';
export {
type ContentPaths,
type BrokenMarkdownLink,
replaceMarkdownLinks,
} from './markdownLinks';
export {type ContentPaths, resolveMarkdownLinkPathname} from './markdownLinks';
export {type SluggerOptions, type Slugger, createSlugger} from './slugger';
export {
isNameTooLong,

View file

@ -40,159 +40,35 @@ export type BrokenMarkdownLink<T extends ContentPaths> = {
link: string;
};
type CodeFence = {
type: '`' | '~';
definitelyOpen: boolean;
count: number;
};
function parseCodeFence(line: string): CodeFence | null {
const match = line.trim().match(/^(?<fence>`{3,}|~{3,})(?<rest>.*)/);
if (!match) {
return null;
// Note this is historical logic extracted during a 2024 refactor
// The algo has been kept exactly as before for retro compatibility
// See also https://github.com/facebook/docusaurus/pull/10168
export function resolveMarkdownLinkPathname(
linkPathname: string,
context: {
sourceFilePath: string;
sourceToPermalink: {[aliasedFilePath: string]: string};
contentPaths: ContentPaths;
siteDir: string;
},
): string | null {
const {sourceFilePath, sourceToPermalink, contentPaths, siteDir} = context;
const sourceDirsToTry: string[] = [];
// ./file.md and ../file.md are always relative to the current file
if (!linkPathname.startsWith('./') && !linkPathname.startsWith('../')) {
sourceDirsToTry.push(...getContentPathList(contentPaths), siteDir);
}
return {
type: match.groups!.fence![0]! as '`' | '~',
definitelyOpen: !!match.groups!.rest!,
count: match.groups!.fence!.length,
};
}
/**
* Takes a Markdown file and replaces relative file references with their URL
* counterparts, e.g. `[link](./intro.md)` => `[link](/docs/intro)`, preserving
* everything else.
*
* This method uses best effort to find a matching file. The file reference can
* be relative to the directory of the current file (most likely) or any of the
* content paths (so `/tutorials/intro.md` can be resolved as
* `<siteDir>/docs/tutorials/intro.md`). Links that contain the `http(s):` or
* `@site/` prefix will always be ignored.
*/
export function replaceMarkdownLinks<T extends ContentPaths>({
siteDir,
fileString,
filePath,
contentPaths,
sourceToPermalink,
}: {
/** Absolute path to the site directory, used to resolve aliased paths. */
siteDir: string;
/** The Markdown file content to be processed. */
fileString: string;
/** Absolute path to the current file containing `fileString`. */
filePath: string;
/** The content paths which the file reference may live in. */
contentPaths: T;
/**
* A map from source paths to their URLs. Source paths are `@site` aliased.
*/
sourceToPermalink: {[aliasedPath: string]: string};
}): {
/**
* The content with all Markdown file references replaced with their URLs.
* Unresolved links are left as-is.
*/
newContent: string;
/** The list of broken links, */
brokenMarkdownLinks: BrokenMarkdownLink<T>[];
} {
const brokenMarkdownLinks: BrokenMarkdownLink<T>[] = [];
// Replace internal markdown linking (except in fenced blocks).
let lastOpenCodeFence: CodeFence | null = null;
const lines = fileString.split('\n').map((line) => {
const codeFence = parseCodeFence(line);
if (codeFence) {
if (!lastOpenCodeFence) {
lastOpenCodeFence = codeFence;
} else if (
!codeFence.definitelyOpen &&
lastOpenCodeFence.type === codeFence.type &&
lastOpenCodeFence.count <= codeFence.count
) {
// All three conditions must be met in order for this to be considered
// a closing fence.
lastOpenCodeFence = null;
}
}
if (lastOpenCodeFence) {
return line;
}
let modifiedLine = line;
// Replace inline-style links or reference-style links e.g:
// This is [Document 1](doc1.md)
// [doc1]: doc1.md
const linkTitlePattern = '(?:\\s+(?:\'.*?\'|".*?"|\\(.*?\\)))?';
const linkSuffixPattern = '(?:\\?[^#>\\s]+)?(?:#[^>\\s]+)?';
const linkCapture = (forbidden: string) =>
`((?!https?://|@site/)[^${forbidden}#?]+)`;
const linkURLPattern = `(?:(?!<)${linkCapture(
'()\\s',
)}${linkSuffixPattern}|<${linkCapture('>')}${linkSuffixPattern}>)`;
const linkPattern = new RegExp(
`\\[(?:(?!\\]\\().)*\\]\\(\\s*${linkURLPattern}${linkTitlePattern}\\s*\\)|^\\s*\\[[^[\\]]*[^[\\]\\s][^[\\]]*\\]:\\s*${linkURLPattern}${linkTitlePattern}$`,
'dgm',
);
let mdMatch = linkPattern.exec(modifiedLine);
while (mdMatch !== null) {
// Replace it to correct html link.
const mdLink = mdMatch.slice(1, 5).find(Boolean)!;
const mdLinkRange = mdMatch.indices!.slice(1, 5).find(Boolean)!;
if (!/\.mdx?$/.test(mdLink)) {
mdMatch = linkPattern.exec(modifiedLine);
continue;
}
const sourcesToTry: string[] = [];
// ./file.md and ../file.md are always relative to the current file
if (!mdLink.startsWith('./') && !mdLink.startsWith('../')) {
sourcesToTry.push(...getContentPathList(contentPaths), siteDir);
}
// /file.md is always relative to the content path
if (!mdLink.startsWith('/')) {
sourcesToTry.push(path.dirname(filePath));
}
const aliasedSourceMatch = sourcesToTry
.map((p) => path.join(p, decodeURIComponent(mdLink)))
.map((source) => aliasedSitePath(source, siteDir))
.find((source) => sourceToPermalink[source]);
const permalink: string | undefined = aliasedSourceMatch
? sourceToPermalink[aliasedSourceMatch]
: undefined;
if (permalink) {
// MDX won't be happy if the permalink contains a space, we need to
// convert it to %20
const encodedPermalink = permalink
.split('/')
.map((part) => part.replace(/\s/g, '%20'))
.join('/');
modifiedLine = `${modifiedLine.slice(
0,
mdLinkRange[0],
)}${encodedPermalink}${modifiedLine.slice(mdLinkRange[1])}`;
// Adjust the lastIndex to avoid passing over the next link if the
// newly replaced URL is shorter.
linkPattern.lastIndex += encodedPermalink.length - mdLink.length;
} else {
const brokenMarkdownLink: BrokenMarkdownLink<T> = {
contentPaths,
filePath,
link: mdLink,
};
brokenMarkdownLinks.push(brokenMarkdownLink);
}
mdMatch = linkPattern.exec(modifiedLine);
}
return modifiedLine;
});
const newContent = lines.join('\n');
return {newContent, brokenMarkdownLinks};
// /file.md is never relative to the source file path
if (!linkPathname.startsWith('/')) {
sourceDirsToTry.push(path.dirname(sourceFilePath));
}
const aliasedSourceMatch = sourceDirsToTry
.map((sourceDir) => path.join(sourceDir, decodeURIComponent(linkPathname)))
.map((source) => aliasedSitePath(source, siteDir))
.find((source) => sourceToPermalink[source]);
return aliasedSourceMatch
? sourceToPermalink[aliasedSourceMatch] ?? null
: null;
}

View file

@ -164,27 +164,22 @@ export function isValidPathname(str: string): boolean {
}
}
/**
 * Builds a `URL` from a full URL or a path.
 *
 * @param url - The URL or path to parse.
 * @param base - Base to resolve `url` against. When `null`/`undefined`,
 * `https://example.com` is used so that bare paths can still be parsed.
 * @returns The parsed `URL`.
 * @throws Error when the `URL` constructor rejects the input; the original
 * error is attached as `cause`.
 */
export function parseURLOrPath(url: string, base?: string | URL): URL {
  const effectiveBase = base ?? 'https://example.com';
  try {
    // TODO when Node supports it, use URL.parse could be faster?
    // see https://kilianvalkhof.com/2024/javascript/the-problem-with-new-url-and-how-url-parse-fixes-that/
    return new URL(url, effectiveBase);
  } catch (cause) {
    // Only mention the base in the message when the caller provided a truthy one
    const baseDetails = base ? ` with base ${base}` : '';
    throw new Error(`Can't parse URL ${url}${baseDetails}`, {cause});
  }
}
export type URLPath = {pathname: string; search?: string; hash?: string};
// Let's name the concept of (pathname + search + hash) as URLPath
// See also https://twitter.com/kettanaito/status/1741768992866308120
// Note: this function also resolves relative pathnames while parsing!
export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
function parseURL(url: string, base?: string | URL): URL {
try {
// A possible alternative? https://github.com/unjs/ufo#url
return new URL(url, base ?? 'https://example.com');
} catch (e) {
throw new Error(
`Can't parse URL ${url}${base ? ` with base ${base}` : ''}`,
{cause: e},
);
}
}
const base = fromPath ? parseURL(fromPath) : undefined;
const url = parseURL(urlPath, base);
export function toURLPath(url: URL): URLPath {
const {pathname} = url;
// Fixes annoying url.search behavior
@ -193,17 +188,17 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
// "?param => "param"
const search = url.search
? url.search.slice(1)
: urlPath.includes('?')
: url.href.includes('?')
? ''
: undefined;
// Fixes annoying url.hash behavior
// "" => undefined
// "#" => ""
// "?param => "param"
// "#param => "param"
const hash = url.hash
? url.hash.slice(1)
: urlPath.includes('#')
: url.href.includes('#')
? ''
: undefined;
@ -214,6 +209,65 @@ export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
};
}
/**
 * Parses a string into a URLPath, our name for the
 * (pathname + search + hash) triple.
 * See also https://twitter.com/kettanaito/status/1741768992866308120
 *
 * Note: relative pathnames are resolved during parsing. When `fromPath` is
 * provided, it is parsed first and used as the base for `urlPath`.
 */
export function parseURLPath(urlPath: string, fromPath?: string): URLPath {
  if (!fromPath) {
    return toURLPath(parseURLOrPath(urlPath));
  }
  const fromURL = parseURLOrPath(fromPath);
  return toURLPath(parseURLOrPath(urlPath, fromURL));
}
/**
 * Splits a "local" URL string such as "foo", "../foo" or "./foo.mdx?qs#hash"
 * into its pathname / search / hash parts.
 *
 * Unlike "parseURLPath()", the pathname is NOT resolved: the returned
 * pathname of "../../foo.mdx" will be "../../foo.mdx", not "/foo".
 *
 * @returns The parsed URLPath, or null if the url is not "local" (it
 * specifies a protocol, host, username or password).
 */
export function parseLocalURLPath(urlPath: string): URLPath | null {
  // "new URL()" needs a protocol to parse strings like "": give it a
  // placeholder one, so that any other protocol must come from the input
  const placeholderProtocol = 'unspecified:';
  const parsed = parseURLOrPath(urlPath, `${placeholderProtocol}//`);

  // Links with a specified protocol / host are ignored
  // (usually fully qualified links starting with https://)
  const isLocal =
    parsed.protocol === placeholderProtocol &&
    parsed.host === '' &&
    parsed.username === '' &&
    parsed.password === '';
  if (!isLocal) {
    return null;
  }

  // The "new URL()" result can't be used from here: it always resolves urls,
  // removing any "./" or "../" from the pathname, which we want to keep.
  // The input string is split manually instead.

  // Everything after the first "#" is the hash
  const hashStart = urlPath.indexOf('#');
  const hasHash = hashStart !== -1;
  const hash = hasHash ? urlPath.slice(hashStart + 1) : undefined;
  const beforeHash = hasHash ? urlPath.slice(0, hashStart) : urlPath;

  // In what remains, everything after the first "?" is the search
  const searchStart = beforeHash.indexOf('?');
  const hasSearch = searchStart !== -1;
  const search = hasSearch ? beforeHash.slice(searchStart + 1) : undefined;
  const pathname = hasSearch ? beforeHash.slice(0, searchStart) : beforeHash;

  return {pathname, search, hash};
}
export function serializeURLPath(urlPath: URLPath): string {
const search = urlPath.search === undefined ? '' : `?${urlPath.search}`;
const hash = urlPath.hash === undefined ? '' : `#${urlPath.hash}`;