mirror of
https://github.com/facebook/docusaurus.git
synced 2025-05-01 19:27:48 +02:00
* parseMarkdownContentTitle should ignore {#my-anchor-id} syntax * use frontMatter.title in priority for page meta title * parseMarkdownString should ignore fixed anchor ids syntax * docs: make the distinction between headingTitle + metaTitle more clear + add useful todo * docs: make the distinction between headingTitle + metaTitle more clear + add useful todo * writeHeadingIds should ignore top-level md title like "# Title" => we are not supposed to create anchor links for h1 headers * update tests * fix doc tests
169 lines
4.5 KiB
TypeScript
169 lines
4.5 KiB
TypeScript
/**
|
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
*
|
|
* This source code is licensed under the MIT license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*/
|
|
|
|
import chalk from 'chalk';
|
|
import fs from 'fs-extra';
|
|
import matter from 'gray-matter';
|
|
|
|
// Hacky way of stripping out import statements from the excerpt
|
|
// TODO: Find a better way to do so, possibly by compiling the Markdown content,
|
|
// stripping out HTML tags and obtaining the first line.
|
|
export function createExcerpt(fileString: string): string | undefined {
|
|
const fileLines = fileString
|
|
.trimLeft()
|
|
// Remove Markdown alternate title
|
|
.replace(/^[^\n]*\n[=]+/g, '')
|
|
.split('\n');
|
|
|
|
/* eslint-disable no-continue */
|
|
// eslint-disable-next-line no-restricted-syntax
|
|
for (const fileLine of fileLines) {
|
|
// Skip empty line.
|
|
if (!fileLine.trim()) {
|
|
continue;
|
|
}
|
|
|
|
// Skip import/export declaration.
|
|
if (/^\s*?import\s.*(from.*)?;?|export\s.*{.*};?/.test(fileLine)) {
|
|
continue;
|
|
}
|
|
|
|
const cleanedLine = fileLine
|
|
// Remove HTML tags.
|
|
.replace(/<[^>]*>/g, '')
|
|
// Remove Title headers
|
|
.replace(/^\#\s*([^#]*)\s*\#?/gm, '')
|
|
// Remove Markdown + ATX-style headers
|
|
.replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
|
|
// Remove emphasis and strikethroughs.
|
|
.replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
|
|
// Remove images.
|
|
.replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
|
|
// Remove footnotes.
|
|
.replace(/\[\^.+?\](\: .*?$)?/g, '')
|
|
// Remove inline links.
|
|
.replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
|
|
// Remove inline code.
|
|
.replace(/`(.+?)`/g, '$1')
|
|
// Remove blockquotes.
|
|
.replace(/^\s{0,3}>\s?/g, '')
|
|
// Remove admonition definition.
|
|
.replace(/(:{3}.*)/, '')
|
|
// Remove Emoji names within colons include preceding whitespace.
|
|
.replace(/\s?(:(::|[^:\n])+:)/g, '')
|
|
.trim();
|
|
|
|
if (cleanedLine) {
|
|
return cleanedLine;
|
|
}
|
|
}
|
|
|
|
return undefined;
|
|
}
|
|
|
|
export function parseFrontMatter(
|
|
markdownFileContent: string,
|
|
): {
|
|
frontMatter: Record<string, unknown>;
|
|
content: string;
|
|
} {
|
|
const {data, content} = matter(markdownFileContent);
|
|
return {
|
|
frontMatter: data ?? {},
|
|
content: content?.trim() ?? '',
|
|
};
|
|
}
|
|
|
|
export function parseMarkdownContentTitle(
|
|
contentUntrimmed: string,
|
|
options?: {keepContentTitle?: boolean},
|
|
): {content: string; contentTitle: string | undefined} {
|
|
const keepContentTitleOption = options?.keepContentTitle ?? false;
|
|
|
|
const content = contentUntrimmed.trim();
|
|
|
|
const regularTitleMatch = /^(?:import\s.*(from.*)?;?|\n)*?(?<pattern>#\s*(?<title>[^#\n{]*)+[ \t]*(?<suffix>({#*[\w-]+})|#)?\n*?)/g.exec(
|
|
content,
|
|
);
|
|
const alternateTitleMatch = /^(?:import\s.*(from.*)?;?|\n)*?(?<pattern>\s*(?<title>[^\n]*)\s*\n[=]+)/g.exec(
|
|
content,
|
|
);
|
|
|
|
const titleMatch = regularTitleMatch ?? alternateTitleMatch;
|
|
const {pattern, title} = titleMatch?.groups ?? {};
|
|
|
|
if (!pattern || !title) {
|
|
return {content, contentTitle: undefined};
|
|
}
|
|
|
|
const newContent = keepContentTitleOption
|
|
? content
|
|
: content.replace(pattern, '');
|
|
|
|
return {
|
|
content: newContent.trim(),
|
|
contentTitle: title.trim(),
|
|
};
|
|
}
|
|
|
|
type ParsedMarkdown = {
|
|
frontMatter: Record<string, unknown>;
|
|
content: string;
|
|
contentTitle: string | undefined;
|
|
excerpt: string | undefined;
|
|
};
|
|
|
|
export function parseMarkdownString(
|
|
markdownFileContent: string,
|
|
options?: {
|
|
keepContentTitle?: boolean;
|
|
},
|
|
): ParsedMarkdown {
|
|
try {
|
|
const keepContentTitle = options?.keepContentTitle ?? false;
|
|
|
|
const {frontMatter, content: contentWithoutFrontMatter} = parseFrontMatter(
|
|
markdownFileContent,
|
|
);
|
|
|
|
const {content, contentTitle} = parseMarkdownContentTitle(
|
|
contentWithoutFrontMatter,
|
|
{
|
|
keepContentTitle,
|
|
},
|
|
);
|
|
|
|
const excerpt = createExcerpt(content);
|
|
|
|
return {
|
|
frontMatter,
|
|
content,
|
|
contentTitle,
|
|
excerpt,
|
|
};
|
|
} catch (e) {
|
|
console.error(
|
|
chalk.red(`Error while parsing markdown front matter.
|
|
This can happen if you use special characters like : in frontmatter values (try using "" around that value)`),
|
|
);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
export async function parseMarkdownFile(
|
|
source: string,
|
|
): Promise<ParsedMarkdown> {
|
|
const markdownString = await fs.readFile(source, 'utf-8');
|
|
try {
|
|
return parseMarkdownString(markdownString);
|
|
} catch (e) {
|
|
throw new Error(
|
|
`Error while parsing markdown file ${source}
|
|
${e.message}`,
|
|
);
|
|
}
|
|
}
|