docusaurus/packages/docusaurus-utils/src/markdownParser.ts
Joshua Chen 3f1f8255a2
chore: upgrade Prettier + regenerate lock file (#5611)
* Bump deps

* Run prettier

* Format docs

* Minor refactor

* Collapse objects

* Fix type

* Update lock file
2021-09-30 12:54:17 +02:00

189 lines
5.4 KiB
TypeScript

/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import chalk from 'chalk';
import fs from 'fs-extra';
import matter from 'gray-matter';
// Hacky way of stripping out import statements from the excerpt
// TODO: Find a better way to do so, possibly by compiling the Markdown content,
// stripping out HTML tags and obtaining the first line.
export function createExcerpt(fileString: string): string | undefined {
const fileLines = fileString
.trimLeft()
// Remove Markdown alternate title
.replace(/^[^\n]*\n[=]+/g, '')
.split('\n');
let inCode = false;
/* eslint-disable no-continue */
// eslint-disable-next-line no-restricted-syntax
for (const fileLine of fileLines) {
// Skip empty line.
if (!fileLine.trim()) {
continue;
}
// Skip import/export declaration.
if (/^\s*?import\s.*(from.*)?;?|export\s.*{.*};?/.test(fileLine)) {
continue;
}
// Skip code block line.
if (fileLine.trim().startsWith('```')) {
inCode = !inCode;
continue;
} else if (inCode) {
continue;
}
const cleanedLine = fileLine
// Remove HTML tags.
.replace(/<[^>]*>/g, '')
// Remove Title headers
.replace(/^#\s*([^#]*)\s*#?/gm, '')
// Remove Markdown + ATX-style headers
.replace(/^#{1,6}\s*([^#]*)\s*(#{1,6})?/gm, '$1')
// Remove emphasis and strikethroughs.
.replace(/([*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
// Remove images.
.replace(/!\[(.*?)\][[(].*?[\])]/g, '$1')
// Remove footnotes.
.replace(/\[\^.+?\](: .*?$)?/g, '')
// Remove inline links.
.replace(/\[(.*?)\][[(].*?[\])]/g, '$1')
// Remove inline code.
.replace(/`(.+?)`/g, '$1')
// Remove blockquotes.
.replace(/^\s{0,3}>\s?/g, '')
// Remove admonition definition.
.replace(/(:{3}.*)/, '')
// Remove Emoji names within colons include preceding whitespace.
.replace(/\s?(:(::|[^:\n])+:)/g, '')
// Remove custom Markdown heading id.
.replace(/{#*[\w-]+}/, '')
.trim();
if (cleanedLine) {
return cleanedLine;
}
}
return undefined;
}
export function parseFrontMatter(markdownFileContent: string): {
frontMatter: Record<string, unknown>;
content: string;
} {
const {data, content} = matter(markdownFileContent);
return {
frontMatter: data ?? {},
content: content?.trim() ?? '',
};
}
// Try to convert markdown heading as text
// Does not need to be perfect, it is only used as a fallback when frontMatter.title is not provided
// For now, we just unwrap possible inline code blocks (# `config.js`)
function toTextContentTitle(contentTitle: string): string {
if (contentTitle.startsWith('`') && contentTitle.endsWith('`')) {
return contentTitle.substring(1, contentTitle.length - 1);
}
return contentTitle;
}
export function parseMarkdownContentTitle(
contentUntrimmed: string,
options?: {removeContentTitle?: boolean},
): {content: string; contentTitle: string | undefined} {
const removeContentTitleOption = options?.removeContentTitle ?? false;
const content = contentUntrimmed.trim();
const IMPORT_STATEMENT =
/import\s+(([\w*{}\s\n,]+)from\s+)?["'\s]([@\w/_.-]+)["'\s];?|\n/.source;
const REGULAR_TITLE =
/(?<pattern>#\s*(?<title>[^#\n{]*)+[ \t]*(?<suffix>({#*[\w-]+})|#)?\n*?)/
.source;
const ALTERNATE_TITLE = /(?<pattern>\s*(?<title>[^\n]*)\s*\n[=]+)/.source;
const regularTitleMatch = new RegExp(
`^(?:${IMPORT_STATEMENT})*?${REGULAR_TITLE}`,
'g',
).exec(content);
const alternateTitleMatch = new RegExp(
`^(?:${IMPORT_STATEMENT})*?${ALTERNATE_TITLE}`,
'g',
).exec(content);
const titleMatch = regularTitleMatch ?? alternateTitleMatch;
const {pattern, title} = titleMatch?.groups ?? {};
if (!pattern || !title) {
return {content, contentTitle: undefined};
} else {
const newContent = removeContentTitleOption
? content.replace(pattern, '')
: content;
return {
content: newContent.trim(),
contentTitle: toTextContentTitle(title.trim()).trim(),
};
}
}
type ParsedMarkdown = {
frontMatter: Record<string, unknown>;
content: string;
contentTitle: string | undefined;
excerpt: string | undefined;
};
export function parseMarkdownString(
markdownFileContent: string,
options?: {removeContentTitle?: boolean},
): ParsedMarkdown {
try {
const {frontMatter, content: contentWithoutFrontMatter} =
parseFrontMatter(markdownFileContent);
const {content, contentTitle} = parseMarkdownContentTitle(
contentWithoutFrontMatter,
options,
);
const excerpt = createExcerpt(content);
return {
frontMatter,
content,
contentTitle,
excerpt,
};
} catch (e) {
console.error(
chalk.red(`Error while parsing Markdown frontmatter.
This can happen if you use special characters in frontmatter values (try using double quotes around that value).`),
);
throw e;
}
}
export async function parseMarkdownFile(
source: string,
options?: {removeContentTitle?: boolean},
): Promise<ParsedMarkdown> {
const markdownString = await fs.readFile(source, 'utf-8');
try {
return parseMarkdownString(markdownString, options);
} catch (e: any) {
throw new Error(
`Error while parsing Markdown file ${source}: "${e.message}".`,
);
}
}