fix(v2): remove Markdown syntax from excerpt (#2701)

* fix(v2): remove Markdown syntax from excerpt
* Update snapshots
* Use Remark
* Switch to own solution
2025-07-07 03:48:01 +02:00 · 2020-05-18 14:57:03 +03:00 · 2020-05-18 14:57:03 +03:00 · da0f865831
commit da0f865831
parent c7baa125e6
4 changed files with 112 additions and 20 deletions
--- a/packages/docusaurus-utils/src/index.ts
+++ b/packages/docusaurus-utils/src/index.ts
@ -185,6 +185,49 @@ export function getSubFolder(file: string, refDir: string): string | null {
 // Regex for an import statement.
 const importRegexString = '^(.*import){1}(.+){0,1}\\s[\'"](.+)[\'"];?';

+/**
+ * Builds a plain-text excerpt from Markdown/MDX content.
+ *
+ * Leading whitespace is dropped, `import` statements are removed when the
+ * content starts with one, and each line is then stripped of common Markdown
+ * syntax. The first line that is still non-empty after cleaning is returned.
+ *
+ * @param fileString Raw Markdown/MDX content to extract the excerpt from.
+ * @returns The first non-empty cleaned line, or `undefined` when every line
+ * is empty once the Markdown syntax has been removed.
+ */
+export function createExcerpt(fileString: string): string | undefined {
+  let fileContent = fileString.trimLeft();
+
+  // Imports are only stripped when the content begins with one: this check
+  // has no `m` flag, so `^` anchors to the start of the whole string.
+  if (RegExp(importRegexString).test(fileContent)) {
+    fileContent = fileContent
+      .replace(RegExp(importRegexString, 'gm'), '')
+      .trimLeft();
+  }
+
+  const fileLines = fileContent.split('\n');
+
+  for (const fileLine of fileLines) {
+    const cleanedLine = fileLine
+      // Remove HTML tags.
+      .replace(/<[^>]*>/g, '')
+      // Remove ATX-style headers.
+      .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
+      // Remove emphasis and strikethroughs.
+      .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
+      // Remove images. This must run before the inline-link rule: an image is
+      // a link prefixed with `!`, so the link rule would otherwise rewrite
+      // `![alt](src)` to `!alt` and leave nothing for this rule to match.
+      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '')
+      // Remove footnotes. This must also run before the inline-link rule,
+      // whose lazy match can otherwise start at a footnote marker
+      // (e.g. `[^1][^2]` or `[^1]: see [x](y)`) and leave `^1...` behind.
+      .replace(/\[\^.+?\](\: .*?$)?/g, '')
+      // Remove inline links, keeping the link text.
+      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
+      // Remove inline code.
+      .replace(/`(.+?)`/g, '$1')
+      // Remove blockquotes.
+      .replace(/^\s{0,3}>\s?/g, '')
+      // Remove admonition definition.
+      .replace(/(:{3}.*)/, '')
+      // Remove Emoji names within colons include preceding whitespace.
+      .replace(/\s?(:(::|[^:\n])+:)/g, '')
+      .trim();
+
+    // Lines that were pure markup (image only, footnote definition, ...) end
+    // up empty and are skipped in favour of the next line.
+    if (cleanedLine) {
+      return cleanedLine;
+    }
+  }
+
+  return undefined;
+}
+
 export function parse(
  fileString: string,
 ): {
@ -196,18 +239,10 @@ export function parse(
 } {
  const options: {} = {
    excerpt: (file: matter.GrayMatterFile<string>): void => {
-      let fileContent = file.content.trimLeft();
-
      // Hacky way of stripping out import statements from the excerpt
      // TODO: Find a better way to do so, possibly by compiling the Markdown content,
      // stripping out HTML tags and obtaining the first line.
-      if (RegExp(importRegexString).test(fileContent)) {
-        fileContent = fileContent
-          .replace(RegExp(importRegexString, 'gm'), '')
-          .trimLeft();
-      }
-
-      file.excerpt = fileContent.split('\n', 1).shift();
+      file.excerpt = createExcerpt(file.content);
    },
  };