fix(v2): remove Markdown syntax from excerpt (#2701)

* fix(v2): remove Markdown syntax from excerpt * Update snapshots * Use Remark * Switch to own solution
2025-07-13 23:07:58 +02:00 · 2020-05-18 14:57:03 +03:00 · 2020-05-18 14:57:03 +03:00 · da0f865831
commit da0f865831
parent c7baa125e6
4 changed files with 112 additions and 20 deletions
--- a/packages/docusaurus-plugin-content-docs/src/tests/index.test.ts
+++ b/packages/docusaurus-plugin-content-docs/src/tests/index.test.ts
@ -303,7 +303,7 @@ describe('versioned website', () => {
      permalink: '/docs/next/foo/bar',
      source: path.join('@site', routeBasePath, 'foo', 'bar.md'),
      title: 'bar',
-      description: 'This is `next` version of bar.',
+      description: 'This is next version of bar.',
      version: 'next',
      sidebar: 'docs',
      next: {
@ -316,7 +316,7 @@ describe('versioned website', () => {
      permalink: '/docs/next/hello',
      source: path.join('@site', routeBasePath, 'hello.md'),
      title: 'hello',
-      description: 'Hello `next` !',
+      description: 'Hello next !',
      version: 'next',
      sidebar: 'docs',
      previous: {
@ -334,7 +334,7 @@ describe('versioned website', () => {
        'hello.md',
      ),
      title: 'hello',
-      description: 'Hello `1.0.1` !',
+      description: 'Hello 1.0.1 !',
      version: '1.0.1',
      sidebar: 'version-1.0.1/docs',
      previous: {
@ -354,7 +354,7 @@ describe('versioned website', () => {
      ),
      title: 'baz',
      description:
-        'Baz `1.0.0` ! This will be deleted in next subsequent versions.',
+        'Baz 1.0.0 ! This will be deleted in next subsequent versions.',
      version: '1.0.0',
      sidebar: 'version-1.0.0/docs',
      next: {
--- a/packages/docusaurus-plugin-content-docs/src/tests/metadata.test.ts
+++ b/packages/docusaurus-plugin-content-docs/src/tests/metadata.test.ts
@ -84,7 +84,7 @@ describe('simple site', () => {
      title: 'baz',
      editUrl:
        'https://github.com/facebook/docusaurus/edit/master/website/docs/foo/baz.md',
-      description: '## Images',
+      description: 'Images',
    });
  });

@ -228,7 +228,7 @@ describe('versioned site', () => {
      permalink: '/docs/next/foo/bar',
      source: path.join('@site', routeBasePath, sourceA),
      title: 'bar',
-      description: 'This is `next` version of bar.',
+      description: 'This is next version of bar.',
      version: 'next',
    });
    expect(dataB).toEqual({
@ -236,7 +236,7 @@ describe('versioned site', () => {
      permalink: '/docs/next/hello',
      source: path.join('@site', routeBasePath, sourceB),
      title: 'hello',
-      description: 'Hello `next` !',
+      description: 'Hello next !',
      version: 'next',
    });
  });
@ -286,7 +286,7 @@ describe('versioned site', () => {
      permalink: '/docs/1.0.0/foo/bar',
      source: path.join('@site', path.relative(siteDir, versionedDir), sourceA),
      title: 'bar',
-      description: 'Bar `1.0.0` !',
+      description: 'Bar 1.0.0 !',
      version: '1.0.0',
    });
    expect(dataB).toEqual({
@ -294,7 +294,7 @@ describe('versioned site', () => {
      permalink: '/docs/1.0.0/hello',
      source: path.join('@site', path.relative(siteDir, versionedDir), sourceB),
      title: 'hello',
-      description: 'Hello `1.0.0` !',
+      description: 'Hello 1.0.0 !',
      version: '1.0.0',
    });
    expect(dataC).toEqual({
@ -302,7 +302,7 @@ describe('versioned site', () => {
      permalink: '/docs/foo/bar',
      source: path.join('@site', path.relative(siteDir, versionedDir), sourceC),
      title: 'bar',
-      description: 'Bar `1.0.1` !',
+      description: 'Bar 1.0.1 !',
      version: '1.0.1',
    });
    expect(dataD).toEqual({
@ -310,7 +310,7 @@ describe('versioned site', () => {
      permalink: '/docs/hello',
      source: path.join('@site', path.relative(siteDir, versionedDir), sourceD),
      title: 'hello',
-      description: 'Hello `1.0.1` !',
+      description: 'Hello 1.0.1 !',
      version: '1.0.1',
    });
  });
--- a/packages/docusaurus-utils/src/tests/index.test.ts
+++ b/packages/docusaurus-utils/src/tests/index.test.ts
@ -17,6 +17,7 @@ import {
  posixPath,
  objectWithKeySorted,
  aliasedSitePath,
+  createExcerpt,
 } from '../index';

 describe('load utils', () => {
@ -292,4 +293,60 @@ describe('load utils', () => {
      `"Url must be a string. Received undefined"`,
    );
  });
+
+  test('createExcerpt', () => {
+    // Each fixture pairs raw Markdown/MDX content with the excerpt expected from it.
+    const excerptCases = [
+      // Plain paragraphs only.
+      {
+        input: `
+          Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `,
+        output:
+          'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
+      },
+      // Leading import declarations, inline Markdown markup and an emoji shortcode.
+      {
+        input: `
+          import Component from '@site/src/components/Component';
+          import Component from '@site/src/components/Component'
+
+          Lorem **ipsum** dolor sit \`amet\`, consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna, ~molestie~ et sagittis ut, varius ac justo :wink:.
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `,
+        output:
+          'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
+      },
+      // An admonition block comes first; its body supplies the excerpt.
+      {
+        input: `
+          import Component from '@site/src/components/Component'
+
+          :::caution
+
+          Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+
+          :::
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `,
+        output: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
+      },
+      // A heading comes first; its text (without the `##` marker) is the excerpt.
+      {
+        input: `
+          ## Lorem ipsum dolor sit amet
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `,
+        output: 'Lorem ipsum dolor sit amet',
+      },
+    ];
+
+    for (const {input, output} of excerptCases) {
+      expect(createExcerpt(input)).toEqual(output);
+    }
+  });
 });
--- a/packages/docusaurus-utils/src/index.ts
+++ b/packages/docusaurus-utils/src/index.ts
@ -185,6 +185,49 @@ export function getSubFolder(file: string, refDir: string): string | null {
 // Regex for an import statement.
 const importRegexString = '^(.*import){1}(.+){0,1}\\s[\'"](.+)[\'"];?';

+/**
+ * Builds a plain-text excerpt from Markdown/MDX content: the first line that
+ * still contains text once leading import statements and inline Markdown
+ * markup have been stripped.
+ *
+ * @param fileString Raw Markdown/MDX content.
+ * @returns The first non-empty cleaned line, or `undefined` when every line
+ * is empty after cleaning.
+ */
+export function createExcerpt(fileString: string): string | undefined {
+  let fileContent = fileString.trimLeft();
+
+  // Without the `m` flag `^` anchors at the start of the content, so import
+  // statements are stripped only when the first line matches the import pattern.
+  if (RegExp(importRegexString).test(fileContent)) {
+    fileContent = fileContent
+      .replace(RegExp(importRegexString, 'gm'), '')
+      .trimLeft();
+  }
+
+  const fileLines = fileContent.split('\n');
+
+  for (const fileLine of fileLines) {
+    const cleanedLine = fileLine
+      // Remove HTML tags.
+      .replace(/<[^>]*>/g, '')
+      // Remove ATX-style headers.
+      .replace(/^#{1,6}\s*([^#]*)\s*(#{1,6})?/gm, '$1')
+      // Remove emphasis and strikethroughs.
+      .replace(/([*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
+      // Remove images. This has to run before inline links are unwrapped:
+      // otherwise `![alt](src)` is first rewritten to `!alt`, the image
+      // pattern no longer matches, and the stray `!alt` leaks into the excerpt.
+      .replace(/!\[(.*?)\][\[\(].*?[\]\)]/g, '')
+      // Remove inline links.
+      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
+      // Remove inline code.
+      .replace(/`(.+?)`/g, '$1')
+      // Remove blockquotes.
+      .replace(/^\s{0,3}>\s?/g, '')
+      // Remove footnotes.
+      .replace(/\[\^.+?\](: .*?$)?/g, '')
+      // Remove admonition definition.
+      .replace(/(:{3}.*)/, '')
+      // Remove Emoji names within colons include preceding whitespace.
+      // NOTE(review): the pattern is not limited to emoji names; it removes any
+      // colon-delimited span on the line (e.g. `a: b c: d` becomes `a d`).
+      .replace(/\s?(:(::|[^:\n])+:)/g, '')
+      .trim();
+
+    // The first line that still has text after cleaning is the excerpt.
+    if (cleanedLine) {
+      return cleanedLine;
+    }
+  }
+
+  return undefined;
+}
+
 export function parse(
  fileString: string,
 ): {
@ -196,18 +239,10 @@ export function parse(
 } {
  const options: {} = {
    excerpt: (file: matter.GrayMatterFile<string>): void => {
-      let fileContent = file.content.trimLeft();
-
      // The excerpt is produced by createExcerpt, which strips import statements
      // and Markdown markup with regexes (still a hacky approach).
      // TODO: Find a better way to do so, possibly by compiling the Markdown content,
      // stripping out HTML tags and obtaining the first line.
-      if (RegExp(importRegexString).test(fileContent)) {
-        fileContent = fileContent
-          .replace(RegExp(importRegexString, 'gm'), '')
-          .trimLeft();
-      }
-
-      file.excerpt = fileContent.split('\n', 1).shift();
+      file.excerpt = createExcerpt(file.content);
    },
  };