feat(v2): various markdown string parsing improvements/fixes (#4590)

* extract createExcerpt code in separate file + add bad test
* almost working markdown parsing refactor
* complete parseMarkdownString refactor
* fix tests
* fix blog test issue
* fix docusaurus utils imports
2025-08-15 18:28:14 +02:00 · 2021-04-09 17:09:33 +02:00 · 2021-04-09 17:09:33 +02:00 · 4efe6824b3
commit 4efe6824b3
parent b743edf5fb
15 changed files with 895 additions and 563 deletions
--- a/packages/docusaurus-mdx-loader/src/index.js
+++ b/packages/docusaurus-mdx-loader/src/index.js
@ -9,7 +9,10 @@ const {getOptions} = require('loader-utils');
 const {readFile} = require('fs-extra');
 const mdx = require('@mdx-js/mdx');
 const emoji = require('remark-emoji');
-const {readFrontMatter} = require('@docusaurus/utils');
+const {
+  parseFrontMatter,
+  parseMarkdownContentTitle,
+} = require('@docusaurus/utils');
 const stringifyObject = require('stringify-object');
 const headings = require('./remark/headings');
 const toc = require('./remark/toc');
@ -26,12 +29,14 @@ module.exports = async function docusaurusMdxLoader(fileString) {
  const callback = this.async();
  const reqOptions = getOptions(this) || {};

-  const {frontMatter, content, hasFrontMatter} = readFrontMatter(
-    fileString,
-    this.resourcePath,
-    {},
-    reqOptions.removeTitleHeading,
-  );
+  const {frontMatter, content: contentWithTitle} = parseFrontMatter(fileString);
+
+  // By default, will remove the markdown title from the content
+  const {content} = parseMarkdownContentTitle(contentWithTitle, {
+    keepContentTitle: reqOptions.keepContentTitle,
+  });
+
+  const hasFrontMatter = Object.keys(frontMatter).length > 0;

  const options = {
    ...reqOptions,
--- a/packages/docusaurus-plugin-content-blog/src/tests/index.test.ts
+++ b/packages/docusaurus-plugin-content-blog/src/tests/index.test.ts
@ -12,9 +12,26 @@ import path from 'path';
 import pluginContentBlog from '../index';
 import {DocusaurusConfig, LoadContext, I18n} from '@docusaurus/types';
 import {PluginOptionSchema} from '../pluginOptionSchema';
-import {PluginOptions, EditUrlFunction} from '../types';
+import {PluginOptions, EditUrlFunction, BlogPost} from '../types';
 import {Joi} from '@docusaurus/utils-validation';

+/**
+ * Returns the first blog post whose `metadata.title` strictly equals `title`,
+ * or `undefined` when no post matches.
+ */
+function findByTitle(
+  blogPosts: BlogPost[],
+  title: string,
+): BlogPost | undefined {
+  for (const candidate of blogPosts) {
+    if (candidate.metadata.title === title) {
+      return candidate;
+    }
+  }
+  return undefined;
+}
+/**
+ * Same lookup as findByTitle, but throws instead of returning `undefined`.
+ * The error message lists every available title to make a failing test
+ * easier to diagnose.
+ */
+function getByTitle(blogPosts: BlogPost[], title: string): BlogPost {
+  const match = findByTitle(blogPosts, title);
+  if (match) {
+    return match;
+  }
+  const availableTitles = blogPosts
+    .map((blogPost) => blogPost.metadata.title)
+    .join('\n- ');
+  throw new Error(`can't find blog post with title ${title}.
+Available blog post titles are:\n- ${availableTitles}`);
+}
+
 function getI18n(locale: string): I18n {
  return {
    currentLocale: locale,
@ -77,7 +94,7 @@ describe('loadBlog', () => {
    const blogPosts = await getBlogPosts(siteDir);

    expect({
-      ...blogPosts.find((v) => v.metadata.title === 'date-matter')!.metadata,
+      ...getByTitle(blogPosts, 'date-matter').metadata,
      ...{prevItem: undefined},
    }).toEqual({
      editUrl: `${BaseEditUrl}/blog/date-matter.md`,
@ -98,9 +115,7 @@ describe('loadBlog', () => {
    });

    expect(
-      blogPosts.find(
-        (v) => v.metadata.title === 'Happy 1st Birthday Slash! (translated)',
-      )!.metadata,
+      getByTitle(blogPosts, 'Happy 1st Birthday Slash! (translated)').metadata,
    ).toEqual({
      editUrl: `${BaseEditUrl}/blog/2018-12-14-Happy-First-Birthday-Slash.md`,
      permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash',
@ -124,7 +139,7 @@ describe('loadBlog', () => {
    });

    expect({
-      ...blogPosts.find((v) => v.metadata.title === 'Complex Slug')!.metadata,
+      ...getByTitle(blogPosts, 'Complex Slug').metadata,
      ...{prevItem: undefined},
    }).toEqual({
      editUrl: `${BaseEditUrl}/blog/complex-slug.md`,
@ -145,7 +160,7 @@ describe('loadBlog', () => {
    });

    expect({
-      ...blogPosts.find((v) => v.metadata.title === 'Simple Slug')!.metadata,
+      ...getByTitle(blogPosts, 'Simple Slug').metadata,
      ...{prevItem: undefined},
    }).toEqual({
      editUrl: `${BaseEditUrl}/blog/simple-slug.md`,
@ -166,7 +181,7 @@ describe('loadBlog', () => {
    });

    expect({
-      ...blogPosts.find((v) => v.metadata.title === 'some heading')!.metadata,
+      ...getByTitle(blogPosts, 'some heading').metadata,
      prevItem: undefined,
    }).toEqual({
      editUrl: `${BaseEditUrl}/blog/heading-as-title.md`,
@ -301,7 +316,7 @@ describe('loadBlog', () => {
    }).format(noDateSourceBirthTime);

    expect({
-      ...blogPosts.find((v) => v.metadata.title === 'no date')!.metadata,
+      ...getByTitle(blogPosts, 'no date').metadata,
      ...{prevItem: undefined},
    }).toEqual({
      editUrl: `${BaseEditUrl}/blog/no date.md`,
--- a/packages/docusaurus-plugin-content-blog/src/blogFrontMatter.ts
+++ b/packages/docusaurus-plugin-content-blog/src/blogFrontMatter.ts
@ -0,0 +1,43 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+import {Joi} from '@docusaurus/utils-validation';
+import {Tag} from './types';
+
+// TODO complete this frontmatter + add unit tests
+/**
+ * Front matter keys of a blog post that are known to this plugin.
+ * Every key is optional; assertBlogPostFrontMatter narrows a raw record to
+ * this shape.
+ */
+type BlogPostFrontMatter = {
+  id?: string;
+  title?: string;
+  description?: string;
+  // Each tag is either a plain string or a Tag object.
+  tags?: (string | Tag)[];
+  slug?: string;
+  draft?: boolean;
+  // NOTE(review): BlogFrontMatterSchema has no rule for `date`, so this
+  // `string` type is not enforced at runtime — confirm what the parser yields.
+  date?: string;
+};
+
+// Object form of a tag: both `label` and `permalink` are mandatory strings.
+const BlogTagObjectSchema = Joi.object<Tag>({
+  label: Joi.string().required(),
+  permalink: Joi.string().required(),
+});
+
+// A tag is accepted either as a plain string or in the object form above.
+const BlogTagSchema = Joi.alternatives().try(
+  Joi.string().required(),
+  BlogTagObjectSchema,
+);
+
+// Validation rules for the front matter keys that are checked explicitly.
+// NOTE(review): BlogPostFrontMatter also declares `date`, but no rule for it
+// exists here.
+const blogFrontMatterKeys = {
+  id: Joi.string(),
+  title: Joi.string(),
+  description: Joi.string(),
+  tags: Joi.array().items(BlogTagSchema),
+  slug: Joi.string(),
+  draft: Joi.boolean(),
+};
+
+// `.unknown()` lets any key without a rule above pass through unvalidated.
+const BlogFrontMatterSchema = Joi.object<BlogPostFrontMatter>(
+  blogFrontMatterKeys,
+).unknown();
+
+/**
+ * Validates raw front matter against BlogFrontMatterSchema and, on success,
+ * narrows its static type to BlogPostFrontMatter.
+ *
+ * @param frontMatter the front matter record to check
+ * @throws whatever `Joi.attempt` throws when validation fails
+ */
+export function assertBlogPostFrontMatter(
+  frontMatter: Record<string, unknown>,
+): asserts frontMatter is BlogPostFrontMatter {
+  // The value returned by Joi.attempt is discarded: the caller keeps using
+  // the object it passed in.
+  Joi.attempt(frontMatter, BlogFrontMatterSchema);
+}
--- a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
+++ b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
@ -26,9 +26,10 @@ import {
  getEditUrl,
  getFolderContainingFile,
  posixPath,
+  replaceMarkdownLinks,
 } from '@docusaurus/utils';
 import {LoadContext} from '@docusaurus/types';
-import {replaceMarkdownLinks} from '@docusaurus/utils/lib/markdownLinks';
+import {assertBlogPostFrontMatter} from './blogFrontMatter';

 export function truncate(fileString: string, truncateMarker: RegExp): string {
  return fileString.split(truncateMarker, 1).shift()!;
@ -140,12 +141,18 @@ export async function generateBlogPosts(

      const source = path.join(blogDirPath, blogSourceFile);

+      const {
+        frontMatter,
+        content,
+        contentTitle,
+        excerpt,
+      } = await parseMarkdownFile(source);
+      assertBlogPostFrontMatter(frontMatter);
+
      const aliasedSource = aliasedSitePath(source, siteDir);

      const blogFileName = path.basename(blogSourceFile);

-      const {frontMatter, content, excerpt} = await parseMarkdownFile(source);
-
      if (frontMatter.draft && process.env.NODE_ENV === 'production') {
        return;
      }
@ -182,9 +189,11 @@ export async function generateBlogPosts(
        year: 'numeric',
      }).format(date);

+      const title = frontMatter.title ?? contentTitle ?? linkName;
+      const description = frontMatter.description ?? excerpt ?? '';
+
      const slug =
        frontMatter.slug || (match ? toUrl({date, link: linkName}) : linkName);
-      frontMatter.title = frontMatter.title || linkName;

      const permalink = normalizeUrl([baseUrl, routeBasePath, slug]);

@ -220,16 +229,16 @@ export async function generateBlogPosts(
      }

      blogPosts.push({
-        id: frontMatter.slug || frontMatter.title,
+        id: frontMatter.slug ?? title,
        metadata: {
          permalink,
          editUrl: getBlogEditUrl(),
          source: aliasedSource,
-          description: frontMatter.description || excerpt,
+          title,
+          description,
          date,
          formattedDate,
-          tags: frontMatter.tags,
-          title: frontMatter.title,
+          tags: frontMatter.tags ?? [],
          readingTime: showReadingTime
            ? readingTime(content).minutes
            : undefined,
--- a/packages/docusaurus-plugin-content-docs/src/tests/snapshots/index.test.ts.snap
+++ b/packages/docusaurus-plugin-content-docs/src/tests/snapshots/index.test.ts.snap
@ -198,9 +198,7 @@ Object {
  \\"slug\\": \\"/headingAsTitle\\",
  \\"permalink\\": \\"/docs/headingAsTitle\\",
  \\"version\\": \\"current\\",
-  \\"frontMatter\\": {
-    \\"title\\": \\"My heading as title\\"
-  }
+  \\"frontMatter\\": {}
 }",
  "site-docs-hello-md-9df.json": "{
  \\"unversionedId\\": \\"hello\\",
--- a/packages/docusaurus-plugin-content-docs/src/docFrontMatter.ts
+++ b/packages/docusaurus-plugin-content-docs/src/docFrontMatter.ts
@ -0,0 +1,33 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+import {Joi} from '@docusaurus/utils-validation';
+
+// TODO complete this frontmatter + add unit tests
+/**
+ * Front matter keys of a doc that are known to this plugin.
+ * Every key is optional; assertDocFrontMatter narrows a raw record to this
+ * shape.
+ */
+type DocFrontMatter = {
+  id?: string;
+  title?: string;
+  description?: string;
+  slug?: string;
+  sidebar_label?: string;
+  // NOTE(review): DocFrontMatterSchema also allows `null` for this key, which
+  // this `string` type does not express — confirm whether it should be widened.
+  custom_edit_url?: string;
+};
+
+// Validation rules for the front matter keys that are checked explicitly.
+const docFrontMatterKeys = {
+  id: Joi.string(),
+  title: Joi.string(),
+  description: Joi.string(),
+  slug: Joi.string(),
+  sidebar_label: Joi.string(),
+  // `null` is accepted in addition to a string.
+  custom_edit_url: Joi.string().allow(null),
+};
+
+// `.unknown()` lets any key without a rule above pass through unvalidated.
+const DocFrontMatterSchema = Joi.object<DocFrontMatter>(
+  docFrontMatterKeys,
+).unknown();
+
+/**
+ * Validates raw front matter against DocFrontMatterSchema and, on success,
+ * narrows its static type to DocFrontMatter.
+ *
+ * @param frontMatter the front matter record to check
+ * @throws whatever `Joi.attempt` throws when validation fails
+ */
+export function assertDocFrontMatter(
+  frontMatter: Record<string, unknown>,
+): asserts frontMatter is DocFrontMatter {
+  // The value returned by Joi.attempt is discarded: the caller keeps using
+  // the object it passed in.
+  Joi.attempt(frontMatter, DocFrontMatterSchema);
+}
--- a/packages/docusaurus-plugin-content-docs/src/docs.ts
+++ b/packages/docusaurus-plugin-content-docs/src/docs.ts
@ -30,6 +30,7 @@ import getSlug from './slug';
 import {CURRENT_VERSION_NAME} from './constants';
 import globby from 'globby';
 import {getDocsDirPaths} from './versions';
+import {assertDocFrontMatter} from './docFrontMatter';

 type LastUpdateOptions = Pick<
  PluginOptions,
@ -115,11 +116,15 @@ export function processDocMetadata({
  const {homePageId} = options;
  const {siteDir, i18n} = context;

+  const {frontMatter, contentTitle, excerpt} = parseMarkdownString(content, {
+    source,
+  });
+  assertDocFrontMatter(frontMatter);
+
  // ex: api/myDoc -> api
  // ex: myDoc -> .
  const docsFileDirName = path.dirname(source);

-  const {frontMatter = {}, excerpt} = parseMarkdownString(content, source);
  const {
    sidebar_label: sidebarLabel,
    custom_edit_url: customEditURL,
@ -165,9 +170,9 @@ export function processDocMetadata({
      });

  // Default title is the id.
-  const title: string = frontMatter.title || baseID;
+  const title: string = frontMatter.title ?? contentTitle ?? baseID;

-  const description: string = frontMatter.description || excerpt;
+  const description: string = frontMatter.description ?? excerpt ?? '';

  const permalink = normalizeUrl([versionMetadata.versionPath, docSlug]);

--- a/packages/docusaurus-plugin-content-docs/src/markdown/linkify.ts
+++ b/packages/docusaurus-plugin-content-docs/src/markdown/linkify.ts
@ -7,7 +7,7 @@

 import {DocsMarkdownOption} from '../types';
 import {getDocsDirPaths} from '../versions';
-import {replaceMarkdownLinks} from '@docusaurus/utils/lib/markdownLinks';
+import {replaceMarkdownLinks} from '@docusaurus/utils';

 function getVersion(filePath: string, options: DocsMarkdownOption) {
  const versionFound = options.versionsMetadata.find((version) =>
--- a/packages/docusaurus-plugin-content-pages/src/index.ts
+++ b/packages/docusaurus-plugin-content-pages/src/index.ts
@ -223,7 +223,7 @@ export default function pluginContentPages(
                    rehypePlugins,
                    beforeDefaultRehypePlugins,
                    beforeDefaultRemarkPlugins,
-                    removeTitleHeading: false,
+                    keepContentTitle: true,
                    staticDir: path.join(siteDir, STATIC_DIR_NAME),
                    // Note that metadataPath must be the same/in-sync as
                    // the path from createData for each MDX.
--- a/packages/docusaurus-utils/src/tests/snapshots/parseMarkdown.test.ts.snap
+++ b/packages/docusaurus-utils/src/tests/snapshots/parseMarkdown.test.ts.snap
@ -1,148 +0,0 @@
-// Jest Snapshot v1, https://goo.gl/fbAQLP
-
-exports[`load utils: parseMarkdown parseMarkdownString should delete only first heading 1`] = `
-Object {
-  "content": "
-test test test test test test
-test test test # test bar
-# test
-### test",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "test",
-  },
-  "hasFrontMatter": false,
-}
-`;
-
-exports[`load utils: parseMarkdown parseMarkdownString should ignore heading if its not a first text 1`] = `
-Object {
-  "content": "foo
-# test",
-  "excerpt": "foo",
-  "frontMatter": Object {},
-  "hasFrontMatter": false,
-}
-`;
-
-exports[`load utils: parseMarkdown parseMarkdownString should parse first heading as title 1`] = `
-Object {
-  "content": "",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "test",
-  },
-  "hasFrontMatter": false,
-}
-`;
-
-exports[`load utils: parseMarkdown parseMarkdownString should preserve front-matter title and warn about duplication 1`] = `
-Object {
-  "content": "# test",
-  "excerpt": "test",
-  "frontMatter": Object {
-    "title": "title",
-  },
-  "hasFrontMatter": true,
-}
-`;
-
-exports[`load utils: parseMarkdown parseMarkdownString should read front matter 1`] = `
-Object {
-  "content": "",
-  "excerpt": undefined,
-  "frontMatter": Object {
-    "title": "test",
-  },
-  "hasFrontMatter": true,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should delete only first heading 1`] = `
-Object {
-  "content": "test test test # test bar
-# test
-### test",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "test",
-  },
-  "hasFrontMatter": false,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should ignore heading if its not a first text 1`] = `
-Object {
-  "content": "foo
-# test",
-  "excerpt": "",
-  "frontMatter": Object {},
-  "hasFrontMatter": false,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should not warn about duplicated title 1`] = `
-Object {
-  "content": "# test",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "title",
-  },
-  "hasFrontMatter": true,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title 1`] = `
-Object {
-  "content": "",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "test",
-  },
-  "hasFrontMatter": false,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should parse first heading as title and keep it in content 1`] = `
-Object {
-  "content": "# test",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "test",
-  },
-  "hasFrontMatter": false,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should parse front-matter and ignore h2 1`] = `
-Object {
-  "content": "## test",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "title",
-  },
-  "hasFrontMatter": true,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should preserve front-matter title and warn about duplication 1`] = `
-Object {
-  "content": "# test",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "title",
-  },
-  "hasFrontMatter": true,
-}
-`;
-
-exports[`load utils: parseMarkdown readFrontMatter should read front matter 1`] = `
-Object {
-  "content": "",
-  "excerpt": "",
-  "frontMatter": Object {
-    "title": "test",
-  },
-  "hasFrontMatter": true,
-}
-`;
--- a/packages/docusaurus-utils/src/tests/index.test.ts
+++ b/packages/docusaurus-utils/src/tests/index.test.ts
@ -18,7 +18,6 @@ import {
  posixPath,
  objectWithKeySorted,
  aliasedSitePath,
-  createExcerpt,
  isValidPathname,
  addTrailingSlash,
  removeTrailingSlash,
@ -372,81 +371,6 @@ describe('load utils', () => {
    );
  });

-  test('createExcerpt', () => {
-    const asserts = [
-      // Regular content
-      {
-        input: `
-          Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
-
-          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
-        `,
-        output:
-          'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
-      },
-      // Content with imports/exports declarations and Markdown markup, as well as Emoji
-      {
-        input: `
-          import Component from '@site/src/components/Component';
-          import Component from '@site/src/components/Component'
-          import './styles.css';
-
-          export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> }
-
-          export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> };
-
-          Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:.
-
-          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
-        `,
-        output:
-          'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
-      },
-      // Content beginning with admonitions
-      {
-        input: `
-          import Component from '@site/src/components/Component'
-
-          :::caution
-
-          Lorem ipsum dolor sit amet, consectetur adipiscing elit.
-
-          :::
-
-          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
-        `,
-        output: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
-      },
-      // Content beginning with heading
-      {
-        input: `
-          ## Lorem ipsum dolor sit amet
-
-          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
-        `,
-        output: 'Lorem ipsum dolor sit amet',
-      },
-      // Content beginning with blockquote
-      {
-        input: `
-          > Lorem ipsum dolor sit amet
-        `,
-        output: 'Lorem ipsum dolor sit amet',
-      },
-      // Content beginning with image (eg. blog post)
-      {
-        input: `
-          ![Lorem ipsum](/img/lorem-ipsum.svg)
-        `,
-        output: 'Lorem ipsum',
-      },
-    ];
-
-    asserts.forEach((testCase) => {
-      expect(createExcerpt(testCase.input)).toEqual(testCase.output);
-    });
-  });
-
  test('isValidPathname', () => {
    expect(isValidPathname('/')).toBe(true);
    expect(isValidPathname('/hey')).toBe(true);
--- a/packages/docusaurus-utils/src/tests/markdownParser.test.ts
+++ b/packages/docusaurus-utils/src/tests/markdownParser.test.ts
@ -0,0 +1,568 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+import {
+  createExcerpt,
+  parseMarkdownContentTitle,
+  parseMarkdownString,
+} from '../markdownParser';
+import dedent from 'dedent';
+
+describe('createExcerpt', () => {
+  test('should create excerpt for text-only content', () => {
+    expect(
+      createExcerpt(dedent`
+          Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `),
+    ).toEqual(
+      'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
+    );
+  });
+
+  test('should create excerpt for regular content with regular title', () => {
+    expect(
+      createExcerpt(dedent`
+
+          # Markdown Regular Title
+
+          Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `),
+    ).toEqual(
+      // h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description
+      'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
+    );
+  });
+
+  test('should create excerpt for regular content with alternate title', () => {
+    expect(
+      createExcerpt(dedent`
+
+          Markdown Alternate Title
+          ================
+
+          Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `),
+    ).toEqual(
+      // h1 title is skipped on purpose, because we don't want the page to have SEO metadatas title === description
+      'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
+    );
+  });
+
+  test('should create excerpt for content with h2 heading', () => {
+    expect(
+      createExcerpt(dedent`
+          ## Lorem ipsum dolor sit amet
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `),
+    ).toEqual('Lorem ipsum dolor sit amet');
+  });
+
+  test('should create excerpt for content beginning with blockquote', () => {
+    expect(
+      createExcerpt(dedent`
+          > Lorem ipsum dolor sit amet
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `),
+    ).toEqual('Lorem ipsum dolor sit amet');
+  });
+
+  test('should create excerpt for content beginning with image (eg. blog post)', () => {
+    expect(
+      createExcerpt(dedent`
+          ![Lorem ipsum](/img/lorem-ipsum.svg)
+        `),
+    ).toEqual('Lorem ipsum');
+  });
+
+  test('should create excerpt for content beginning with admonitions', () => {
+    expect(
+      createExcerpt(dedent`
+          import Component from '@site/src/components/Component'
+
+          :::caution
+
+          Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+
+          :::
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `),
+    ).toEqual('Lorem ipsum dolor sit amet, consectetur adipiscing elit.');
+  });
+
+  test('should create excerpt for content with imports/exports declarations and Markdown markup, as well as Emoji', () => {
+    expect(
+      createExcerpt(dedent`
+          import Component from '@site/src/components/Component';
+          import Component from '@site/src/components/Component'
+          import './styles.css';
+
+          export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> }
+
+          export function ItemCol(props) { return <Item {...props} className={'col col--6 margin-bottom--lg'}/> };
+
+          Lorem **ipsum** dolor sit \`amet\`[^1], consectetur _adipiscing_ elit. [**Vestibulum**](https://wiktionary.org/wiki/vestibulum) ex urna[^bignote], ~molestie~ et sagittis ut, varius ac justo :wink:.
+
+          Nunc porttitor libero nec vulputate venenatis. Nam nec rhoncus mauris. Morbi tempus est et nibh maximus, tempus venenatis arcu lobortis.
+        `),
+    ).toEqual(
+      'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum ex urna, molestie et sagittis ut, varius ac justo.',
+    );
+  });
+});
+
+describe('parseMarkdownContentTitle', () => {
+  test('Should parse markdown h1 title at the top', () => {
+    const markdown = dedent`
+
+          # Markdown Title
+
+          Lorem Ipsum
+
+        `;
+    expect(parseMarkdownContentTitle(markdown)).toEqual({
+      content: 'Lorem Ipsum',
+      contentTitle: 'Markdown Title',
+    });
+  });
+
+  test('Should parse markdown h1 title at the top (atx style with closing #)', () => {
+    const markdown = dedent`
+
+          # Markdown Title #
+
+          Lorem Ipsum
+
+        `;
+    expect(parseMarkdownContentTitle(markdown)).toEqual({
+      content: 'Lorem Ipsum',
+      contentTitle: 'Markdown Title',
+    });
+  });
+
+  test('Should parse markdown h1 alternate title', () => {
+    const markdown = dedent`
+
+          Markdown Title
+          ================
+
+          Lorem Ipsum
+
+        `;
+    expect(parseMarkdownContentTitle(markdown)).toEqual({
+      content: 'Lorem Ipsum',
+      contentTitle: 'Markdown Title',
+    });
+  });
+
+  test('Should parse title-only', () => {
+    const markdown = '# Document With Only A Title ';
+    expect(parseMarkdownContentTitle(markdown)).toEqual({
+      content: '',
+      contentTitle: 'Document With Only A Title',
+    });
+  });
+
+  test('Should parse markdown h1 title at the top but keep it in content', () => {
+    const markdown = dedent`
+
+          # Markdown Title
+
+          Lorem Ipsum
+
+        `;
+    expect(
+      parseMarkdownContentTitle(markdown, {keepContentTitle: true}),
+    ).toEqual({
+      content: markdown.trim(),
+      contentTitle: 'Markdown Title',
+    });
+  });
+
+  test('Should not parse markdown h1 title in the middle of a doc', () => {
+    const markdown = dedent`
+
+          Lorem Ipsum
+
+          # Markdown Title
+
+          Lorem Ipsum
+
+        `;
+    expect(parseMarkdownContentTitle(markdown)).toEqual({
+      content: markdown,
+      contentTitle: undefined,
+    });
+  });
+
+  test('Should not parse markdown h1 alternate title in the middle of the doc', () => {
+    const markdown = dedent`
+
+          Lorem Ipsum
+
+          Markdown Title
+          ================
+
+          Lorem Ipsum
+
+        `;
+    expect(parseMarkdownContentTitle(markdown)).toEqual({
+      content: markdown,
+      contentTitle: undefined,
+    });
+  });
+});
+
+describe('parseMarkdownString', () => {
+  const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
+  beforeEach(() => {
+    warn.mockReset();
+  });
+
+  function expectDuplicateTitleWarning() {
+    expect(warn).toBeCalledWith(
+      expect.stringMatching(/Duplicate title found in this file/),
+    );
+  }
+  function expectNoWarning() {
+    expect(warn).not.toBeCalled();
+  }
+
+  test('parse markdown with frontmatter', () => {
+    expect(
+      parseMarkdownString(dedent`
+        ---
+        title: Frontmatter title
+        ---
+
+        Some text
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "Some text",
+        "contentTitle": undefined,
+        "excerpt": "Some text",
+        "frontMatter": Object {
+          "title": "Frontmatter title",
+        },
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should parse first heading as contentTitle', () => {
+    expect(
+      parseMarkdownString(dedent`
+        # Markdown Title
+
+        Some text
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "Some text",
+        "contentTitle": "Markdown Title",
+        "excerpt": "Some text",
+        "frontMatter": Object {},
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should warn about duplicate titles (frontmatter + markdown)', () => {
+    expect(
+      parseMarkdownString(dedent`
+        ---
+        title: Frontmatter title
+        ---
+
+        # Markdown Title
+
+        Some text
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "Some text",
+        "contentTitle": "Markdown Title",
+        "excerpt": "Some text",
+        "frontMatter": Object {
+          "title": "Frontmatter title",
+        },
+      }
+    `);
+    expectDuplicateTitleWarning();
+  });
+
+  test('should warn about duplicate titles (frontmatter + markdown alternate)', () => {
+    expect(
+      parseMarkdownString(dedent`
+        ---
+        title: Frontmatter title
+        ---
+
+        Markdown Title alternate
+        ================
+
+        Some text
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "Some text",
+        "contentTitle": "Markdown Title alternate",
+        "excerpt": "Some text",
+        "frontMatter": Object {
+          "title": "Frontmatter title",
+        },
+      }
+    `);
+    expectDuplicateTitleWarning();
+  });
+
+  test('should not warn for duplicate title if keepContentTitle=true', () => {
+    expect(
+      parseMarkdownString(
+        dedent`
+        ---
+        title: Frontmatter title
+        ---
+
+        # Markdown Title
+
+        Some text
+        `,
+        {keepContentTitle: true},
+      ),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "# Markdown Title
+
+      Some text",
+        "contentTitle": "Markdown Title",
+        "excerpt": "Some text",
+        "frontMatter": Object {
+          "title": "Frontmatter title",
+        },
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should not warn for duplicate title if markdown title is not at the top', () => {
+    expect(
+      parseMarkdownString(dedent`
+        ---
+        title: Frontmatter title
+        ---
+
+        foo
+
+        # Markdown Title
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "foo
+
+      # Markdown Title",
+        "contentTitle": undefined,
+        "excerpt": "foo",
+        "frontMatter": Object {
+          "title": "Frontmatter title",
+        },
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should parse markdown title and keep it in content', () => {
+    expect(
+      parseMarkdownString(
+        dedent`
+          # Markdown Title
+          `,
+        {keepContentTitle: true},
+      ),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "# Markdown Title",
+        "contentTitle": "Markdown Title",
+        "excerpt": undefined,
+        "frontMatter": Object {},
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should delete only first heading', () => {
+    expect(
+      parseMarkdownString(dedent`
+        # Markdown Title
+
+        test test test # test bar
+
+        # Markdown Title 2
+
+        ### Markdown Title h3
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "test test test # test bar
+
+      # Markdown Title 2
+
+      ### Markdown Title h3",
+        "contentTitle": "Markdown Title",
+        "excerpt": "test test test # test bar",
+        "frontMatter": Object {},
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should parse front-matter and ignore h2', () => {
+    expect(
+      parseMarkdownString(
+        dedent`
+          ---
+          title: Frontmatter title
+          ---
+          ## test
+          `,
+      ),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "## test",
+        "contentTitle": undefined,
+        "excerpt": "test",
+        "frontMatter": Object {
+          "title": "Frontmatter title",
+        },
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should read front matter only', () => {
+    expect(
+      parseMarkdownString(dedent`
+        ---
+        title: test
+        ---
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "",
+        "contentTitle": undefined,
+        "excerpt": undefined,
+        "frontMatter": Object {
+          "title": "test",
+        },
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should parse title only', () => {
+    expect(parseMarkdownString('# test')).toMatchInlineSnapshot(`
+      Object {
+        "content": "",
+        "contentTitle": "test",
+        "excerpt": undefined,
+        "frontMatter": Object {},
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should parse title only alternate', () => {
+    expect(
+      parseMarkdownString(dedent`
+        test
+        ===
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "",
+        "contentTitle": "test",
+        "excerpt": undefined,
+        "frontMatter": Object {},
+      }
+    `);
+    expectNoWarning();
+  });
+
+  test('should warn about duplicate titles', () => {
+    expect(
+      parseMarkdownString(dedent`
+        ---
+        title: Frontmatter title
+        ---
+        # test
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "",
+        "contentTitle": "test",
+        "excerpt": undefined,
+        "frontMatter": Object {
+          "title": "Frontmatter title",
+        },
+      }
+    `);
+    expectDuplicateTitleWarning();
+  });
+
+  test('should ignore markdown title if its not a first text', () => {
+    expect(
+      parseMarkdownString(dedent`
+        foo
+        # test
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "foo
+      # test",
+        "contentTitle": undefined,
+        "excerpt": "foo",
+        "frontMatter": Object {},
+      }
+    `);
+    expectNoWarning();
+  });
+
+  // Variant of the "delete only first heading" case where the lines after the
+  // title are not separated by blank lines. The title is kept distinct from
+  // the earlier test so a failure report identifies which case broke.
+  test('should delete only first heading (no blank lines between following lines)', () => {
+    expect(
+      parseMarkdownString(dedent`
+        # test
+
+        test test test test test test
+        test test test # test bar
+        # test2
+        ### test
+        test3
+        `),
+    ).toMatchInlineSnapshot(`
+      Object {
+        "content": "test test test test test test
+      test test test # test bar
+      # test2
+      ### test
+      test3",
+        "contentTitle": "test",
+        "excerpt": "test test test test test test",
+        "frontMatter": Object {},
+      }
+    `);
+    expectNoWarning();
+  });
+});
--- a/packages/docusaurus-utils/src/tests/parseMarkdown.test.ts
+++ b/packages/docusaurus-utils/src/tests/parseMarkdown.test.ts
@ -1,177 +0,0 @@
-/**
- * Copyright (c) Facebook, Inc. and its affiliates.
- *
- * This source code is licensed under the MIT license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-import {parseMarkdownString, readFrontMatter} from '../index';
-import dedent from 'dedent';
-
-describe('load utils: parseMarkdown', () => {
-  describe('readFrontMatter', () => {
-    test('should read front matter', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(dedent`
-        ---
-        title: test
-        ---
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should parse first heading as title', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(dedent`
-        # test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should preserve front-matter title and warn about duplication', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(dedent`
-        ---
-        title: title
-        ---
-        # test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).toBeCalledWith('Duplicate title detected in `this` file');
-      warn.mockReset();
-    });
-    test('should ignore heading if its not a first text', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(dedent`
-        foo
-        # test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should parse first heading as title and keep it in content', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(
-          dedent`
-          # test
-          `,
-          undefined,
-          {},
-          false,
-        ),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should delete only first heading', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(dedent`
-        # test
-        test test test # test bar
-        # test
-        ### test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should parse front-matter and ignore h2', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(
-          dedent`
-          ---
-          title: title
-          ---
-          ## test
-          `,
-          undefined,
-          {},
-          false,
-        ),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should not warn about duplicated title', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        readFrontMatter(
-          dedent`
-          ---
-          title: title
-          ---
-          # test
-          `,
-          undefined,
-          {},
-          false,
-        ),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-  });
-
-  describe('parseMarkdownString', () => {
-    test('should read front matter', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        parseMarkdownString(dedent`
-        ---
-        title: test
-        ---
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should parse first heading as title', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        parseMarkdownString(dedent`
-        # test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should preserve front-matter title and warn about duplication', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        parseMarkdownString(dedent`
-        ---
-        title: title
-        ---
-        # test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).toBeCalledWith('Duplicate title detected in `this` file');
-      warn.mockReset();
-    });
-    test('should ignore heading if its not a first text', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        parseMarkdownString(dedent`
-        foo
-        # test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-    test('should delete only first heading', () => {
-      const warn = jest.spyOn(console, 'warn').mockImplementation(() => {});
-      expect(
-        parseMarkdownString(dedent`
-        # test
-
-        test test test test test test
-        test test test # test bar
-        # test
-        ### test
-        `),
-      ).toMatchSnapshot();
-      expect(warn).not.toBeCalled();
-    });
-  });
-});
--- a/packages/docusaurus-utils/src/index.ts
+++ b/packages/docusaurus-utils/src/index.ts
@ -7,7 +7,6 @@

 import chalk from 'chalk';
 import path from 'path';
-import matter from 'gray-matter';
 import {createHash} from 'crypto';
 import {camelCase, kebabCase, mapValues} from 'lodash';
 import escapeStringRegexp from 'escape-string-regexp';
@ -23,6 +22,8 @@ import {
 import resolvePathnameUnsafe from 'resolve-pathname';

 export * from './codeTranslationsUtils';
+export * from './markdownParser';
+export * from './markdownLinks';

 const fileHash = new Map();
 export async function generate(
@ -206,135 +207,6 @@ export function getSubFolder(file: string, refDir: string): string | null {
  return match && match[1];
 }

-export function createExcerpt(fileString: string): string | undefined {
-  const fileLines = fileString.trimLeft().split('\n');
-
-  /* eslint-disable no-continue */
-  // eslint-disable-next-line no-restricted-syntax
-  for (const fileLine of fileLines) {
-    // Skip empty line.
-    if (!fileLine.trim()) {
-      continue;
-    }
-
-    // Skip import/export declaration.
-    if (/^\s*?import\s.*(from.*)?;?|export\s.*{.*};?/.test(fileLine)) {
-      continue;
-    }
-
-    const cleanedLine = fileLine
-      // Remove HTML tags.
-      .replace(/<[^>]*>/g, '')
-      // Remove ATX-style headers.
-      .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
-      // Remove emphasis and strikethroughs.
-      .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
-      // Remove images.
-      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
-      // Remove footnotes.
-      .replace(/\[\^.+?\](\: .*?$)?/g, '')
-      // Remove inline links.
-      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
-      // Remove inline code.
-      .replace(/`(.+?)`/g, '$1')
-      // Remove blockquotes.
-      .replace(/^\s{0,3}>\s?/g, '')
-      // Remove admonition definition.
-      .replace(/(:{3}.*)/, '')
-      // Remove Emoji names within colons include preceding whitespace.
-      .replace(/\s?(:(::|[^:\n])+:)/g, '')
-      .trim();
-
-    if (cleanedLine) {
-      return cleanedLine;
-    }
-  }
-
-  return undefined;
-}
-
-type ParsedMarkdown = {
-  // Returned by gray-matter
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  frontMatter: Record<string, any>;
-  content: string;
-  excerpt: string | undefined;
-  hasFrontMatter: boolean;
-};
-
-export function readFrontMatter(
-  markdownString: string,
-  source?: string,
-  options: Record<string, unknown> = {},
-  removeTitleHeading = true,
-): ParsedMarkdown {
-  try {
-    const result = matter(markdownString, options);
-    result.data = result.data || {};
-    result.content = result.content.trim();
-
-    const hasFrontMatter = Object.keys(result.data).length > 0;
-
-    const heading = /^# (.*)[\n\r]?/gi.exec(result.content);
-    if (heading) {
-      if (result.data.title) {
-        if (removeTitleHeading) {
-          console.warn(
-            `Duplicate title detected in \`${source || 'this'}\` file`,
-          );
-        }
-      } else {
-        result.data.title = heading[1].trim();
-        if (removeTitleHeading) {
-          result.content = result.content.replace(heading[0], '');
-          if (result.excerpt) {
-            result.excerpt = result.excerpt.replace(heading[1], '');
-          }
-        }
-      }
-    }
-
-    return {
-      frontMatter: result.data,
-      content: result.content,
-      excerpt: result.excerpt,
-      hasFrontMatter,
-    };
-  } catch (e) {
-    throw new Error(`Error while parsing markdown front matter.
-This can happen if you use special characters like : in frontmatter values (try using "" around that value)
-${e.message}`);
-  }
-}
-
-export function parseMarkdownString(
-  markdownString: string,
-  source?: string,
-): ParsedMarkdown {
-  return readFrontMatter(markdownString, source, {
-    excerpt: (file: matter.GrayMatterFile<string>): void => {
-      // Hacky way of stripping out import statements from the excerpt
-      // TODO: Find a better way to do so, possibly by compiling the Markdown content,
-      // stripping out HTML tags and obtaining the first line.
-      file.excerpt = createExcerpt(file.content);
-    },
-  });
-}
-
-export async function parseMarkdownFile(
-  source: string,
-): Promise<ParsedMarkdown> {
-  const markdownString = await fs.readFile(source, 'utf-8');
-  try {
-    return parseMarkdownString(markdownString, source);
-  } catch (e) {
-    throw new Error(
-      `Error while parsing markdown file ${source}
-${e.message}`,
-    );
-  }
-}
-
 export function normalizeUrl(rawUrls: string[]): string {
  const urls = rawUrls;
  const resultArray = [];
--- a/packages/docusaurus-utils/src/markdownParser.ts
+++ b/packages/docusaurus-utils/src/markdownParser.ts
@ -0,0 +1,185 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+import chalk from 'chalk';
+import fs from 'fs-extra';
+import matter from 'gray-matter';
+
+/**
+ * Derives a plain-text excerpt from Markdown/MDX content: the first line that
+ * still has text once Markdown syntax has been stripped from it.
+ *
+ * A leading Setext ("alternate") title is dropped first. Empty lines, lines
+ * starting with `import` and lines starting with `export ... {...}` are
+ * skipped, and h1 headings are removed entirely so that a title line never
+ * becomes the excerpt.
+ *
+ * This is a hacky, regex-based way of stripping out syntax from the excerpt.
+ * TODO: Find a better way to do so, possibly by compiling the Markdown content,
+ * stripping out HTML tags and obtaining the first line.
+ *
+ * @param fileString Markdown/MDX content to extract the excerpt from.
+ * @returns The cleaned first meaningful line, or `undefined` if no line has
+ * text left after cleaning.
+ */
+export function createExcerpt(fileString: string): string | undefined {
+  // Both alternatives are anchored to the start of the line. Without the
+  // group, `^` only applies to the `import` branch and any prose line that
+  // merely contains "export {...}" would be skipped as if it were code.
+  const importOrExportLine = /^\s*(?:import\s|export\s.*\{.*\})/;
+
+  const fileLines = fileString
+    .trimStart()
+    // Remove Markdown alternate title
+    .replace(/^[^\n]*\n[=]+/g, '')
+    .split('\n');
+
+  /* eslint-disable no-continue */
+  // eslint-disable-next-line no-restricted-syntax
+  for (const fileLine of fileLines) {
+    // Skip empty line.
+    if (!fileLine.trim()) {
+      continue;
+    }
+
+    // Skip import/export declaration.
+    if (importOrExportLine.test(fileLine)) {
+      continue;
+    }
+
+    const cleanedLine = fileLine
+      // Remove HTML tags.
+      .replace(/<[^>]*>/g, '')
+      // Remove Title headers
+      .replace(/^\#\s*([^#]*)\s*\#?/gm, '')
+      // Remove Markdown + ATX-style headers
+      .replace(/^\#{1,6}\s*([^#]*)\s*(\#{1,6})?/gm, '$1')
+      // Remove emphasis and strikethroughs.
+      .replace(/([\*_~]{1,3})(\S.*?\S{0,1})\1/g, '$2')
+      // Remove images.
+      .replace(/\!\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
+      // Remove footnotes.
+      .replace(/\[\^.+?\](\: .*?$)?/g, '')
+      // Remove inline links.
+      .replace(/\[(.*?)\][\[\(].*?[\]\)]/g, '$1')
+      // Remove inline code.
+      .replace(/`(.+?)`/g, '$1')
+      // Remove blockquotes.
+      .replace(/^\s{0,3}>\s?/g, '')
+      // Remove admonition definition.
+      .replace(/(:{3}.*)/, '')
+      // Remove Emoji names within colons include preceding whitespace.
+      .replace(/\s?(:(::|[^:\n])+:)/g, '')
+      .trim();
+
+    // A line can become empty once cleaned (e.g. an h1 title): keep looking.
+    if (cleanedLine) {
+      return cleanedLine;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Splits a Markdown file's text into front matter data and body content by
+ * running it through gray-matter.
+ *
+ * @param markdownFileContent Raw file text, possibly starting with front matter.
+ * @returns `frontMatter` (an empty object when gray-matter yields no data) and
+ * the trimmed `content` (an empty string when gray-matter yields none).
+ */
+export function parseFrontMatter(
+  markdownFileContent: string,
+): {
+  frontMatter: Record<string, unknown>;
+  content: string;
+} {
+  const parsed = matter(markdownFileContent);
+  const frontMatter = parsed.data ?? {};
+  const content = parsed.content?.trim() ?? '';
+  return {frontMatter, content};
+}
+
+/**
+ * Looks for a title at the very top of some Markdown content: either a
+ * "# Title" line or a text line underlined with "=".
+ *
+ * @param contentUntrimmed Markdown content; it is trimmed before matching.
+ * @param options.keepContentTitle When true, the title is reported but left in
+ * the returned content. Defaults to false (the title is removed).
+ * @returns The trimmed content (without its title unless kept) and the trimmed
+ * title, or `contentTitle: undefined` when no non-empty title is found.
+ */
+export function parseMarkdownContentTitle(
+  contentUntrimmed: string,
+  options?: {keepContentTitle?: boolean},
+): {content: string; contentTitle: string | undefined} {
+  const trimmedContent = contentUntrimmed.trim();
+
+  // The "# Title" form wins; the underlined form is only tried as a fallback.
+  const match =
+    /^(?<pattern>#\s*(?<title>[^#\n]*)+\s*#*[\s\r]*?\n*?)/g.exec(
+      trimmedContent,
+    ) ?? /^(?<pattern>\s*(?<title>[^\n]*)\s*\n[=]+)/g.exec(trimmedContent);
+
+  const matchedPattern = match?.groups?.pattern;
+  const matchedTitle = match?.groups?.title;
+
+  // An empty title (e.g. content starting with "## ...") counts as no title.
+  if (!matchedPattern || !matchedTitle) {
+    return {content: trimmedContent, contentTitle: undefined};
+  }
+
+  const shouldKeepTitle = options?.keepContentTitle ?? false;
+  const remainingContent = shouldKeepTitle
+    ? trimmedContent
+    : trimmedContent.replace(matchedPattern, '');
+
+  return {
+    content: remainingContent.trim(),
+    contentTitle: matchedTitle.trim(),
+  };
+}
+
+// Result returned by parseMarkdownString and parseMarkdownFile.
+type ParsedMarkdown = {
+  // Front matter data, as returned by parseFrontMatter.
+  frontMatter: Record<string, unknown>;
+  // Markdown body, as returned by parseMarkdownContentTitle.
+  content: string;
+  // Title found at the top of the Markdown body, if any.
+  contentTitle: string | undefined;
+  // Output of createExcerpt for the content, if any.
+  excerpt: string | undefined;
+};
+
+/**
+ * Parses a Markdown file's text into front matter, content, content title and
+ * excerpt.
+ *
+ * Warns on the console when both a front matter `title` and a Markdown content
+ * title are present, unless `keepContentTitle` is set or the env variable
+ * DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING equals "false".
+ *
+ * @param markdownFileContent Raw file text, front matter included.
+ * @param options.source Label inserted in the duplicate title warning.
+ * @param options.keepContentTitle Leave the content title inside `content`.
+ * @returns The parsed front matter, content, content title and excerpt.
+ * @throws Rethrows any error raised while parsing, after logging a hint about
+ * front matter syntax.
+ */
+export function parseMarkdownString(
+  markdownFileContent: string,
+  options?: {
+    source?: string;
+    keepContentTitle?: boolean;
+  },
+): ParsedMarkdown {
+  try {
+    const keepContentTitle = options?.keepContentTitle ?? false;
+
+    const parsedFrontMatter = parseFrontMatter(markdownFileContent);
+    const {frontMatter} = parsedFrontMatter;
+
+    const {content, contentTitle} = parseMarkdownContentTitle(
+      parsedFrontMatter.content,
+      {keepContentTitle},
+    );
+
+    const excerpt = createExcerpt(content);
+
+    // TODO not sure this is a good place for this warning
+    const hasBothTitles = Boolean(frontMatter.title && contentTitle);
+    const warningDisabledByEnv =
+      process.env.DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING === 'false';
+
+    if (hasBothTitles && !keepContentTitle && !warningDisabledByEnv) {
+      const fileLabel = options?.source ?? 'this';
+      console.warn(
+        chalk.yellow(`Duplicate title found in ${fileLabel} file.
+Use either a frontmatter title or a markdown title, not both.
+If this is annoying you, use env DOCUSAURUS_NO_DUPLICATE_TITLE_WARNING=false`),
+      );
+    }
+
+    return {frontMatter, content, contentTitle, excerpt};
+  } catch (e) {
+    // Log a hint, then let the original error propagate untouched.
+    console.error(
+      chalk.red(`Error while parsing markdown front matter.
+This can happen if you use special characters like : in frontmatter values (try using "" around that value)`),
+    );
+    throw e;
+  }
+}
+
+/**
+ * Reads a Markdown file as UTF-8 text and parses it with parseMarkdownString,
+ * passing the file path as the `source` option.
+ *
+ * @param source Path of the Markdown file to read.
+ * @returns The parsed front matter, content, content title and excerpt.
+ * @throws Error naming the file and carrying the original message when
+ * parsing fails. Errors from reading the file are not wrapped.
+ */
+export async function parseMarkdownFile(
+  source: string,
+): Promise<ParsedMarkdown> {
+  const fileContent = await fs.readFile(source, 'utf-8');
+  try {
+    return parseMarkdownString(fileContent, {source});
+  } catch (error) {
+    const message = `Error while parsing markdown file ${source}
+${error.message}`;
+    throw new Error(message);
+  }
+}