feat(v1): strip html from TOC (#1762)

The approach here is to first strip the HTML from the heading's content, then render it with Markdown to get the HTML content for the TOC entry, and then strip the HTML from the rendered content again to get the text for the TOC entry's link. Adds an additional dependency on striptags (MIT licensed). Example TOC entry, given the heading: ```markdown ## <a name="foo"></a> _Foo_ ``` ```javascript { hashLink: 'foo', rawContent: '<a name="foo"></a> _Foo_', content: '<em>Foo</em>', children: [] } ``` Previously this TOC entry would be: ```javascript { hashLink: 'a-name-foo-a-_foo_', rawContent: '<a name="foo"></a> _Foo_', content: '<a name="foo"></a> <em>Foo</em>', children: [] } ``` Closes issue #1703.
2025-06-30 00:17:58 +02:00 · 2019-09-13 23:57:22 +02:00 · 2019-09-13 23:57:22 +02:00 · 250a818e7f
commit 250a818e7f
parent 3243e40ca2
4 changed files with 33 additions and 3 deletions
--- a/packages/docusaurus-1.x/lib/core/toc.js
+++ b/packages/docusaurus-1.x/lib/core/toc.js
@ -7,6 +7,7 @@

 const Remarkable = require('remarkable');
 const mdToc = require('markdown-toc');
+const striptags = require('striptags');
 const toSlug = require('./toSlug');

 const tocRegex = new RegExp('<AUTOGENERATED_TABLE_OF_CONTENTS>', 'i');
@ -34,15 +35,20 @@ function getTOC(content, headingTags = 'h2', subHeadingTags = 'h3') {
  headings.forEach(heading => {
    // we need always generate slugs to ensure, that we will have consistent
    // slug indexes for headings with the same names
-    const hashLink = toSlug(heading.content, context);
+    const rawContent = heading.content;
+    const safeContent = striptags(rawContent);
+    const rendered = md.renderInline(safeContent);
+
+    // We striptags again here as to not end up with html tags
+    // from markdown or markdown in our links
+    const hashLink = toSlug(striptags(rendered), context);
    if (!allowedHeadingLevels.includes(heading.lvl)) {
      return;
    }
-    const rawContent = heading.content;
    const entry = {
      hashLink,
      rawContent,
-      content: md.renderInline(rawContent),
+      content: rendered,
      children: [],
    };
    if (headingLevels.includes(heading.lvl)) {