From 250a818e7fbe7e60feaec507a24fc8e8a9b5fe26 Mon Sep 17 00:00:00 2001 From: Emelia Smith Date: Fri, 13 Sep 2019 23:57:22 +0200 Subject: [PATCH] feat(v1): strip html from TOC (#1762) The approach here is to first strip the HTML from the heading's content, then render it with markdown to get the HTML content for the TOC entry, then to strip the HTML from the rendered content again, so as to get the text for the TOC entry's link. Adds an additional dependency of striptags (MIT licensed) Example TOC Entry, given the heading of: ```markdown ## <a name="foo"></a> _Foo_ ``` ```javascript { hashLink: 'foo', rawContent: '<a name="foo"></a> _Foo_', content: '<em>Foo</em>', children: [] } ``` Previously this TOC entry would be: ```javascript { hashLink: 'a-name-foo-a-_foo_', rawContent: '<a name="foo"></a> _Foo_', content: '<a name="foo"></a> <em>Foo</em>', children: [] } ``` closes issue #1703 --- .../lib/core/__tests__/toc.test.js | 18 ++++++++++++++++++ packages/docusaurus-1.x/lib/core/toc.js | 12 +++++++++--- packages/docusaurus-1.x/package.json | 1 + yarn.lock | 5 +++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/packages/docusaurus-1.x/lib/core/__tests__/toc.test.js b/packages/docusaurus-1.x/lib/core/__tests__/toc.test.js index 810af4fa41..0a730f5ba7 100644 --- a/packages/docusaurus-1.x/lib/core/__tests__/toc.test.js +++ b/packages/docusaurus-1.x/lib/core/__tests__/toc.test.js @@ -38,6 +38,24 @@ describe('getTOC', () => { expect(headingsJson).toContain('bar-8'); // maximum unique bar index is 8 expect(headingsJson).toContain('4th level headings'); }); + + describe('stripping of HTML', () => { + test('correctly removes', () => { + const headings = getTOC(`## Foo`, 'h2', []); + + expect(headings[0].hashLink).toEqual('foo'); + expect(headings[0].rawContent).toEqual(` Foo`); + expect(headings[0].content).toEqual('Foo'); + }); + + test('retains formatting from Markdown', () => { + const headings = getTOC(`## _Foo_`, 'h2', []); + + expect(headings[0].hashLink).toEqual('foo'); + expect(headings[0].rawContent).toEqual(` _Foo_`); + 
expect(headings[0].content).toEqual('Foo'); + }); + }); }); describe('insertTOC', () => { diff --git a/packages/docusaurus-1.x/lib/core/toc.js b/packages/docusaurus-1.x/lib/core/toc.js index d413367d5f..36626be16a 100644 --- a/packages/docusaurus-1.x/lib/core/toc.js +++ b/packages/docusaurus-1.x/lib/core/toc.js @@ -7,6 +7,7 @@ const Remarkable = require('remarkable'); const mdToc = require('markdown-toc'); +const striptags = require('striptags'); const toSlug = require('./toSlug'); const tocRegex = new RegExp('', 'i'); @@ -34,15 +35,20 @@ function getTOC(content, headingTags = 'h2', subHeadingTags = 'h3') { headings.forEach(heading => { // we need always generate slugs to ensure, that we will have consistent // slug indexes for headings with the same names - const hashLink = toSlug(heading.content, context); + const rawContent = heading.content; + const safeContent = striptags(rawContent); + const rendered = md.renderInline(safeContent); + + // We striptags again here as to not end up with html tags + // from markdown or markdown in our links + const hashLink = toSlug(striptags(rendered), context); if (!allowedHeadingLevels.includes(heading.lvl)) { return; } - const rawContent = heading.content; const entry = { hashLink, rawContent, - content: md.renderInline(rawContent), + content: rendered, children: [], }; if (headingLevels.includes(heading.lvl)) { diff --git a/packages/docusaurus-1.x/package.json b/packages/docusaurus-1.x/package.json index 5fc0d6f3c0..553fb02ea1 100644 --- a/packages/docusaurus-1.x/package.json +++ b/packages/docusaurus-1.x/package.json @@ -70,6 +70,7 @@ "request": "^2.88.0", "shelljs": "^0.8.3", "sitemap": "^3.2.2", + "striptags": "^3.1.1", "tcp-port-used": "^1.0.1", "tiny-lr": "^1.1.1", "tree-node-cli": "^1.2.5", diff --git a/yarn.lock b/yarn.lock index e9b4714238..ff3a957613 100644 --- a/yarn.lock +++ b/yarn.lock @@ -14559,6 +14559,11 @@ strip-outer@^1.0.0: dependencies: escape-string-regexp "^1.0.2" +striptags@^3.1.1: + version "3.1.1" + 
resolved "https://registry.yarnpkg.com/striptags/-/striptags-3.1.1.tgz#c8c3e7fdd6fb4bb3a32a3b752e5b5e3e38093ebd" + integrity sha1-yMPn/db7S7OjKjt1LltePjgJPr0= + strong-log-transformer@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/strong-log-transformer/-/strong-log-transformer-2.1.0.tgz#0f5ed78d325e0421ac6f90f7f10e691d6ae3ae10"