feat(v1): strip html from TOC (#1762)

The approach here is to first strip the HTML from the heading's content, then render it with markdown to get the HTML content for the TOC entry, and then strip the HTML from the rendered content again to get the text for the TOC entry's link.

Adds striptags (MIT licensed) as an additional dependency.

Example TOC Entry, given the heading of:

```markdown
## <a name="foo"></a> _Foo_
```

```javascript
{
  hashLink: 'foo',
  rawContent: '<a name="foo"></a> _Foo_',
  content: '<em>Foo</em>',
  children: []
}
```

Previously this TOC entry would be:

```javascript
{
  hashLink: 'a-name-foo-a-_foo_',
  rawContent: '<a name="foo"></a> _Foo_',
  content: '&lt;a name=&quot;foo&quot;&gt;&lt;/a&gt; <em>Foo</em>',
  children: []
}
```

closes issue #1703
This commit is contained in:
Emelia Smith 2019-09-13 23:57:22 +02:00 committed by Yangshun Tay
parent 3243e40ca2
commit 250a818e7f
4 changed files with 33 additions and 3 deletions

View file

@ -7,6 +7,7 @@
const Remarkable = require('remarkable');
const mdToc = require('markdown-toc');
const striptags = require('striptags');
const toSlug = require('./toSlug');
const tocRegex = new RegExp('<AUTOGENERATED_TABLE_OF_CONTENTS>', 'i');
@ -34,15 +35,20 @@ function getTOC(content, headingTags = 'h2', subHeadingTags = 'h3') {
headings.forEach(heading => {
// we need always generate slugs to ensure, that we will have consistent
// slug indexes for headings with the same names
const hashLink = toSlug(heading.content, context);
const rawContent = heading.content;
const safeContent = striptags(rawContent);
const rendered = md.renderInline(safeContent);
// We striptags again here as to not end up with html tags
// from markdown or markdown in our links
const hashLink = toSlug(striptags(rendered), context);
if (!allowedHeadingLevels.includes(heading.lvl)) {
return;
}
const rawContent = heading.content;
const entry = {
hashLink,
rawContent,
content: md.renderInline(rawContent),
content: rendered,
children: [],
};
if (headingLevels.includes(heading.lvl)) {