docusaurus/packages/docusaurus-1.x/lib/core/toc.js
Emelia Smith 250a818e7f feat(v1): strip html from TOC (#1762)
The approach here is to first strip the HTML from the heading's content, then render it with markdown to get the HTML content for the TOC entry, and then strip the HTML from the rendered content again to get the text for the TOC entry's link.

Adds an additional dependency of striptags (MIT licensed)

Example TOC Entry, given the heading of:

```markdown
## <a name="foo"></a> _Foo_
```

```javascript
{
  hashLink: 'foo',
  rawContent: '<a name="foo"></a> _Foo_',
  content: '<em>Foo</em>',
  children: []
}
```

Previously this TOC entry would be:

```javascript
{
  hashLink: 'a-name-foo-a-_foo_',
  rawContent: '<a name="foo"></a> _Foo_',
  content: '&lt;a name=&quot;foo&quot;&gt;&lt;/a&gt; <em>Foo</em>',
  children: []
}
```

closes issue #1703
2019-09-13 14:57:22 -07:00

82 lines
2.4 KiB
JavaScript

/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
const Remarkable = require('remarkable');
const mdToc = require('markdown-toc');
const striptags = require('striptags');
const toSlug = require('./toSlug');
// Case-insensitive marker that insertTOC swaps for the generated list.
const tocRegex = /<AUTOGENERATED_TABLE_OF_CONTENTS>/i;
/**
 * Builds a nested table of contents from the headings found in `content`.
 *
 * @param {string} content Markdown source whose headings are collected.
 * @param {string|string[]} [headingTags='h2'] Tag name(s), e.g. `'h2'`, that
 *   produce top-level entries.
 * @param {string|string[]|null} [subHeadingTags='h3'] Tag name(s) that produce
 *   children of the most recent top-level entry; a falsy value disables them.
 * @return {Array<Object>} Top-level entries, each carrying `hashLink`,
 *   `rawContent`, `content` and `children` fields.
 */
function getTOC(content, headingTags = 'h2', subHeadingTags = 'h3') {
  // 'h2' -> 2; accepts either a single tag or an array of tags.
  const levelsOf = tags => [].concat(tags).map(tag => Number(tag.slice(1)));
  const topLevels = levelsOf(headingTags);
  const nestedLevels = subHeadingTags ? levelsOf(subHeadingTags) : [];

  const renderer = new Remarkable();
  const slugState = {};
  const entries = [];
  let parent;

  for (const heading of mdToc(content).json) {
    const rawContent = heading.content;
    // Drop inline HTML before rendering, then drop the tags the renderer
    // produced so that the slug is computed from plain text only.
    const rendered = renderer.renderInline(striptags(rawContent));
    // A slug is generated for every heading, including the ones left out of
    // the result, so duplicate-name indexes stay consistent across the page.
    const hashLink = toSlug(striptags(rendered), slugState);

    const level = heading.lvl;
    if (topLevels.includes(level)) {
      parent = {hashLink, rawContent, content: rendered, children: []};
      entries.push(parent);
    } else if (nestedLevels.includes(level) && parent) {
      // Sub-headings seen before any top-level heading have no parent and
      // are discarded.
      parent.children.push({
        hashLink,
        rawContent,
        content: rendered,
        children: [],
      });
    }
  }

  return entries;
}
/**
 * Takes the content of a doc article and returns it with a table of contents
 * inserted in place of the first `<AUTOGENERATED_TABLE_OF_CONTENTS>` marker.
 *
 * Only `h3` headings whose raw text starts with an inline code span
 * (`` `...` ``) are listed; each one becomes a markdown list item linking to
 * the heading's hash.
 *
 * @param {string} rawContent Article source.
 * @return {string} The article with the marker replaced, or `rawContent`
 *   unchanged when it is empty/falsy or contains no marker.
 */
function insertTOC(rawContent) {
  if (!rawContent || !tocRegex.test(rawContent)) {
    return rawContent;
  }
  const filterRe = /^`[^`]*`/;
  const headers = getTOC(rawContent, 'h3', null);
  const tableOfContents = headers
    .filter(header => filterRe.test(header.rawContent))
    .map(header => ` - [${header.rawContent}](#${header.hashLink})`)
    .join('\n');
  // Use a replacer function rather than a replacement string: heading text
  // may contain `$&`, `$'`, `$$`, etc., which String.prototype.replace would
  // otherwise expand as special replacement patterns and corrupt the list.
  return rawContent.replace(tocRegex, () => tableOfContents);
}
// Public API of this module: the TOC builder and the marker-replacing helper.
module.exports = {
getTOC,
insertTOC,
};