fix(mdx-loader): refactor and fix heading to toc html value serialization (#11004)

* refactor with iso behavior

* Add unit tests

* change behavior for <img> tags
This commit is contained in:
Sébastien Lorber 2025-03-18 17:52:26 +01:00 committed by GitHub
parent 1d4d17da18
commit e88f1aaf96
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 207 additions and 76 deletions

View file

@ -0,0 +1,126 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {toHeadingHTMLValue} from '../utils';
import type {Heading} from 'mdast';
describe('toHeadingHTMLValue', () => {
  type HeadingChild = Heading['children'][number];

  // Wraps the given children in a depth-2 heading and serializes it.
  // `mdast-util-to-string` is loaded through a dynamic import and its
  // `toString` export is handed to the function under test.
  async function convert(children: HeadingChild[]): Promise<string> {
    const {toString} = await import('mdast-util-to-string');
    const heading: Heading = {type: 'heading', depth: 2, children};
    return toHeadingHTMLValue(heading, toString);
  }

  // Builds a plain text node.
  const text = (value: string): HeadingChild => ({type: 'text', value});

  // Builds an inline JSX element; attributes are given as ordered
  // [name, value] pairs so the attribute order of each fixture is explicit.
  const jsx = (
    name: string,
    attributePairs: [string, string][],
    children: HeadingChild[],
  ): HeadingChild => ({
    type: 'mdxJsxTextElement',
    name,
    attributes: attributePairs.map(([attributeName, value]) => ({
      type: 'mdxJsxAttribute' as const,
      name: attributeName,
      value,
    })),
    children,
  });

  it('converts a simple heading', async () => {
    const children = [text('Some heading text')];
    await expect(convert(children)).resolves.toMatchInlineSnapshot(
      `"Some heading text"`,
    );
  });

  it('converts a heading with b tag', async () => {
    const children = [jsx('b', [], [text('Some title')])];
    await expect(convert(children)).resolves.toMatchInlineSnapshot(
      `"<b>Some title</b>"`,
    );
  });

  it('converts a heading with span tag + className', async () => {
    const children = [
      jsx('span', [['className', 'my-class']], [text('Some title')]),
    ];
    await expect(convert(children)).resolves.toMatchInlineSnapshot(
      `"<span class="my-class">Some title</span>"`,
    );
  });

  it('converts a heading - remove img tag', async () => {
    const image = jsx(
      'img',
      [
        ['src', '/img/slash-introducing.svg'],
        ['height', '32'],
        ['alt', 'test'],
      ],
      [],
    );
    const children = [image, text(' Some title')];
    await expect(convert(children)).resolves.toMatchInlineSnapshot(
      `"Some title"`,
    );
  });
});

View file

@ -5,9 +5,13 @@
* LICENSE file in the root directory of this source tree.
*/
import {toValue} from '../utils';
import type {Node} from 'unist';
import type {MdxjsEsm} from 'mdast-util-mdx';
import escapeHtml from 'escape-html';
import type {Node, Parent} from 'unist';
import type {
MdxjsEsm,
MdxJsxAttribute,
MdxJsxTextElement,
} from 'mdast-util-mdx';
import type {TOCHeading, TOCItem, TOCItems, TOCSlice} from './types';
import type {
Program,
@ -15,6 +19,7 @@ import type {
ImportDeclaration,
ImportSpecifier,
} from 'estree';
import type {Heading, PhrasingContent} from 'mdast';
export function getImportDeclarations(program: Program): ImportDeclaration[] {
return program.body.filter(
@ -118,7 +123,7 @@ export async function createTOCExportNodeAST({
const {toString} = await import('mdast-util-to-string');
const {valueToEstree} = await import('estree-util-value-to-estree');
const value: TOCItem = {
value: toValue(heading, toString),
value: toHeadingHTMLValue(heading, toString),
id: heading.data!.id!,
level: heading.depth,
};
@ -172,3 +177,73 @@ export async function createTOCExportNodeAST({
},
};
}
/**
 * Serializes the children of a parent node to an HTML string by converting
 * each child with `toHeadingHTMLValue` and concatenating the results.
 *
 * Surrounding whitespace is trimmed only when `node` is the `heading` itself.
 * Whitespace at the edges of a nested inline element can be the only thing
 * separating two words (e.g. `Hello<b> world</b>`), so it must be preserved
 * there; trimming it would glue the words together in the output.
 *
 * @param node Parent whose children are serialized (treated as phrasing
 * content).
 * @param toString Text serializer forwarded to `toHeadingHTMLValue`.
 * @returns The concatenated HTML of all children; trimmed for a heading.
 */
function stringifyChildren(
  node: Parent,
  toString: (param: unknown) => string, // TODO temporary, due to ESM
): string {
  const html = (node.children as PhrasingContent[])
    .map((item) => toHeadingHTMLValue(item, toString))
    .join('');
  // Only the outermost level may drop leading/trailing whitespace, e.g. the
  // space left behind once a leading <img> has been removed from the heading.
  return node.type === 'heading' ? html.trim() : html;
}
// TODO This is really a workaround, and not super reliable
// For now we only support serializing tagName, className and content
// Can we implement the TOC with real JSX nodes instead of html strings later?
/**
 * Serializes an inline JSX element found in a heading to an HTML string.
 *
 * - `<img>` elements are dropped entirely (empty string).
 * - JSX fragments (`<>...</>`) have no tag name, so only their serialized
 *   children are returned instead of a bogus `<null>` tag.
 * - Only a `className`/`class` attribute is kept, and only when its value is
 *   a plain string: expression values (`className={...}`) and valueless
 *   attributes cannot be turned into static HTML and are omitted rather than
 *   serialized as `[object Object]` or `null`.
 *
 * @param element The JSX text element to serialize.
 * @param toString Text serializer forwarded to `stringifyChildren`.
 * @returns The HTML string for the element.
 */
function mdxJsxTextElementToHtml(
  element: MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO temporary, due to ESM
): string {
  const tag = element.name;
  // See https://github.com/facebook/docusaurus/issues/11003#issuecomment-2733925363
  if (tag === 'img') {
    return '';
  }
  const content = stringifyChildren(element, toString);
  // Per the mdast-util-mdx-jsx typings, `name` is null for fragments.
  if (tag === null) {
    return content;
  }
  const attributes = element.attributes.filter(
    (child): child is MdxJsxAttribute => child.type === 'mdxJsxAttribute',
  );
  const classAttribute =
    attributes.find((attr) => attr.name === 'className') ??
    attributes.find((attr) => attr.name === 'class');
  const classValue = classAttribute?.value;
  // Non-string values (expression objects, null/undefined) are skipped.
  const allAttributes =
    typeof classValue === 'string' ? ` class="${escapeHtml(classValue)}"` : '';
  return `<${tag}${allAttributes}>${content}</${tag}>`;
}
/**
 * Converts a heading, or one of its inline descendants, to the HTML string
 * stored as the `value` of a TOC item.
 *
 * - JSX text elements are delegated to `mdxJsxTextElementToHtml`.
 * - `text` values are HTML-escaped; `inlineCode` values are escaped and
 *   wrapped in `<code>`.
 * - `emphasis`, `strong` and `delete` wrap their serialized children in
 *   `<em>`, `<strong>` and `<del>` respectively.
 * - `heading` and `link` contribute only their serialized children.
 * - Every other node type is handed to `toString` as-is.
 *
 * @param node The node to serialize.
 * @param toString Fallback text serializer, also forwarded to nested calls.
 * @returns The HTML string for the node.
 */
export function toHeadingHTMLValue(
  node: PhrasingContent | Heading | MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO temporary, due to ESM
): string {
  if (node.type === 'mdxJsxTextElement') {
    return mdxJsxTextElementToHtml(node as MdxJsxTextElement, toString);
  }
  if (node.type === 'text') {
    return escapeHtml(node.value);
  }
  if (node.type === 'inlineCode') {
    const escapedCode = escapeHtml(node.value);
    return `<code>${escapedCode}</code>`;
  }
  // Transparent containers: no wrapping tag of their own.
  if (node.type === 'heading' || node.type === 'link') {
    return stringifyChildren(node, toString);
  }
  if (node.type === 'emphasis') {
    const inner = stringifyChildren(node, toString);
    return `<em>${inner}</em>`;
  }
  if (node.type === 'strong') {
    const inner = stringifyChildren(node, toString);
    return `<strong>${inner}</strong>`;
  }
  if (node.type === 'delete') {
    const inner = stringifyChildren(node, toString);
    return `<del>${inner}</del>`;
  }
  return toString(node);
}

View file

@ -5,14 +5,8 @@
* LICENSE file in the root directory of this source tree.
*/
import escapeHtml from 'escape-html';
import type {Parent, Node} from 'unist';
import type {PhrasingContent, Heading} from 'mdast';
import type {
MdxJsxAttribute,
MdxJsxAttributeValueExpression,
MdxJsxTextElement,
} from 'mdast-util-mdx';
import type {Node} from 'unist';
import type {MdxJsxAttributeValueExpression} from 'mdast-util-mdx';
/**
* Util to transform one node type to another node type
@ -35,70 +29,6 @@ export function transformNode<NewNode extends Node>(
return node as NewNode;
}
/**
 * Serializes every child of `node` with `toValue` and concatenates the
 * results, in order, without any separator.
 *
 * @param node Parent whose children are serialized (treated as phrasing
 * content).
 * @param toString Text serializer forwarded to `toValue`.
 * @returns The concatenated string; empty when there are no children.
 */
export function stringifyContent(
  node: Parent,
  toString: (param: unknown) => string, // TODO weird but works
): string {
  let html = '';
  for (const child of node.children as PhrasingContent[]) {
    html += toValue(child, toString);
  }
  return html;
}
// TODO This is really a workaround, and not super reliable
// For now we only support serializing tagName, className and content
// Can we implement the TOC with real JSX nodes instead of html strings later?
/**
 * Serializes an inline JSX element to an HTML string made of its tag name,
 * an optional `class` attribute and its serialized children.
 *
 * - JSX fragments (`<>...</>`) have no tag name, so only their serialized
 *   children are returned instead of a bogus `<null>` tag.
 * - The `class` attribute is taken from `className` (preferred) or `class`,
 *   and is emitted only when its value is a plain string. Expression values
 *   (`className={...}`) and valueless attributes are omitted rather than
 *   serialized as `[object Object]` or `null`.
 *
 * @param element The JSX text element to serialize.
 * @param toString Text serializer forwarded to `stringifyContent`.
 * @returns The HTML string for the element.
 */
function mdxJsxTextElementToHtml(
  element: MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO weird but works
): string {
  const tag = element.name;
  const content = stringifyContent(element, toString);
  // Per the mdast-util-mdx-jsx typings, `name` is null for fragments.
  if (tag === null) {
    return content;
  }
  const attributes = element.attributes.filter(
    (child): child is MdxJsxAttribute => child.type === 'mdxJsxAttribute',
  );
  const classAttribute =
    attributes.find((attr) => attr.name === 'className') ??
    attributes.find((attr) => attr.name === 'class');
  const classValue = classAttribute?.value;
  // Non-string values (expression objects, null/undefined) are skipped.
  const allAttributes =
    typeof classValue === 'string' ? ` class="${escapeHtml(classValue)}"` : '';
  return `<${tag}${allAttributes}>${content}</${tag}>`;
}
/**
 * Converts a heading, or one of its inline descendants, to an HTML string.
 *
 * - JSX text elements are delegated to `mdxJsxTextElementToHtml`.
 * - `text` values are HTML-escaped; `inlineCode` values are escaped and
 *   wrapped in `<code>`.
 * - `emphasis`, `strong` and `delete` wrap their serialized children in
 *   `<em>`, `<strong>` and `<del>` respectively.
 * - `heading` and `link` contribute only their serialized children.
 * - Every other node type is handed to `toString` as-is.
 *
 * @param node The node to serialize.
 * @param toString Fallback text serializer, also forwarded to nested calls.
 * @returns The HTML string for the node.
 */
export function toValue(
  node: PhrasingContent | Heading | MdxJsxTextElement,
  toString: (param: unknown) => string, // TODO weird but works
): string {
  // Serializes the children of `parent` and surrounds them with `tag`.
  const wrapChildren = (tag: string, parent: Parent): string =>
    `<${tag}>${stringifyContent(parent, toString)}</${tag}>`;

  if (node.type === 'mdxJsxTextElement') {
    return mdxJsxTextElementToHtml(node as MdxJsxTextElement, toString);
  }
  if (node.type === 'text') {
    return escapeHtml(node.value);
  }
  if (node.type === 'inlineCode') {
    return `<code>${escapeHtml(node.value)}</code>`;
  }
  // Transparent containers: no wrapping tag of their own.
  if (node.type === 'heading' || node.type === 'link') {
    return stringifyContent(node, toString);
  }
  if (node.type === 'emphasis') {
    return wrapChildren('em', node);
  }
  if (node.type === 'strong') {
    return wrapChildren('strong', node);
  }
  if (node.type === 'delete') {
    return wrapChildren('del', node);
  }
  return toString(node);
}
export function assetRequireAttributeValue(
requireString: string,
hash: string,