perf(core): optimize SSG collected data memory and worker thread communication (#11162)

This commit is contained in:
Sébastien Lorber 2025-05-09 13:34:02 +02:00 committed by GitHub
parent 53fa0ecb1f
commit 33811e38fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 105 additions and 46 deletions

View file

@ -72,12 +72,22 @@ function createPerfLogger(): PerfLoggerAPI {
}
};
const formatMemory = (memory: Memory): string => {
const fmtHead = (bytes: number) =>
logger.cyan(`${(bytes / 1000000).toFixed(0)}mb`);
// Renders a byte count as a whole number of megabytes (bytes / 1024 / 1024,
// formatted with toFixed(0)) followed by "mb", wrapped in logger.cyan().
const formatBytesToMb = (bytes: number) => {
  const megabytes = bytes / 1024 / 1024;
  return logger.cyan(`${megabytes.toFixed(0)}mb`);
};
const formatMemoryDelta = (memory: Memory): string => {
return logger.dim(
`(${fmtHead(memory.before.heapUsed)} -> ${fmtHead(
`(Heap ${formatBytesToMb(memory.before.heapUsed)} -> ${formatBytesToMb(
memory.after.heapUsed,
)} / Total ${formatBytesToMb(memory.after.heapTotal)})`,
);
};
// Builds a dimmed "(Heap X / Total Y)" label from the memory snapshot
// returned by getMemory() at the time of the call.
const formatMemoryCurrent = (): string => {
  const {heapUsed, heapTotal} = getMemory();
  const heapLabel = formatBytesToMb(heapUsed);
  const totalLabel = formatBytesToMb(heapTotal);
  return logger.dim(`(Heap ${heapLabel} / Total ${totalLabel})`);
};
@ -103,7 +113,7 @@ function createPerfLogger(): PerfLoggerAPI {
console.log(
`${PerfPrefix}${formatStatus(error)} ${label} - ${formatDuration(
duration,
)} - ${formatMemory(memory)}`,
)} - ${formatMemoryDelta(memory)}`,
);
};
@ -144,7 +154,9 @@ function createPerfLogger(): PerfLoggerAPI {
};
const log: PerfLoggerAPI['log'] = (label: string) =>
console.log(`${PerfPrefix} ${applyParentPrefix(label)}`);
console.log(
`${PerfPrefix} ${applyParentPrefix(label)} - ${formatMemoryCurrent()}`,
);
const async: PerfLoggerAPI['async'] = async (label, asyncFn) => {
const finalLabel = applyParentPrefix(label);

View file

@ -16,8 +16,8 @@ import {
createStatefulBrokenLinks,
BrokenLinksProvider,
} from './BrokenLinksContext';
import {toPageCollectedMetadata} from './serverHelmetUtils';
import type {PageCollectedData, AppRenderer} from '../common';
import {toPageCollectedMetadataInternal} from './serverHelmetUtils';
import type {AppRenderer, PageCollectedDataInternal} from '../common';
const render: AppRenderer['render'] = async ({
pathname,
@ -47,7 +47,7 @@ const render: AppRenderer['render'] = async ({
const {helmet} = helmetContext as FilledContext;
const metadata = toPageCollectedMetadata({helmet});
const metadata = toPageCollectedMetadataInternal({helmet});
// TODO Docusaurus v4 remove with deprecated postBuild({head}) API
// the returned collectedData must be serializable to run in workers
@ -55,7 +55,7 @@ const render: AppRenderer['render'] = async ({
metadata.helmet = null;
}
const collectedData: PageCollectedData = {
const collectedData: PageCollectedDataInternal = {
metadata,
anchors: statefulBrokenLinks.getCollectedAnchors(),
links: statefulBrokenLinks.getCollectedLinks(),

View file

@ -6,7 +6,7 @@
*/
import type {ReactElement} from 'react';
import type {PageCollectedMetadata} from '../common';
import type {PageCollectedMetadataInternal} from '../common';
import type {HelmetServerState} from 'react-helmet-async';
type BuildMetaTag = {name?: string; content?: string};
@ -30,11 +30,11 @@ function isNoIndexTag(tag: BuildMetaTag): boolean {
);
}
export function toPageCollectedMetadata({
export function toPageCollectedMetadataInternal({
helmet,
}: {
helmet: HelmetServerState;
}): PageCollectedMetadata {
}): PageCollectedMetadataInternal {
const tags = getBuildMetaTags(helmet);
const noIndex = tags.some(isNoIndexTag);

View file

@ -13,7 +13,7 @@ import type {RouteBuildMetadata} from '@docusaurus/types';
export type AppRenderResult = {
html: string;
collectedData: PageCollectedData;
collectedData: PageCollectedDataInternal;
};
export type AppRenderer = {
@ -40,23 +40,43 @@ export type RouteBuildMetadataInternal = {
script: string;
};
// This data structure must remain serializable!
// See why: https://github.com/facebook/docusaurus/pull/10826
export type PageCollectedMetadata = {
public: RouteBuildMetadata;
internal: RouteBuildMetadataInternal;
// TODO Docusaurus v4 remove legacy unserializable helmet data structure
// See https://github.com/facebook/docusaurus/pull/10850
helmet: HelmetServerState | null;
};
// This data structure must remain serializable!
// See why: https://github.com/facebook/docusaurus/pull/10826
//
// PageCollectedMetadata extended with an `internal` group of string fields.
// According to the comment on reduceCollectedData(), `metadata.internal` is
// only useful to create the HTML file, and that function drops it once the
// file has been written.
export type PageCollectedMetadataInternal = PageCollectedMetadata & {
internal: {
// NOTE(review): presumably each field is a pre-rendered HTML fragment derived
// from the Helmet state - confirm against toPageCollectedMetadataInternal()
htmlAttributes: string;
bodyAttributes: string;
title: string;
meta: string;
link: string;
script: string;
};
};
// Full per-page collected data, as exposed by AppRenderResult.collectedData.
// It carries more than PageCollectedData: reduceCollectedData() copies only
// `anchors`, `links`, `metadata.public` and `metadata.helmet` out of it after
// the HTML file is written, leaving `modules` and `metadata.internal` behind.
export type PageCollectedDataInternal = {
metadata: PageCollectedMetadataInternal;
modules: string[];
links: string[];
anchors: string[];
};
// Keep this data structure as small as possible
// See https://github.com/facebook/docusaurus/pull/11162
export type PageCollectedData = {
metadata: PageCollectedMetadata;
links: string[];
anchors: string[];
modules: string[];
};
// Keep this data structure as small as possible
// See https://github.com/facebook/docusaurus/pull/11162
export type SiteCollectedData = {
[pathname: string]: PageCollectedData;
};

View file

@ -38,16 +38,13 @@ const createSimpleSSGExecutor: CreateSSGExecutor = async ({
}) => {
return {
run: () => {
return PerfLogger.async(
'Generate static files (current thread)',
async () => {
return PerfLogger.async('SSG (current thread)', async () => {
const ssgResults = await executeSSGInlineTask({
pathnames,
params,
});
return createGlobalSSGResult(ssgResults);
},
);
});
},
destroy: async () => {
@ -111,7 +108,7 @@ const createPooledSSGExecutor: CreateSSGExecutor = async ({
}
const pool = await PerfLogger.async(
`Create SSG pool - ${logger.cyan(numberOfThreads)} threads`,
`Create SSG thread pool - ${logger.cyan(numberOfThreads)} threads`,
async () => {
const Tinypool = await import('tinypool').then((m) => m.default);
@ -134,13 +131,17 @@ const createPooledSSGExecutor: CreateSSGExecutor = async ({
const pathnamesChunks = _.chunk(pathnames, SSGWorkerThreadTaskSize);
// Tiny wrapper for type-safety
const submitTask: ExecuteSSGWorkerThreadTask = (task) => pool.run(task);
const submitTask: ExecuteSSGWorkerThreadTask = async (task) => {
  // PerfLogger.async() is deliberately not used here: every task is
  // submitted immediately and queued, whereas results come back
  // progressively. A plain log line is emitted as each result arrives.
  const taskResult = await pool.run(task);
  PerfLogger.log(`Result for task ${logger.name(task.id)}`);
  return taskResult;
};
return {
run: async () => {
const results = await PerfLogger.async(
`Generate static files (${numberOfThreads} worker threads)`,
async () => {
const results = await PerfLogger.async(`Thread pool`, async () => {
return Promise.all(
pathnamesChunks.map((taskPathnames, taskIndex) => {
return submitTask({
@ -149,8 +150,7 @@ const createPooledSSGExecutor: CreateSSGExecutor = async ({
});
}),
);
},
);
});
const allResults = results.flat();
return createGlobalSSGResult(allResults);
},

View file

@ -22,14 +22,18 @@ import {SSGConcurrency} from './ssgEnv';
import {writeStaticFile} from './ssgUtils';
import {createSSGRequire} from './ssgNodeRequire';
import type {SSGParams} from './ssgParams';
import type {AppRenderer, AppRenderResult} from '../common';
import type {
AppRenderer,
PageCollectedData,
PageCollectedDataInternal,
} from '../common';
import type {HtmlMinifier} from '@docusaurus/bundler';
export type SSGSuccess = {
success: true;
pathname: string;
result: {
collectedData: AppRenderResult['collectedData'];
collectedData: PageCollectedData;
warnings: string[];
// html: we don't include it on purpose!
// we don't need to aggregate all html contents in memory!
@ -144,6 +148,26 @@ export async function loadSSGRenderer({
};
}
/**
 * Shrinks the data collected for a page once its HTML file has been written.
 *
 * Some collected data (`modules`, `metadata.internal`) is only useful to
 * create the HTML file, so there is no point aggregating it in memory
 * afterwards. Keep the returned data structure as small as possible.
 * See https://github.com/facebook/docusaurus/pull/11162
 *
 * @param pageCollectedData the full collected data produced while rendering
 * @returns a new object holding only `anchors`, `metadata.public`,
 * `metadata.helmet` and `links` (the values themselves are not copied)
 */
function reduceCollectedData(
  pageCollectedData: PageCollectedDataInternal,
): PageCollectedData {
  const {anchors, metadata, links} = pageCollectedData;
  // The result is spelled out as a fresh object literal on purpose: we
  // absolutely want to avoid TS duck typing letting the larger internal
  // structure pass as the reduced one.
  return {
    anchors,
    metadata: {public: metadata.public, helmet: metadata.helmet},
    links,
  };
}
async function generateStaticFile({
pathname,
appRenderer,
@ -176,11 +200,14 @@ async function generateStaticFile({
content: minifierResult.code,
params,
});
const collectedData = reduceCollectedData(appRenderResult.collectedData);
return {
success: true,
pathname,
result: {
collectedData: appRenderResult.collectedData,
collectedData,
// As of today, only the html minifier can emit SSG warnings
warnings: minifierResult.warnings,
},