refactor(content-blog): replace reading-time with Intl.Segmenter API (#11138)

Co-authored-by: sebastien <lorber.sebastien@gmail.com>
This commit is contained in:
Shreedhar Bhat 2025-05-15 16:52:00 +05:30 committed by GitHub
parent c419d7ec88
commit 9f6360ba82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 96 additions and 69 deletions

View file

@ -43,7 +43,6 @@
"feed": "^4.2.2", "feed": "^4.2.2",
"fs-extra": "^11.1.1", "fs-extra": "^11.1.1",
"lodash": "^4.17.21", "lodash": "^4.17.21",
"reading-time": "^1.5.0",
"schema-dts": "^1.1.2", "schema-dts": "^1.1.2",
"srcset": "^4.0.0", "srcset": "^4.0.0",
"tslib": "^2.6.0", "tslib": "^2.6.0",

View file

@ -150,7 +150,7 @@ exports[`blog plugin process blog posts load content 2`] = `
"title": "Another With Tag", "title": "Another With Tag",
}, },
"permalink": "/blog/simple/slug/another", "permalink": "/blog/simple/slug/another",
"readingTime": 0.015, "readingTime": 0.02,
"source": "@site/blog/another-simple-slug-with-tags.md", "source": "@site/blog/another-simple-slug-with-tags.md",
"tags": [ "tags": [
{ {

View file

@ -120,7 +120,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null}, xslt: {atom: null, rss: null},
}, },
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}), defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/, truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore', onInlineTags: 'ignore',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',
@ -164,7 +164,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null}, xslt: {atom: null, rss: null},
}, },
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}), defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/, truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore', onInlineTags: 'ignore',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',
@ -220,7 +220,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null}, xslt: {atom: null, rss: null},
}, },
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}), defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/, truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore', onInlineTags: 'ignore',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',
@ -267,7 +267,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null}, xslt: {atom: null, rss: null},
}, },
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}), defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/, truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore', onInlineTags: 'ignore',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',
@ -314,7 +314,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null}, xslt: {atom: null, rss: null},
}, },
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}), defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/, truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore', onInlineTags: 'ignore',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',
@ -360,7 +360,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: true, xslt: true,
}, },
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}), defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/, truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore', onInlineTags: 'ignore',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',
@ -409,7 +409,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
}, },
}, },
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}), defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/, truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore', onInlineTags: 'ignore',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',

View file

@ -211,7 +211,7 @@ describe('blog plugin', () => {
).toEqual({ ).toEqual({
editUrl: `${BaseEditUrl}/blog/2018-12-14-Happy-First-Birthday-Slash.md`, editUrl: `${BaseEditUrl}/blog/2018-12-14-Happy-First-Birthday-Slash.md`,
permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash', permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash',
readingTime: 0.015, readingTime: 0.02,
source: path.posix.join( source: path.posix.join(
'@site', '@site',
path.posix.join('i18n', 'en', 'docusaurus-plugin-content-blog'), path.posix.join('i18n', 'en', 'docusaurus-plugin-content-blog'),
@ -276,7 +276,7 @@ describe('blog plugin', () => {
}).toEqual({ }).toEqual({
editUrl: `${BaseEditUrl}/blog/complex-slug.md`, editUrl: `${BaseEditUrl}/blog/complex-slug.md`,
permalink: '/blog/hey/my super path/héllô', permalink: '/blog/hey/my super path/héllô',
readingTime: 0.015, readingTime: 0.02,
source: path.posix.join('@site', PluginPath, 'complex-slug.md'), source: path.posix.join('@site', PluginPath, 'complex-slug.md'),
title: 'Complex Slug', title: 'Complex Slug',
description: `complex url slug`, description: `complex url slug`,
@ -318,7 +318,7 @@ describe('blog plugin', () => {
}).toEqual({ }).toEqual({
editUrl: `${BaseEditUrl}/blog/simple-slug.md`, editUrl: `${BaseEditUrl}/blog/simple-slug.md`,
permalink: '/blog/simple/slug', permalink: '/blog/simple/slug',
readingTime: 0.015, readingTime: 0.02,
source: path.posix.join('@site', PluginPath, 'simple-slug.md'), source: path.posix.join('@site', PluginPath, 'simple-slug.md'),
title: 'Simple Slug', title: 'Simple Slug',
description: `simple url slug`, description: `simple url slug`,

View file

@ -9,48 +9,46 @@ import {calculateReadingTime} from '../readingTime';
describe('calculateReadingTime', () => { describe('calculateReadingTime', () => {
it('calculates reading time for empty content', () => { it('calculates reading time for empty content', () => {
expect(calculateReadingTime('')).toBe(0); expect(calculateReadingTime('', 'en')).toBe(0);
}); });
it('calculates reading time for short content', () => { it('calculates reading time for short content', () => {
const content = 'This is a short test content.'; const content = 'This is a short test content.';
expect(calculateReadingTime(content)).toBe(0.03); expect(calculateReadingTime(content, 'en')).toBe(0.03);
}); });
it('calculates reading time for long content', () => { it('calculates reading time for long content', () => {
const content = 'This is a test content. '.repeat(100); const content = 'This is a test content. '.repeat(100);
expect(calculateReadingTime(content)).toBe(2.5); expect(calculateReadingTime(content, 'en')).toBe(2.5);
}); });
it('respects custom words per minute', () => { it('respects custom words per minute', () => {
const content = 'This is a test content. '.repeat(100); const content = 'This is a test content. '.repeat(100);
expect(calculateReadingTime(content, {wordsPerMinute: 100})).toBe(5); expect(calculateReadingTime(content, 'en', {wordsPerMinute: 100})).toBe(5);
}); });
it('handles content with special characters', () => { it('handles content with special characters', () => {
const content = 'Hello! How are you? This is a test...'; const content = 'Hello! How are you? This is a test...';
expect(calculateReadingTime(content)).toBe(0.04); expect(calculateReadingTime(content, 'en')).toBe(0.04);
}); });
it('handles content with multiple lines', () => { it('handles content with multiple lines', () => {
const content = `This is line 1. const content = `This is line 1.\n This is line 2.\n This is line 3.`;
This is line 2. expect(calculateReadingTime(content, 'en')).toBe(0.06);
This is line 3.`;
expect(calculateReadingTime(content)).toBe(0.06);
}); });
it('handles content with HTML tags', () => { it('handles content with HTML tags', () => {
const content = '<p>This is a <strong>test</strong> content.</p>'; const content = '<p>This is a <strong>test</strong> content.</p>';
expect(calculateReadingTime(content)).toBe(0.025); expect(calculateReadingTime(content, 'en')).toBe(0.05);
}); });
it('handles content with markdown', () => { it('handles content with markdown', () => {
const content = '# Title\n\nThis is **bold** and *italic* text.'; const content = '# Title\n\nThis is **bold** and *italic* text.';
expect(calculateReadingTime(content)).toBe(0.04); expect(calculateReadingTime(content, 'en')).toBe(0.04);
}); });
it('handles CJK content', () => { it('handles CJK content', () => {
const content = '你好,世界!这是一段测试内容。'; const content = '你好,世界!这是一段测试内容。';
expect(calculateReadingTime(content)).toBe(0.06); expect(calculateReadingTime(content, 'zh')).toBe(0.04);
}); });
}); });

View file

@ -210,8 +210,8 @@ async function parseBlogPostMarkdownFile({
} }
} }
const defaultReadingTime: ReadingTimeFunction = ({content, options}) => const defaultReadingTime: ReadingTimeFunction = ({content, locale, options}) =>
calculateReadingTime(content, options); calculateReadingTime(content, locale, options);
async function processBlogSourceFile( async function processBlogSourceFile(
blogSourceRelative: string, blogSourceRelative: string,
@ -373,6 +373,7 @@ async function processBlogSourceFile(
content, content,
frontMatter, frontMatter,
defaultReadingTime, defaultReadingTime,
locale: i18n.currentLocale,
}) })
: undefined, : undefined,
hasTruncateMarker: truncateMarker.test(content), hasTruncateMarker: truncateMarker.test(content),

View file

@ -63,7 +63,8 @@ export const DEFAULT_OPTIONS: PluginOptions = {
path: 'blog', path: 'blog',
editLocalizedFiles: false, editLocalizedFiles: false,
authorsMapPath: 'authors.yml', authorsMapPath: 'authors.yml',
readingTime: ({content, defaultReadingTime}) => defaultReadingTime({content}), readingTime: ({content, defaultReadingTime, locale}) =>
defaultReadingTime({content, locale}),
sortPosts: 'descending', sortPosts: 'descending',
showLastUpdateTime: false, showLastUpdateTime: false,
showLastUpdateAuthor: false, showLastUpdateAuthor: false,

View file

@ -387,15 +387,10 @@ declare module '@docusaurus/plugin-content-blog' {
}; };
/** /**
* Duplicate from ngryman/reading-time to keep stability of API. * Options for reading time calculation using Intl.Segmenter.
*/ */
type ReadingTimeOptions = { type ReadingTimeOptions = {
wordsPerMinute?: number; wordsPerMinute?: number;
/**
* @param char The character to be matched.
* @returns `true` if this character is a word bound.
*/
wordBound?: (char: string) => boolean;
}; };
/** /**
@ -405,24 +400,22 @@ declare module '@docusaurus/plugin-content-blog' {
export type ReadingTimeFunction = (params: { export type ReadingTimeFunction = (params: {
/** Markdown content. */ /** Markdown content. */
content: string; content: string;
/** Locale for word segmentation. */
locale: string;
/** Front matter. */ /** Front matter. */
frontMatter?: BlogPostFrontMatter & {[key: string]: unknown}; frontMatter?: BlogPostFrontMatter & {[key: string]: unknown};
/** Options accepted by ngryman/reading-time. */ /** Options for reading time calculation. */
options?: ReadingTimeOptions; options?: ReadingTimeOptions;
}) => number; }) => number;
/** /**
* @returns The reading time directly plugged into metadata. `undefined` to * @returns The reading time directly plugged into metadata.
* hide reading time for a specific post. * `undefined` to hide reading time for a specific post.
*/ */
export type ReadingTimeFunctionOption = ( export type ReadingTimeFunctionOption = (
/**
* The `options` is not provided by the caller; the user can inject their
* own option values into `defaultReadingTime`
*/
params: Required<Omit<Parameters<ReadingTimeFunction>[0], 'options'>> & { params: Required<Omit<Parameters<ReadingTimeFunction>[0], 'options'>> & {
/** /**
* The default reading time implementation from ngryman/reading-time. * The default reading time implementation.
*/ */
defaultReadingTime: ReadingTimeFunction; defaultReadingTime: ReadingTimeFunction;
}, },

View file

@ -5,25 +5,45 @@
* LICENSE file in the root directory of this source tree. * LICENSE file in the root directory of this source tree.
*/ */
import readingTime from 'reading-time';
const DEFAULT_WORDS_PER_MINUTE = 200; const DEFAULT_WORDS_PER_MINUTE = 200;
interface ReadingTimeOptions { /**
wordsPerMinute?: number; * Counts the number of words in a string using Intl.Segmenter.
wordBound?: (char: string) => boolean; * @param content The text content to count words in.
* @param locale The locale to use for segmentation.
*/
function countWords(content: string, locale: string): number {
if (!content) {
return 0;
}
const segmenter = new Intl.Segmenter(locale, {granularity: 'word'});
let wordCount = 0;
for (const {isWordLike} of segmenter.segment(content)) {
if (isWordLike) {
wordCount += 1;
}
}
return wordCount;
} }
/** /**
* Calculates the reading time for a given content string. * Calculates the reading time for a given content string using Intl.Segmenter.
* Uses the reading-time package under the hood. * @param content The text content to calculate reading time for.
* @param locale Required locale string for Intl.Segmenter
* @param options Options for reading time calculation.
* - wordsPerMinute: number of words per minute (default 200)
* @returns Estimated reading time in minutes (float, rounded to 2 decimals)
*/ */
export function calculateReadingTime( export function calculateReadingTime(
content: string, content: string,
options: ReadingTimeOptions = {}, locale: string,
options?: {wordsPerMinute?: number},
): number { ): number {
const wordsPerMinute = options.wordsPerMinute ?? DEFAULT_WORDS_PER_MINUTE; const wordsPerMinute = options?.wordsPerMinute ?? DEFAULT_WORDS_PER_MINUTE;
const {wordBound} = options; const words = countWords(content, locale);
return readingTime(content, {wordsPerMinute, ...(wordBound && {wordBound})}) if (words === 0) {
.minutes; return 0;
}
// Calculate reading time in minutes and round to 2 decimal places
return Math.round((words / wordsPerMinute) * 100) / 100;
} }

View file

@ -104,7 +104,11 @@ export const dogfoodingPluginInstances: PluginConfig[] = [
readingTime: ({content, frontMatter, defaultReadingTime}) => readingTime: ({content, frontMatter, defaultReadingTime}) =>
frontMatter.hide_reading_time frontMatter.hide_reading_time
? undefined ? undefined
: defaultReadingTime({content, options: {wordsPerMinute: 5}}), : defaultReadingTime({
content,
locale: 'en',
options: {wordsPerMinute: 5},
}),
onInlineTags: 'warn', onInlineTags: 'warn',
onInlineAuthors: 'ignore', onInlineAuthors: 'ignore',
onUntruncatedBlogPosts: 'ignore', onUntruncatedBlogPosts: 'ignore',

View file

@ -109,17 +109,18 @@ type EditUrlFunction = (params: {
```ts ```ts
type ReadingTimeOptions = { type ReadingTimeOptions = {
wordsPerMinute: number; wordsPerMinute: number;
wordBound: (char: string) => boolean;
}; };
type ReadingTimeCalculator = (params: { type ReadingTimeCalculator = (params: {
content: string; content: string;
locale: string;
frontMatter?: BlogPostFrontMatter & Record<string, unknown>; frontMatter?: BlogPostFrontMatter & Record<string, unknown>;
options?: ReadingTimeOptions; options?: ReadingTimeOptions;
}) => number; }) => number;
type ReadingTimeFn = (params: { type ReadingTimeFn = (params: {
content: string; content: string;
locale: string;
frontMatter: BlogPostFrontMatter & Record<string, unknown>; frontMatter: BlogPostFrontMatter & Record<string, unknown>;
defaultReadingTime: ReadingTimeCalculator; defaultReadingTime: ReadingTimeCalculator;
}) => number | undefined; }) => number | undefined;

View file

@ -476,8 +476,12 @@ export default {
blog: { blog: {
// highlight-start // highlight-start
showReadingTime: true, // When set to false, the "x min read" won't be shown showReadingTime: true, // When set to false, the "x min read" won't be shown
readingTime: ({content, frontMatter, defaultReadingTime}) => readingTime: ({content, locale, frontMatter, defaultReadingTime}) =>
defaultReadingTime({content, options: {wordsPerMinute: 300}}), defaultReadingTime({
content,
locale,
options: {wordsPerMinute: 300},
}),
// highlight-end // highlight-end
}, },
}, },
@ -486,9 +490,16 @@ export default {
}; };
``` ```
The `readingTime` callback receives three parameters: the blog content text as a string, front matter as a record of string keys and their values, and the default reading time function. It returns a number (reading time in minutes) or `undefined` (disable reading time for this page). The `readingTime` callback receives the following parameters:
The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 300), and `wordBound` as a function from string to boolean. If the string passed to `wordBound` should be a word bound (spaces, tabs, and line breaks by default), the function should return `true`. - `content`: the blog content text as a string
- `frontMatter`: the front matter as a record of string keys and their values
- `locale`: the locale of the current Docusaurus site
- `defaultReadingTime`: the default built-in reading time function, which returns a number (reading time in minutes). The `readingTime` callback itself returns a number (reading time in minutes) or `undefined` (disable reading time for this page).
The default reading time is able to accept additional options:
- `wordsPerMinute` as a number (default: 200)
:::tip :::tip
@ -510,10 +521,10 @@ export default {
blog: { blog: {
showReadingTime: true, showReadingTime: true,
// highlight-start // highlight-start
readingTime: ({content, frontMatter, defaultReadingTime}) => readingTime: ({content, locale, frontMatter, defaultReadingTime}) =>
frontMatter.hide_reading_time frontMatter.hide_reading_time
? undefined ? undefined
: defaultReadingTime({content}), : defaultReadingTime({content, locale}),
// highlight-end // highlight-end
}, },
}, },
@ -547,8 +558,12 @@ export default {
{ {
blog: { blog: {
// highlight-start // highlight-start
readingTime: ({content, defaultReadingTime}) => readingTime: ({content, locale, defaultReadingTime}) =>
defaultReadingTime({content, options: {wordsPerMinute: 100}}), defaultReadingTime({
content,
locale,
options: {wordsPerMinute: 100},
}),
// highlight-end // highlight-end
}, },
}, },
@ -574,7 +589,7 @@ export default {
{ {
blog: { blog: {
// highlight-next-line // highlight-next-line
readingTime: ({content}) => myReadingTime(content), readingTime: ({content, locale}) => myReadingTime(content, locale),
}, },
}, },
], ],

View file

@ -15578,11 +15578,6 @@ readdirp@~3.6.0:
dependencies: dependencies:
picomatch "^2.2.1" picomatch "^2.2.1"
reading-time@^1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/reading-time/-/reading-time-1.5.0.tgz#d2a7f1b6057cb2e169beaf87113cc3411b5bc5bb"
integrity sha512-onYyVhBNr4CmAxFsKS7bz+uTLRakypIe4R+5A824vBSkQy/hB3fZepoVEf8OVAxzLvK+H/jm9TzpI3ETSm64Kg==
rechoir@^0.6.2: rechoir@^0.6.2:
version "0.6.2" version "0.6.2"
resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384" resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384"