refactor(content-blog): replace reading-time with Intl.Segmenter API (#11138)

Co-authored-by: sebastien <lorber.sebastien@gmail.com>
This commit is contained in:
Shreedhar Bhat 2025-05-15 16:52:00 +05:30 committed by GitHub
parent c419d7ec88
commit 9f6360ba82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 96 additions and 69 deletions

View file

@ -43,7 +43,6 @@
"feed": "^4.2.2",
"fs-extra": "^11.1.1",
"lodash": "^4.17.21",
"reading-time": "^1.5.0",
"schema-dts": "^1.1.2",
"srcset": "^4.0.0",
"tslib": "^2.6.0",

View file

@ -150,7 +150,7 @@ exports[`blog plugin process blog posts load content 2`] = `
"title": "Another With Tag",
},
"permalink": "/blog/simple/slug/another",
"readingTime": 0.015,
"readingTime": 0.02,
"source": "@site/blog/another-simple-slug-with-tags.md",
"tags": [
{

View file

@ -120,7 +120,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null},
},
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}),
defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore',
onInlineAuthors: 'ignore',
@ -164,7 +164,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null},
},
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}),
defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore',
onInlineAuthors: 'ignore',
@ -220,7 +220,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null},
},
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}),
defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore',
onInlineAuthors: 'ignore',
@ -267,7 +267,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null},
},
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}),
defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore',
onInlineAuthors: 'ignore',
@ -314,7 +314,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: {atom: null, rss: null},
},
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}),
defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore',
onInlineAuthors: 'ignore',
@ -360,7 +360,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
xslt: true,
},
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}),
defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore',
onInlineAuthors: 'ignore',
@ -409,7 +409,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => {
},
},
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content}),
defaultReadingTime({content, locale: 'en'}),
truncateMarker: /<!--\s*truncate\s*-->/,
onInlineTags: 'ignore',
onInlineAuthors: 'ignore',

View file

@ -211,7 +211,7 @@ describe('blog plugin', () => {
).toEqual({
editUrl: `${BaseEditUrl}/blog/2018-12-14-Happy-First-Birthday-Slash.md`,
permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash',
readingTime: 0.015,
readingTime: 0.02,
source: path.posix.join(
'@site',
path.posix.join('i18n', 'en', 'docusaurus-plugin-content-blog'),
@ -276,7 +276,7 @@ describe('blog plugin', () => {
}).toEqual({
editUrl: `${BaseEditUrl}/blog/complex-slug.md`,
permalink: '/blog/hey/my super path/héllô',
readingTime: 0.015,
readingTime: 0.02,
source: path.posix.join('@site', PluginPath, 'complex-slug.md'),
title: 'Complex Slug',
description: `complex url slug`,
@ -318,7 +318,7 @@ describe('blog plugin', () => {
}).toEqual({
editUrl: `${BaseEditUrl}/blog/simple-slug.md`,
permalink: '/blog/simple/slug',
readingTime: 0.015,
readingTime: 0.02,
source: path.posix.join('@site', PluginPath, 'simple-slug.md'),
title: 'Simple Slug',
description: `simple url slug`,

View file

@ -9,48 +9,46 @@ import {calculateReadingTime} from '../readingTime';
describe('calculateReadingTime', () => {
it('calculates reading time for empty content', () => {
expect(calculateReadingTime('')).toBe(0);
expect(calculateReadingTime('', 'en')).toBe(0);
});
it('calculates reading time for short content', () => {
const content = 'This is a short test content.';
expect(calculateReadingTime(content)).toBe(0.03);
expect(calculateReadingTime(content, 'en')).toBe(0.03);
});
it('calculates reading time for long content', () => {
const content = 'This is a test content. '.repeat(100);
expect(calculateReadingTime(content)).toBe(2.5);
expect(calculateReadingTime(content, 'en')).toBe(2.5);
});
it('respects custom words per minute', () => {
const content = 'This is a test content. '.repeat(100);
expect(calculateReadingTime(content, {wordsPerMinute: 100})).toBe(5);
expect(calculateReadingTime(content, 'en', {wordsPerMinute: 100})).toBe(5);
});
it('handles content with special characters', () => {
const content = 'Hello! How are you? This is a test...';
expect(calculateReadingTime(content)).toBe(0.04);
expect(calculateReadingTime(content, 'en')).toBe(0.04);
});
it('handles content with multiple lines', () => {
const content = `This is line 1.
This is line 2.
This is line 3.`;
expect(calculateReadingTime(content)).toBe(0.06);
const content = `This is line 1.\n This is line 2.\n This is line 3.`;
expect(calculateReadingTime(content, 'en')).toBe(0.06);
});
it('handles content with HTML tags', () => {
const content = '<p>This is a <strong>test</strong> content.</p>';
expect(calculateReadingTime(content)).toBe(0.025);
expect(calculateReadingTime(content, 'en')).toBe(0.05);
});
it('handles content with markdown', () => {
const content = '# Title\n\nThis is **bold** and *italic* text.';
expect(calculateReadingTime(content)).toBe(0.04);
expect(calculateReadingTime(content, 'en')).toBe(0.04);
});
it('handles CJK content', () => {
const content = '你好,世界!这是一段测试内容。';
expect(calculateReadingTime(content)).toBe(0.06);
expect(calculateReadingTime(content, 'zh')).toBe(0.04);
});
});

View file

@ -210,8 +210,8 @@ async function parseBlogPostMarkdownFile({
}
}
const defaultReadingTime: ReadingTimeFunction = ({content, options}) =>
calculateReadingTime(content, options);
const defaultReadingTime: ReadingTimeFunction = ({content, locale, options}) =>
calculateReadingTime(content, locale, options);
async function processBlogSourceFile(
blogSourceRelative: string,
@ -373,6 +373,7 @@ async function processBlogSourceFile(
content,
frontMatter,
defaultReadingTime,
locale: i18n.currentLocale,
})
: undefined,
hasTruncateMarker: truncateMarker.test(content),

View file

@ -63,7 +63,8 @@ export const DEFAULT_OPTIONS: PluginOptions = {
path: 'blog',
editLocalizedFiles: false,
authorsMapPath: 'authors.yml',
readingTime: ({content, defaultReadingTime}) => defaultReadingTime({content}),
readingTime: ({content, defaultReadingTime, locale}) =>
defaultReadingTime({content, locale}),
sortPosts: 'descending',
showLastUpdateTime: false,
showLastUpdateAuthor: false,

View file

@ -387,15 +387,10 @@ declare module '@docusaurus/plugin-content-blog' {
};
/**
* Duplicate from ngryman/reading-time to keep stability of API.
* Options for reading time calculation using Intl.Segmenter.
*/
type ReadingTimeOptions = {
wordsPerMinute?: number;
/**
* @param char The character to be matched.
* @returns `true` if this character is a word bound.
*/
wordBound?: (char: string) => boolean;
};
/**
@ -405,24 +400,22 @@ declare module '@docusaurus/plugin-content-blog' {
export type ReadingTimeFunction = (params: {
/** Markdown content. */
content: string;
/** Locale for word segmentation. */
locale: string;
/** Front matter. */
frontMatter?: BlogPostFrontMatter & {[key: string]: unknown};
/** Options accepted by ngryman/reading-time. */
/** Options for reading time calculation. */
options?: ReadingTimeOptions;
}) => number;
/**
* @returns The reading time directly plugged into metadata. `undefined` to
* hide reading time for a specific post.
* @returns The reading time directly plugged into metadata.
* `undefined` to hide reading time for a specific post.
*/
export type ReadingTimeFunctionOption = (
/**
* The `options` is not provided by the caller; the user can inject their
* own option values into `defaultReadingTime`
*/
params: Required<Omit<Parameters<ReadingTimeFunction>[0], 'options'>> & {
/**
* The default reading time implementation from ngryman/reading-time.
* The default reading time implementation.
*/
defaultReadingTime: ReadingTimeFunction;
},

View file

@ -5,25 +5,45 @@
* LICENSE file in the root directory of this source tree.
*/
import readingTime from 'reading-time';
const DEFAULT_WORDS_PER_MINUTE = 200;
interface ReadingTimeOptions {
wordsPerMinute?: number;
wordBound?: (char: string) => boolean;
/**
 * Returns how many word-like segments `content` contains.
 *
 * The text is split with `Intl.Segmenter` at word granularity for the given
 * locale; only segments whose `isWordLike` flag is truthy are counted.
 *
 * @param content The text content to count words in.
 * @param locale The locale handed to `Intl.Segmenter` for segmentation.
 * @returns The number of word-like segments, or `0` when `content` is empty.
 */
function countWords(content: string, locale: string): number {
  // Nothing to segment: bail out before building a segmenter.
  if (!content) {
    return 0;
  }

  const segments = new Intl.Segmenter(locale, {granularity: 'word'}).segment(
    content,
  );

  let total = 0;
  for (const segment of segments) {
    total += segment.isWordLike ? 1 : 0;
  }
  return total;
}
/**
* Calculates the reading time for a given content string.
* Uses the reading-time package under the hood.
* Calculates the reading time for a given content string using Intl.Segmenter.
* @param content The text content to calculate reading time for.
* @param locale Required locale string for Intl.Segmenter
* @param options Options for reading time calculation.
* - wordsPerMinute: number of words per minute (default 200)
* @returns Estimated reading time in minutes (float, rounded to 2 decimals)
*/
export function calculateReadingTime(
content: string,
options: ReadingTimeOptions = {},
locale: string,
options?: {wordsPerMinute?: number},
): number {
const wordsPerMinute = options.wordsPerMinute ?? DEFAULT_WORDS_PER_MINUTE;
const {wordBound} = options;
return readingTime(content, {wordsPerMinute, ...(wordBound && {wordBound})})
.minutes;
const wordsPerMinute = options?.wordsPerMinute ?? DEFAULT_WORDS_PER_MINUTE;
const words = countWords(content, locale);
if (words === 0) {
return 0;
}
// Calculate reading time in minutes and round to 2 decimal places
return Math.round((words / wordsPerMinute) * 100) / 100;
}

View file

@ -104,7 +104,11 @@ export const dogfoodingPluginInstances: PluginConfig[] = [
readingTime: ({content, frontMatter, defaultReadingTime}) =>
frontMatter.hide_reading_time
? undefined
: defaultReadingTime({content, options: {wordsPerMinute: 5}}),
: defaultReadingTime({
content,
locale: 'en',
options: {wordsPerMinute: 5},
}),
onInlineTags: 'warn',
onInlineAuthors: 'ignore',
onUntruncatedBlogPosts: 'ignore',

View file

@ -109,17 +109,18 @@ type EditUrlFunction = (params: {
```ts
type ReadingTimeOptions = {
wordsPerMinute: number;
wordBound: (char: string) => boolean;
};
type ReadingTimeCalculator = (params: {
content: string;
locale: string;
frontMatter?: BlogPostFrontMatter & Record<string, unknown>;
options?: ReadingTimeOptions;
}) => number;
type ReadingTimeFn = (params: {
content: string;
locale: string;
frontMatter: BlogPostFrontMatter & Record<string, unknown>;
defaultReadingTime: ReadingTimeCalculator;
}) => number | undefined;

View file

@ -476,8 +476,12 @@ export default {
blog: {
// highlight-start
showReadingTime: true, // When set to false, the "x min read" won't be shown
readingTime: ({content, frontMatter, defaultReadingTime}) =>
defaultReadingTime({content, options: {wordsPerMinute: 300}}),
readingTime: ({content, locale, frontMatter, defaultReadingTime}) =>
defaultReadingTime({
content,
locale,
options: {wordsPerMinute: 300},
}),
// highlight-end
},
},
@ -486,9 +490,16 @@ export default {
};
```
The `readingTime` callback receives three parameters: the blog content text as a string, front matter as a record of string keys and their values, and the default reading time function. It returns a number (reading time in minutes) or `undefined` (disable reading time for this page).
The `readingTime` callback receives the following parameters:
The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 300), and `wordBound` as a function from string to boolean. If the string passed to `wordBound` should be a word bound (spaces, tabs, and line breaks by default), the function should return `true`.
- `content`: the blog content text as a string
- `frontMatter`: the front matter as a record of string keys and their values
- `locale`: the locale of the current Docusaurus site
- `defaultReadingTime`: the default built-in reading time function

The `readingTime` callback returns a number (reading time in minutes) or `undefined` (disable reading time for this page).

The default reading time function accepts additional options:
- `wordsPerMinute` as a number (default: 200)
:::tip
@ -510,10 +521,10 @@ export default {
blog: {
showReadingTime: true,
// highlight-start
readingTime: ({content, frontMatter, defaultReadingTime}) =>
readingTime: ({content, locale, frontMatter, defaultReadingTime}) =>
frontMatter.hide_reading_time
? undefined
: defaultReadingTime({content}),
: defaultReadingTime({content, locale}),
// highlight-end
},
},
@ -547,8 +558,12 @@ export default {
{
blog: {
// highlight-start
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content, options: {wordsPerMinute: 100}}),
readingTime: ({content, locale, defaultReadingTime}) =>
defaultReadingTime({
content,
locale,
options: {wordsPerMinute: 100},
}),
// highlight-end
},
},
@ -574,7 +589,7 @@ export default {
{
blog: {
// highlight-next-line
readingTime: ({content}) => myReadingTime(content),
readingTime: ({content, locale}) => myReadingTime(content, locale),
},
},
],

View file

@ -15578,11 +15578,6 @@ readdirp@~3.6.0:
dependencies:
picomatch "^2.2.1"
reading-time@^1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/reading-time/-/reading-time-1.5.0.tgz#d2a7f1b6057cb2e169beaf87113cc3411b5bc5bb"
integrity sha512-onYyVhBNr4CmAxFsKS7bz+uTLRakypIe4R+5A824vBSkQy/hB3fZepoVEf8OVAxzLvK+H/jm9TzpI3ETSm64Kg==
rechoir@^0.6.2:
version "0.6.2"
resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384"