From 9f6360ba820899af9eb8192b8b7a03f116d40e7c Mon Sep 17 00:00:00 2001 From: Shreedhar Bhat <57760562+shreedharbhat98@users.noreply.github.com> Date: Thu, 15 May 2025 16:52:00 +0530 Subject: [PATCH] refactor(content-blog): replace `reading-time` with `Intl.Segmenter` API (#11138) Co-authored-by: sebastien --- .../package.json | 1 - .../__snapshots__/index.test.ts.snap | 2 +- .../src/__tests__/feed.test.ts | 14 +++--- .../src/__tests__/index.test.ts | 6 +-- .../src/__tests__/readingTime.test.ts | 22 +++++----- .../src/blogUtils.ts | 5 ++- .../src/options.ts | 3 +- .../src/plugin-content-blog.d.ts | 21 +++------ .../src/readingTime.ts | 44 ++++++++++++++----- website/_dogfooding/dogfooding.config.ts | 6 ++- .../docs/api/plugins/plugin-content-blog.mdx | 3 +- website/docs/blog.mdx | 33 ++++++++++---- yarn.lock | 5 --- 13 files changed, 96 insertions(+), 69 deletions(-) diff --git a/packages/docusaurus-plugin-content-blog/package.json b/packages/docusaurus-plugin-content-blog/package.json index 8d7e3c1179..4b5f96fcf0 100644 --- a/packages/docusaurus-plugin-content-blog/package.json +++ b/packages/docusaurus-plugin-content-blog/package.json @@ -43,7 +43,6 @@ "feed": "^4.2.2", "fs-extra": "^11.1.1", "lodash": "^4.17.21", - "reading-time": "^1.5.0", "schema-dts": "^1.1.2", "srcset": "^4.0.0", "tslib": "^2.6.0", diff --git a/packages/docusaurus-plugin-content-blog/src/__tests__/__snapshots__/index.test.ts.snap b/packages/docusaurus-plugin-content-blog/src/__tests__/__snapshots__/index.test.ts.snap index 30c2809187..0b0f653af2 100644 --- a/packages/docusaurus-plugin-content-blog/src/__tests__/__snapshots__/index.test.ts.snap +++ b/packages/docusaurus-plugin-content-blog/src/__tests__/__snapshots__/index.test.ts.snap @@ -150,7 +150,7 @@ exports[`blog plugin process blog posts load content 2`] = ` "title": "Another With Tag", }, "permalink": "/blog/simple/slug/another", - "readingTime": 0.015, + "readingTime": 0.02, "source": "@site/blog/another-simple-slug-with-tags.md", 
"tags": [ { diff --git a/packages/docusaurus-plugin-content-blog/src/__tests__/feed.test.ts b/packages/docusaurus-plugin-content-blog/src/__tests__/feed.test.ts index 96d0d857d7..7241282e1d 100644 --- a/packages/docusaurus-plugin-content-blog/src/__tests__/feed.test.ts +++ b/packages/docusaurus-plugin-content-blog/src/__tests__/feed.test.ts @@ -120,7 +120,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => { xslt: {atom: null, rss: null}, }, readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content}), + defaultReadingTime({content, locale: 'en'}), truncateMarker: //, onInlineTags: 'ignore', onInlineAuthors: 'ignore', @@ -164,7 +164,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => { xslt: {atom: null, rss: null}, }, readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content}), + defaultReadingTime({content, locale: 'en'}), truncateMarker: //, onInlineTags: 'ignore', onInlineAuthors: 'ignore', @@ -220,7 +220,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => { xslt: {atom: null, rss: null}, }, readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content}), + defaultReadingTime({content, locale: 'en'}), truncateMarker: //, onInlineTags: 'ignore', onInlineAuthors: 'ignore', @@ -267,7 +267,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => { xslt: {atom: null, rss: null}, }, readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content}), + defaultReadingTime({content, locale: 'en'}), truncateMarker: //, onInlineTags: 'ignore', onInlineAuthors: 'ignore', @@ -314,7 +314,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => { xslt: {atom: null, rss: null}, }, readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content}), + defaultReadingTime({content, locale: 'en'}), truncateMarker: //, onInlineTags: 'ignore', onInlineAuthors: 'ignore', @@ -360,7 +360,7 @@ 
describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => { xslt: true, }, readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content}), + defaultReadingTime({content, locale: 'en'}), truncateMarker: //, onInlineTags: 'ignore', onInlineAuthors: 'ignore', @@ -409,7 +409,7 @@ describe.each(['atom', 'rss', 'json'] as const)('%s', (feedType) => { }, }, readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content}), + defaultReadingTime({content, locale: 'en'}), truncateMarker: //, onInlineTags: 'ignore', onInlineAuthors: 'ignore', diff --git a/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts b/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts index 5d8cff5778..ebbca9b18d 100644 --- a/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts +++ b/packages/docusaurus-plugin-content-blog/src/__tests__/index.test.ts @@ -211,7 +211,7 @@ describe('blog plugin', () => { ).toEqual({ editUrl: `${BaseEditUrl}/blog/2018-12-14-Happy-First-Birthday-Slash.md`, permalink: '/blog/2018/12/14/Happy-First-Birthday-Slash', - readingTime: 0.015, + readingTime: 0.02, source: path.posix.join( '@site', path.posix.join('i18n', 'en', 'docusaurus-plugin-content-blog'), @@ -276,7 +276,7 @@ describe('blog plugin', () => { }).toEqual({ editUrl: `${BaseEditUrl}/blog/complex-slug.md`, permalink: '/blog/hey/my super path/héllô', - readingTime: 0.015, + readingTime: 0.02, source: path.posix.join('@site', PluginPath, 'complex-slug.md'), title: 'Complex Slug', description: `complex url slug`, @@ -318,7 +318,7 @@ describe('blog plugin', () => { }).toEqual({ editUrl: `${BaseEditUrl}/blog/simple-slug.md`, permalink: '/blog/simple/slug', - readingTime: 0.015, + readingTime: 0.02, source: path.posix.join('@site', PluginPath, 'simple-slug.md'), title: 'Simple Slug', description: `simple url slug`, diff --git a/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts 
b/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts index 98b130418e..1f62148d6b 100644 --- a/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts +++ b/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts @@ -9,48 +9,46 @@ import {calculateReadingTime} from '../readingTime'; describe('calculateReadingTime', () => { it('calculates reading time for empty content', () => { - expect(calculateReadingTime('')).toBe(0); + expect(calculateReadingTime('', 'en')).toBe(0); }); it('calculates reading time for short content', () => { const content = 'This is a short test content.'; - expect(calculateReadingTime(content)).toBe(0.03); + expect(calculateReadingTime(content, 'en')).toBe(0.03); }); it('calculates reading time for long content', () => { const content = 'This is a test content. '.repeat(100); - expect(calculateReadingTime(content)).toBe(2.5); + expect(calculateReadingTime(content, 'en')).toBe(2.5); }); it('respects custom words per minute', () => { const content = 'This is a test content. '.repeat(100); - expect(calculateReadingTime(content, {wordsPerMinute: 100})).toBe(5); + expect(calculateReadingTime(content, 'en', {wordsPerMinute: 100})).toBe(5); }); it('handles content with special characters', () => { const content = 'Hello! How are you? This is a test...'; - expect(calculateReadingTime(content)).toBe(0.04); + expect(calculateReadingTime(content, 'en')).toBe(0.04); }); it('handles content with multiple lines', () => { - const content = `This is line 1. - This is line 2. - This is line 3.`; - expect(calculateReadingTime(content)).toBe(0.06); + const content = `This is line 1.\n This is line 2.\n This is line 3.`; + expect(calculateReadingTime(content, 'en')).toBe(0.06); }); it('handles content with HTML tags', () => { const content = '

This is a test content.

'; - expect(calculateReadingTime(content)).toBe(0.025); + expect(calculateReadingTime(content, 'en')).toBe(0.05); }); it('handles content with markdown', () => { const content = '# Title\n\nThis is **bold** and *italic* text.'; - expect(calculateReadingTime(content)).toBe(0.04); + expect(calculateReadingTime(content, 'en')).toBe(0.04); }); it('handles CJK content', () => { const content = '你好,世界!这是一段测试内容。'; - expect(calculateReadingTime(content)).toBe(0.06); + expect(calculateReadingTime(content, 'zh')).toBe(0.04); }); }); diff --git a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts index 2cc84bac76..cd3c1b8837 100644 --- a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts +++ b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts @@ -210,8 +210,8 @@ async function parseBlogPostMarkdownFile({ } } -const defaultReadingTime: ReadingTimeFunction = ({content, options}) => - calculateReadingTime(content, options); +const defaultReadingTime: ReadingTimeFunction = ({content, locale, options}) => + calculateReadingTime(content, locale, options); async function processBlogSourceFile( blogSourceRelative: string, @@ -373,6 +373,7 @@ async function processBlogSourceFile( content, frontMatter, defaultReadingTime, + locale: i18n.currentLocale, }) : undefined, hasTruncateMarker: truncateMarker.test(content), diff --git a/packages/docusaurus-plugin-content-blog/src/options.ts b/packages/docusaurus-plugin-content-blog/src/options.ts index 20981aaa87..fe2d64e664 100644 --- a/packages/docusaurus-plugin-content-blog/src/options.ts +++ b/packages/docusaurus-plugin-content-blog/src/options.ts @@ -63,7 +63,8 @@ export const DEFAULT_OPTIONS: PluginOptions = { path: 'blog', editLocalizedFiles: false, authorsMapPath: 'authors.yml', - readingTime: ({content, defaultReadingTime}) => defaultReadingTime({content}), + readingTime: ({content, defaultReadingTime, locale}) => + defaultReadingTime({content, locale}), 
sortPosts: 'descending', showLastUpdateTime: false, showLastUpdateAuthor: false, diff --git a/packages/docusaurus-plugin-content-blog/src/plugin-content-blog.d.ts b/packages/docusaurus-plugin-content-blog/src/plugin-content-blog.d.ts index a3056261cf..610fa1052a 100644 --- a/packages/docusaurus-plugin-content-blog/src/plugin-content-blog.d.ts +++ b/packages/docusaurus-plugin-content-blog/src/plugin-content-blog.d.ts @@ -387,15 +387,10 @@ declare module '@docusaurus/plugin-content-blog' { }; /** - * Duplicate from ngryman/reading-time to keep stability of API. + * Options for reading time calculation using Intl.Segmenter. */ type ReadingTimeOptions = { wordsPerMinute?: number; - /** - * @param char The character to be matched. - * @returns `true` if this character is a word bound. - */ - wordBound?: (char: string) => boolean; }; /** @@ -405,24 +400,22 @@ declare module '@docusaurus/plugin-content-blog' { export type ReadingTimeFunction = (params: { /** Markdown content. */ content: string; + /** Locale for word segmentation. */ + locale: string; /** Front matter. */ frontMatter?: BlogPostFrontMatter & {[key: string]: unknown}; - /** Options accepted by ngryman/reading-time. */ + /** Options for reading time calculation. */ options?: ReadingTimeOptions; }) => number; /** - * @returns The reading time directly plugged into metadata. `undefined` to - * hide reading time for a specific post. + * @returns The reading time directly plugged into metadata. + * `undefined` to hide reading time for a specific post. */ export type ReadingTimeFunctionOption = ( - /** - * The `options` is not provided by the caller; the user can inject their - * own option values into `defaultReadingTime` - */ params: Required<Omit<Parameters<ReadingTimeFunction>[0], 'options'>> & { /** - * The default reading time implementation from ngryman/reading-time. + * The default reading time implementation.
*/ defaultReadingTime: ReadingTimeFunction; }, diff --git a/packages/docusaurus-plugin-content-blog/src/readingTime.ts b/packages/docusaurus-plugin-content-blog/src/readingTime.ts index 76e4513537..7bdf10c767 100644 --- a/packages/docusaurus-plugin-content-blog/src/readingTime.ts +++ b/packages/docusaurus-plugin-content-blog/src/readingTime.ts @@ -5,25 +5,45 @@ * LICENSE file in the root directory of this source tree. */ -import readingTime from 'reading-time'; - const DEFAULT_WORDS_PER_MINUTE = 200; -interface ReadingTimeOptions { - wordsPerMinute?: number; - wordBound?: (char: string) => boolean; +/** + * Counts the number of words in a string using Intl.Segmenter. + * @param content The text content to count words in. + * @param locale The locale to use for segmentation. + */ +function countWords(content: string, locale: string): number { + if (!content) { + return 0; + } + const segmenter = new Intl.Segmenter(locale, {granularity: 'word'}); + let wordCount = 0; + for (const {isWordLike} of segmenter.segment(content)) { + if (isWordLike) { + wordCount += 1; + } + } + return wordCount; } /** - * Calculates the reading time for a given content string. - * Uses the reading-time package under the hood. + * Calculates the reading time for a given content string using Intl.Segmenter. + * @param content The text content to calculate reading time for. + * @param locale Required locale string for Intl.Segmenter + * @param options Options for reading time calculation. + * - wordsPerMinute: number of words per minute (default 200) + * @returns Estimated reading time in minutes (float, rounded to 2 decimals) */ export function calculateReadingTime( content: string, - options: ReadingTimeOptions = {}, + locale: string, + options?: {wordsPerMinute?: number}, ): number { - const wordsPerMinute = options.wordsPerMinute ?? 
DEFAULT_WORDS_PER_MINUTE; - const {wordBound} = options; - return readingTime(content, {wordsPerMinute, ...(wordBound && {wordBound})}) - .minutes; + const wordsPerMinute = options?.wordsPerMinute ?? DEFAULT_WORDS_PER_MINUTE; + const words = countWords(content, locale); + if (words === 0) { + return 0; + } + // Calculate reading time in minutes and round to 2 decimal places + return Math.round((words / wordsPerMinute) * 100) / 100; } diff --git a/website/_dogfooding/dogfooding.config.ts b/website/_dogfooding/dogfooding.config.ts index 192cb4a8c2..59e90dcc8a 100644 --- a/website/_dogfooding/dogfooding.config.ts +++ b/website/_dogfooding/dogfooding.config.ts @@ -104,7 +104,11 @@ export const dogfoodingPluginInstances: PluginConfig[] = [ readingTime: ({content, frontMatter, defaultReadingTime}) => frontMatter.hide_reading_time ? undefined - : defaultReadingTime({content, options: {wordsPerMinute: 5}}), + : defaultReadingTime({ + content, + locale: 'en', + options: {wordsPerMinute: 5}, + }), onInlineTags: 'warn', onInlineAuthors: 'ignore', onUntruncatedBlogPosts: 'ignore', diff --git a/website/docs/api/plugins/plugin-content-blog.mdx b/website/docs/api/plugins/plugin-content-blog.mdx index 1a1703b7c4..8dee6fdcda 100644 --- a/website/docs/api/plugins/plugin-content-blog.mdx +++ b/website/docs/api/plugins/plugin-content-blog.mdx @@ -109,17 +109,18 @@ type EditUrlFunction = (params: { ```ts type ReadingTimeOptions = { wordsPerMinute: number; - wordBound: (char: string) => boolean; }; type ReadingTimeCalculator = (params: { content: string; + locale: string; frontMatter?: BlogPostFrontMatter & Record<string, unknown>; options?: ReadingTimeOptions; }) => number; type ReadingTimeFn = (params: { content: string; + locale: string; frontMatter: BlogPostFrontMatter & Record<string, unknown>; defaultReadingTime: ReadingTimeCalculator; }) => number | undefined; diff --git a/website/docs/blog.mdx b/website/docs/blog.mdx index 8c9b0dc0ef..aec3c7b3a9 100644 --- a/website/docs/blog.mdx +++ b/website/docs/blog.mdx @@
-476,8 +476,12 @@ export default { blog: { // highlight-start showReadingTime: true, // When set to false, the "x min read" won't be shown - readingTime: ({content, frontMatter, defaultReadingTime}) => - defaultReadingTime({content, options: {wordsPerMinute: 300}}), + readingTime: ({content, locale, frontMatter, defaultReadingTime}) => + defaultReadingTime({ + content, + locale, + options: {wordsPerMinute: 300}, + }), // highlight-end }, }, @@ -486,9 +490,16 @@ export default { }; ``` -The `readingTime` callback receives three parameters: the blog content text as a string, front matter as a record of string keys and their values, and the default reading time function. It returns a number (reading time in minutes) or `undefined` (disable reading time for this page). +The `readingTime` callback receives the following parameters: -The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 300), and `wordBound` as a function from string to boolean. If the string passed to `wordBound` should be a word bound (spaces, tabs, and line breaks by default), the function should return `true`. +- `content`: the blog content text as a string +- `frontMatter`: the front matter as a record of string keys and their values +- `locale`: the locale of the current Docusaurus site +- `defaultReadingTime`: the default built-in reading time function. The `readingTime` callback itself returns a number (reading time in minutes) or `undefined` (disable reading time for this page). + +The default reading time is able to accept additional options: + +- `wordsPerMinute` as a number (default: 200) :::tip @@ -510,10 +521,10 @@ export default { blog: { showReadingTime: true, // highlight-start - readingTime: ({content, frontMatter, defaultReadingTime}) => + readingTime: ({content, locale, frontMatter, defaultReadingTime}) => frontMatter.hide_reading_time ?
undefined - : defaultReadingTime({content}), + : defaultReadingTime({content, locale}), // highlight-end }, }, @@ -547,8 +558,12 @@ export default { { blog: { // highlight-start - readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content, options: {wordsPerMinute: 100}}), + readingTime: ({content, locale, defaultReadingTime}) => + defaultReadingTime({ + content, + locale, + options: {wordsPerMinute: 100}, + }), // highlight-end }, }, @@ -574,7 +589,7 @@ export default { { blog: { // highlight-next-line - readingTime: ({content}) => myReadingTime(content), + readingTime: ({content, locale}) => myReadingTime(content, locale), }, }, ], diff --git a/yarn.lock b/yarn.lock index 7d0aa88c9e..1cbb35edac 100644 --- a/yarn.lock +++ b/yarn.lock @@ -15578,11 +15578,6 @@ readdirp@~3.6.0: dependencies: picomatch "^2.2.1" -reading-time@^1.5.0: - version "1.5.0" - resolved "https://registry.yarnpkg.com/reading-time/-/reading-time-1.5.0.tgz#d2a7f1b6057cb2e169beaf87113cc3411b5bc5bb" - integrity sha512-onYyVhBNr4CmAxFsKS7bz+uTLRakypIe4R+5A824vBSkQy/hB3fZepoVEf8OVAxzLvK+H/jm9TzpI3ETSm64Kg== - rechoir@^0.6.2: version "0.6.2" resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384"