From be88a80ba0fadb1fc6544fc93ed6389947df3cee Mon Sep 17 00:00:00 2001 From: Shreedhar Bhat Date: Sat, 12 Apr 2025 20:48:16 +0530 Subject: [PATCH] Replaced readingTime npm with Intl.Segmenter --- .../src/__tests__/readingTime.test.ts | 228 ++++++++++++++++++ .../src/blogUtils.ts | 6 +- .../src/readingTime.ts | 50 ++++ .../src/types.ts | 11 + website/docs/blog.mdx | 17 +- 5 files changed, 306 insertions(+), 6 deletions(-) create mode 100644 packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts create mode 100644 packages/docusaurus-plugin-content-blog/src/readingTime.ts diff --git a/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts b/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts new file mode 100644 index 0000000000..d35b365a32 --- /dev/null +++ b/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts @@ -0,0 +1,228 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +import readingTime from 'reading-time'; +import {defaultReadingTime} from '../blogUtils'; +import {calculateReadingTime} from '../readingTime'; + +describe('readingTime implementation', () => { + it('calculates reading time for simple text', () => { + const content = 'This is a simple test with 7 words.'; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time for empty content', () => { + const content = ''; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: 0, + time: 0, + words: 0, + }); + }); + + it('calculates reading time for content with emojis', () => { + const content = 'Hello 😊 World 🌍'; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time for content with special characters', () => { + const content = 'Hello! How are you? This is a test...'; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time for content with multiple languages', () => { + const content = 'Hello 你好 Bonjour'; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time for content with HTML tags', () => { + const content = '

This is a test with HTML

'; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time for content with code blocks', () => { + const content = '```js\nconst x = 1;\n```\nThis is a test'; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time for content with frontmatter', () => { + const content = '---\ntitle: Test\n---\nThis is a test'; + const result = readingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time for content with custom options', () => { + const content = 'This is a test'; + const result = readingTime(content, {wordsPerMinute: 100}); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: expect.any(Number), + }); + }); + + it('calculates reading time using defaultReadingTime', () => { + const content = 'This is a test'; + const result = defaultReadingTime({content, options: {}}); + expect(result).toBeGreaterThan(0); + }); + + describe('Intl.Segmenter implementation', () => { + it('calculates reading time for simple text', () => { + const content = 'This is a simple test with 7 words.'; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 7, + }); + }); + + it('calculates reading time for empty content', () => { + const content = ''; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: '0 min read', + minutes: 0, + time: 0, + words: 0, + }); + }); 
+ + it('calculates reading time for content with emojis', () => { + const content = 'Hello 😊 World 🌍'; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 2, + }); + }); + + it('calculates reading time for content with special characters', () => { + const content = 'Hello! How are you? This is a test...'; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 7, + }); + }); + + it('calculates reading time for content with multiple languages', () => { + const content = 'Hello 你好 Bonjour'; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 3, + }); + }); + + it('calculates reading time for content with HTML tags', () => { + const content = '

This is a test with HTML

'; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 6, + }); + }); + + it('calculates reading time for content with code blocks', () => { + const content = '```js\nconst x = 1;\n```\nThis is a test'; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 4, + }); + }); + + it('calculates reading time for content with frontmatter', () => { + const content = '---\ntitle: Test\n---\nThis is a test'; + const result = calculateReadingTime(content); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 3, + }); + }); + + it('calculates reading time for content with custom options', () => { + const content = 'This is a test'; + const result = calculateReadingTime(content, {wordsPerMinute: 100}); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 4, + }); + }); + + it('calculates reading time with different locale', () => { + const content = 'Hello 你好 Bonjour'; + const result = calculateReadingTime(content, {locale: 'zh'}); + expect(result).toMatchObject({ + text: expect.any(String), + minutes: expect.any(Number), + time: expect.any(Number), + words: 3, + }); + }); + }); +}); diff --git a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts index ab7426eac5..61af5d5275 100644 --- a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts +++ b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts @@ -9,7 +9,6 @@ import fs from 'fs-extra'; import path from 'path'; import _ from 'lodash'; import logger from '@docusaurus/logger'; -import readingTime from 'reading-time'; import { parseMarkdownFile, normalizeUrl, @@ 
-32,6 +31,7 @@ import {getTagsFile} from '@docusaurus/utils-validation'; import {validateBlogPostFrontMatter} from './frontMatter'; import {getBlogPostAuthors} from './authors'; import {reportAuthorsProblems} from './authorsProblems'; +import {calculateReadingTime} from './readingTime'; import type {TagsFile} from '@docusaurus/utils'; import type {LoadContext, ParseFrontMatter} from '@docusaurus/types'; import type { @@ -210,8 +210,8 @@ async function parseBlogPostMarkdownFile({ } } -const defaultReadingTime: ReadingTimeFunction = ({content, options}) => - readingTime(content, options).minutes; +export const defaultReadingTime: ReadingTimeFunction = ({content, options}) => + calculateReadingTime(content, options).minutes; async function processBlogSourceFile( blogSourceRelative: string, diff --git a/packages/docusaurus-plugin-content-blog/src/readingTime.ts b/packages/docusaurus-plugin-content-blog/src/readingTime.ts new file mode 100644 index 0000000000..f685169a05 --- /dev/null +++ b/packages/docusaurus-plugin-content-blog/src/readingTime.ts @@ -0,0 +1,50 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ +interface ReadingTimeOptions { + wordsPerMinute?: number; + locale?: string; +} + +interface ReadingTimeResult { + text: string; + minutes: number; + time: number; + words: number; +} + +const DEFAULT_WORDS_PER_MINUTE = 200; +const DEFAULT_LOCALE = 'en'; + +export function calculateReadingTime( + content: string, + options: ReadingTimeOptions = {}, +): ReadingTimeResult { + const wordsPerMinute = options.wordsPerMinute ?? DEFAULT_WORDS_PER_MINUTE; + const locale = options.locale ?? 
DEFAULT_LOCALE; + const contentWithoutFrontmatter = content.replace(/^---[\s\S]*?---\n/, ''); + + const segmenter = new Intl.Segmenter(locale, {granularity: 'word'}); + const segments = segmenter.segment(contentWithoutFrontmatter); + + let wordCount = 0; + for (const segment of segments) { + if (segment.isWordLike) { + wordCount += 1; + } + } + + const minutes = wordCount / wordsPerMinute; + const time = Math.round(minutes * 60 * 1000); + const displayed = Math.ceil(minutes); + + return { + text: `${displayed} min read`, + minutes, + time, + words: wordCount, + }; +} diff --git a/packages/docusaurus-plugin-content-blog/src/types.ts b/packages/docusaurus-plugin-content-blog/src/types.ts index 14820f3236..b636468774 100644 --- a/packages/docusaurus-plugin-content-blog/src/types.ts +++ b/packages/docusaurus-plugin-content-blog/src/types.ts @@ -12,3 +12,14 @@ export type BlogContentPaths = ContentPaths; export type BlogMarkdownLoaderOptions = { truncateMarker: RegExp; }; + +export type ReadingTimeOptions = { + wordsPerMinute?: number; + locale?: string; +}; + +export type ReadingTimeFunction = (params: { + content: string; + frontMatter?: Record; + options?: ReadingTimeOptions; +}) => number; diff --git a/website/docs/blog.mdx b/website/docs/blog.mdx index 8c9b0dc0ef..78289d9df5 100644 --- a/website/docs/blog.mdx +++ b/website/docs/blog.mdx @@ -477,7 +477,13 @@ export default { // highlight-start showReadingTime: true, // When set to false, the "x min read" won't be shown readingTime: ({content, frontMatter, defaultReadingTime}) => - defaultReadingTime({content, options: {wordsPerMinute: 300}}), + defaultReadingTime({ + content, + options: { + wordsPerMinute: 300, + locale: 'en-US', + }, + }), // highlight-end }, }, @@ -488,7 +494,7 @@ export default { The `readingTime` callback receives three parameters: the blog content text as a string, front matter as a record of string keys and their values, and the default reading time function. 
It returns a number (reading time in minutes) or `undefined` (disable reading time for this page). -The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 300), and `wordBound` as a function from string to boolean. If the string passed to `wordBound` should be a word bound (spaces, tabs, and line breaks by default), the function should return `true`. +The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 200), and `locale` as a string (default: `'en'`). The `locale` option allows you to specify the language for word segmentation, which is particularly useful for multilingual content. :::tip @@ -548,7 +554,12 @@ export default { blog: { // highlight-start readingTime: ({content, defaultReadingTime}) => - defaultReadingTime({content, options: {wordsPerMinute: 100}}), + defaultReadingTime({ + content, + options: { + wordsPerMinute: 100, + }, + }), // highlight-end }, },