From be88a80ba0fadb1fc6544fc93ed6389947df3cee Mon Sep 17 00:00:00 2001
From: Shreedhar Bhat <shreedhar.bhat@curefit.com>
Date: Sat, 12 Apr 2025 20:48:16 +0530
Subject: [PATCH] Replaced readingTime npm with Intl.Segmenter

---
 .../src/__tests__/readingTime.test.ts         | 228 ++++++++++++++++++
 .../src/blogUtils.ts                          |   6 +-
 .../src/readingTime.ts                        |  50 ++++
 .../src/types.ts                              |  11 +
 website/docs/blog.mdx                         |  17 +-
 5 files changed, 306 insertions(+), 6 deletions(-)
 create mode 100644 packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts
 create mode 100644 packages/docusaurus-plugin-content-blog/src/readingTime.ts
diff --git a/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts b/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts
new file mode 100644
index 0000000000..d35b365a32
--- /dev/null
+++ b/packages/docusaurus-plugin-content-blog/src/__tests__/readingTime.test.ts
@@ -0,0 +1,137 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+import {defaultReadingTime} from '../blogUtils';
+import {calculateReadingTime} from '../readingTime';
+
+// Expected word counts are the number of word-like segments reported by
+// Intl.Segmenter. Digits count as words, punctuation and emojis do not, and
+// Markdown/HTML markup is not stripped before counting.
+describe('readingTime implementation', () => {
+  it('calculates reading time using defaultReadingTime', () => {
+    const content = 'This is a test';
+    const result = defaultReadingTime({content, options: {}});
+    expect(result).toBeGreaterThan(0);
+  });
+
+  describe('Intl.Segmenter implementation', () => {
+    it('calculates reading time for simple text', () => {
+      // Seven alphabetic words plus the number "7".
+      const content = 'This is a simple test with 7 words.';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 8,
+      });
+    });
+
+    it('calculates reading time for empty content', () => {
+      const content = '';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: '0 min read',
+        minutes: 0,
+        time: 0,
+        words: 0,
+      });
+    });
+
+    it('calculates reading time for content with emojis', () => {
+      // Emojis are not word-like segments.
+      const content = 'Hello 😊 World 🌍';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 2,
+      });
+    });
+
+    it('calculates reading time for content with special characters', () => {
+      // Punctuation is ignored: Hello, How, are, you, This, is, a, test.
+      const content = 'Hello! How are you? This is a test...';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 8,
+      });
+    });
+
+    it('calculates reading time for content with multiple languages', () => {
+      const content = 'Hello 你好 Bonjour';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 3,
+      });
+    });
+
+    it('calculates reading time for content with HTML tags', () => {
+      // Tags are not stripped: p, strong, strong and p count as words too.
+      const content = '<p>This is a <strong>test</strong> with HTML</p>';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 10,
+      });
+    });
+
+    it('calculates reading time for content with code blocks', () => {
+      // Code is not stripped: js, const, x and 1 count as words too.
+      const content = '```js\nconst x = 1;\n```\nThis is a test';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 8,
+      });
+    });
+
+    it('calculates reading time for content with frontmatter', () => {
+      // The front matter block is removed, leaving "This is a test".
+      const content = '---\ntitle: Test\n---\nThis is a test';
+      const result = calculateReadingTime(content);
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 4,
+      });
+    });
+
+    it('calculates reading time for content with custom options', () => {
+      const content = 'This is a test';
+      const result = calculateReadingTime(content, {wordsPerMinute: 100});
+      expect(result).toMatchObject({
+        text: '1 min read',
+        minutes: 0.04,
+        time: 2400,
+        words: 4,
+      });
+    });
+
+    it('calculates reading time with different locale', () => {
+      const content = 'Hello 你好 Bonjour';
+      const result = calculateReadingTime(content, {locale: 'zh'});
+      expect(result).toMatchObject({
+        text: expect.any(String),
+        minutes: expect.any(Number),
+        time: expect.any(Number),
+        words: 3,
+      });
+    });
+  });
+});
diff --git a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
index ab7426eac5..61af5d5275 100644
--- a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
+++ b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
@@ -9,7 +9,6 @@ import fs from 'fs-extra';
 import path from 'path';
 import _ from 'lodash';
 import logger from '@docusaurus/logger';
-import readingTime from 'reading-time';
 import {
   parseMarkdownFile,
   normalizeUrl,
@@ -32,6 +31,7 @@ import {getTagsFile} from '@docusaurus/utils-validation';
 import {validateBlogPostFrontMatter} from './frontMatter';
 import {getBlogPostAuthors} from './authors';
 import {reportAuthorsProblems} from './authorsProblems';
+import {calculateReadingTime} from './readingTime';
 import type {TagsFile} from '@docusaurus/utils';
 import type {LoadContext, ParseFrontMatter} from '@docusaurus/types';
 import type {
@@ -210,8 +210,8 @@ async function parseBlogPostMarkdownFile({
   }
 }
 
-const defaultReadingTime: ReadingTimeFunction = ({content, options}) =>
-  readingTime(content, options).minutes;
+export const defaultReadingTime: ReadingTimeFunction = (params) =>
+  calculateReadingTime(params.content, params.options).minutes; // unrounded
 
 async function processBlogSourceFile(
   blogSourceRelative: string,
diff --git a/packages/docusaurus-plugin-content-blog/src/readingTime.ts b/packages/docusaurus-plugin-content-blog/src/readingTime.ts
new file mode 100644
index 0000000000..f685169a05
--- /dev/null
+++ b/packages/docusaurus-plugin-content-blog/src/readingTime.ts
@@ -0,0 +1,85 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/** Options accepted by `calculateReadingTime`. */
+interface ReadingTimeOptions {
+  /** Reading speed in words per minute. Defaults to 200. */
+  wordsPerMinute?: number;
+  /** Locale tag given to `Intl.Segmenter`. Defaults to `'en'`. */
+  locale?: string;
+}
+
+/** Estimate returned by `calculateReadingTime`. */
+interface ReadingTimeResult {
+  /** Display label such as `"3 min read"`, with minutes rounded up. */
+  text: string;
+  /** Unrounded reading time in minutes. */
+  minutes: number;
+  /** Reading time in milliseconds, rounded to the nearest integer. */
+  time: number;
+  /** Number of word-like segments found in the content. */
+  words: number;
+}
+
+const DEFAULT_WORDS_PER_MINUTE = 200;
+const DEFAULT_LOCALE = 'en';
+
+// Leading front matter block: an opening `---` line, an optional body and a
+// closing `---` line. Both LF and CRLF line endings are accepted, and the
+// closing line may be the last line of the input.
+const FRONT_MATTER_REGEX = /^---\r?\n(?:[\s\S]*?\r?\n)?---(?:\r?\n|$)/;
+
+/**
+ * Returns a usable reading speed. Zero, negative, `NaN` and infinite values
+ * fall back to the default, because dividing the word count by them would
+ * produce `Infinity` or `NaN` estimates.
+ */
+function resolveWordsPerMinute(wordsPerMinute: number | undefined): number {
+  const rate = wordsPerMinute ?? DEFAULT_WORDS_PER_MINUTE;
+  // `rate > 0` is false for `NaN` as well as for zero and negative values.
+  return rate > 0 && rate < Infinity ? rate : DEFAULT_WORDS_PER_MINUTE;
+}
+
+/**
+ * Estimates how long `content` takes to read.
+ *
+ * Words are counted with `Intl.Segmenter` at word granularity, keeping only
+ * the segments flagged `isWordLike`. Apart from a leading front matter block,
+ * markup is not stripped, so tag names and code tokens count as words.
+ *
+ * @param content Text to measure.
+ * @param options Optional reading speed and segmentation locale.
+ * @returns The word count with the matching duration and display label.
+ * @throws RangeError if `options.locale` is not a valid locale tag.
+ */
+export function calculateReadingTime(
+  content: string,
+  options: ReadingTimeOptions = {},
+): ReadingTimeResult {
+  const wordsPerMinute = resolveWordsPerMinute(options.wordsPerMinute);
+  const locale = options.locale ?? DEFAULT_LOCALE;
+  const contentWithoutFrontMatter = content.replace(FRONT_MATTER_REGEX, '');
+
+  const segmenter = new Intl.Segmenter(locale, {granularity: 'word'});
+
+  let wordCount = 0;
+  for (const segment of segmenter.segment(contentWithoutFrontMatter)) {
+    if (segment.isWordLike) {
+      wordCount += 1;
+    }
+  }
+
+  const minutes = wordCount / wordsPerMinute;
+
+  return {
+    // Rounded up, so any non-empty word count displays at least "1 min read".
+    text: `${Math.ceil(minutes)} min read`,
+    minutes,
+    time: Math.round(minutes * 60 * 1000),
+    words: wordCount,
+  };
+}
diff --git a/packages/docusaurus-plugin-content-blog/src/types.ts b/packages/docusaurus-plugin-content-blog/src/types.ts
index 14820f3236..b636468774 100644
--- a/packages/docusaurus-plugin-content-blog/src/types.ts
+++ b/packages/docusaurus-plugin-content-blog/src/types.ts
@@ -12,3 +12,14 @@ export type BlogContentPaths = ContentPaths;
 export type BlogMarkdownLoaderOptions = {
   truncateMarker: RegExp;
 };
+
+export type ReadingTimeOptions = {
+  wordsPerMinute?: number; // reading speed, in words per minute
+  locale?: string; // locale tag used for word segmentation
+};
+
+export type ReadingTimeFunction = (params: {
+  content: string; // text to measure
+  frontMatter?: Record<string, unknown>; // front matter of the post, if any
+  options?: ReadingTimeOptions; // optional tuning, see ReadingTimeOptions
+}) => number; // presumably minutes, as defaultReadingTime returns (confirm)
diff --git a/website/docs/blog.mdx b/website/docs/blog.mdx
index 8c9b0dc0ef..78289d9df5 100644
--- a/website/docs/blog.mdx
+++ b/website/docs/blog.mdx
@@ -477,7 +477,13 @@ export default {
           // highlight-start
           showReadingTime: true, // When set to false, the "x min read" won't be shown
           readingTime: ({content, frontMatter, defaultReadingTime}) =>
-            defaultReadingTime({content, options: {wordsPerMinute: 300}}),
+            defaultReadingTime({
+              content,
+              options: {
+                wordsPerMinute: 300,
+                locale: 'en-US',
+              },
+            }),
           // highlight-end
         },
       },
@@ -488,7 +494,7 @@ export default {
 
 The `readingTime` callback receives three parameters: the blog content text as a string, front matter as a record of string keys and their values, and the default reading time function. It returns a number (reading time in minutes) or `undefined` (disable reading time for this page).
 
-The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 300), and `wordBound` as a function from string to boolean. If the string passed to `wordBound` should be a word bound (spaces, tabs, and line breaks by default), the function should return `true`.
+The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 200), and `locale` as a string (default: `'en'`). The `locale` option selects the language rules used to split the content into words, which is particularly useful for non-English or multilingual content.
 
 :::tip
 
@@ -548,7 +554,12 @@ export default {
         blog: {
           // highlight-start
           readingTime: ({content, defaultReadingTime}) =>
-            defaultReadingTime({content, options: {wordsPerMinute: 100}}),
+            defaultReadingTime({
+              content,
+              options: {
+                wordsPerMinute: 100,
+              },
+            }),
           // highlight-end
         },
       },