Replaced readingTime npm with Intl.Segmenter

2025-04-28 17:57:48 +02:00 · 2025-04-12 20:48:16 +05:30 · 2025-04-12 20:48:16 +05:30 · be88a80ba0
commit be88a80ba0
parent 730ce485ba
5 changed files with 306 additions and 6 deletions
--- a/packages/docusaurus-plugin-content-blog/src/tests/readingTime.test.ts
+++ b/packages/docusaurus-plugin-content-blog/src/tests/readingTime.test.ts
@ -0,0 +1,228 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+import readingTime from 'reading-time';
+import {defaultReadingTime} from '../blogUtils';
+import {calculateReadingTime} from '../readingTime';
+
+describe('readingTime implementation', () => {
+  // Builds the matcher used by the assertions below: every field accepts any
+  // value of the right type unless `pinned` supplies an exact expectation.
+  const packageStats = (pinned: Record<string, unknown> = {}) => ({
+    text: expect.any(String),
+    minutes: expect.any(Number),
+    time: expect.any(Number),
+    words: expect.any(Number),
+    ...pinned,
+  });
+
+  it('calculates reading time for simple text', () => {
+    const stats = readingTime('This is a simple test with 7 words.');
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time for empty content', () => {
+    const stats = readingTime('');
+    expect(stats).toMatchObject(packageStats({minutes: 0, time: 0, words: 0}));
+  });
+
+  it('calculates reading time for content with emojis', () => {
+    const stats = readingTime('Hello 😊 World 🌍');
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time for content with special characters', () => {
+    const stats = readingTime('Hello! How are you? This is a test...');
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time for content with multiple languages', () => {
+    const stats = readingTime('Hello 你好 Bonjour');
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time for content with HTML tags', () => {
+    const stats = readingTime(
+      '<p>This is a <strong>test</strong> with HTML</p>',
+    );
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time for content with code blocks', () => {
+    const stats = readingTime('```js\nconst x = 1;\n```\nThis is a test');
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time for content with frontmatter', () => {
+    const stats = readingTime('---\ntitle: Test\n---\nThis is a test');
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time for content with custom options', () => {
+    const stats = readingTime('This is a test', {wordsPerMinute: 100});
+    expect(stats).toMatchObject(packageStats());
+  });
+
+  it('calculates reading time using defaultReadingTime', () => {
+    const minutes = defaultReadingTime({
+      content: 'This is a test',
+      options: {},
+    });
+    expect(minutes).toBeGreaterThan(0);
+  });
+
+  describe('Intl.Segmenter implementation', () => {
+    // Expected `words` values are the number of word-like segments that
+    // Intl.Segmenter reports for the content. calculateReadingTime only
+    // removes a leading front matter block, so digits, HTML tag names and
+    // code tokens are all counted, while punctuation and emojis are not.
+    const segmenterStats = (pinned: Record<string, unknown>) => ({
+      text: expect.any(String),
+      minutes: expect.any(Number),
+      time: expect.any(Number),
+      ...pinned,
+    });
+
+    it('calculates reading time for simple text', () => {
+      const content = 'This is a simple test with 7 words.';
+      // The digit "7" is a word-like segment too, hence 8 rather than 7.
+      expect(calculateReadingTime(content)).toMatchObject(
+        segmenterStats({words: 8}),
+      );
+    });
+
+    it('calculates reading time for empty content', () => {
+      expect(calculateReadingTime('')).toMatchObject({
+        text: '0 min read',
+        minutes: 0,
+        time: 0,
+        words: 0,
+      });
+    });
+
+    it('calculates reading time for content with emojis', () => {
+      const content = 'Hello 😊 World 🌍';
+      // Emojis are not word-like segments; only "Hello" and "World" count.
+      expect(calculateReadingTime(content)).toMatchObject(
+        segmenterStats({words: 2}),
+      );
+    });
+
+    it('calculates reading time for content with special characters', () => {
+      const content = 'Hello! How are you? This is a test...';
+      // Hello, How, are, you, This, is, a, test.
+      expect(calculateReadingTime(content)).toMatchObject(
+        segmenterStats({words: 8}),
+      );
+    });
+
+    it('calculates reading time for content with multiple languages', () => {
+      const content = 'Hello 你好 Bonjour';
+      expect(calculateReadingTime(content)).toMatchObject(
+        segmenterStats({words: 3}),
+      );
+    });
+
+    it('calculates reading time for content with HTML tags', () => {
+      const content = '<p>This is a <strong>test</strong> with HTML</p>';
+      // Markup is not stripped: the 6 text words plus the tag names
+      // p, strong, strong and p.
+      expect(calculateReadingTime(content)).toMatchObject(
+        segmenterStats({words: 10}),
+      );
+    });
+
+    it('calculates reading time for content with code blocks', () => {
+      const content = '```js\nconst x = 1;\n```\nThis is a test';
+      // Code is not stripped: js, const, x and 1 plus the 4 text words.
+      expect(calculateReadingTime(content)).toMatchObject(
+        segmenterStats({words: 8}),
+      );
+    });
+
+    it('calculates reading time for content with frontmatter', () => {
+      const content = '---\ntitle: Test\n---\nThis is a test';
+      // The front matter block is removed, leaving "This is a test".
+      expect(calculateReadingTime(content)).toMatchObject(
+        segmenterStats({words: 4}),
+      );
+    });
+
+    it('calculates reading time for content with custom options', () => {
+      const content = 'This is a test';
+      // 4 words at 100 words per minute: 0.04 minutes, i.e. 2400 ms.
+      expect(
+        calculateReadingTime(content, {wordsPerMinute: 100}),
+      ).toMatchObject({
+        text: '1 min read',
+        minutes: 0.04,
+        time: 2400,
+        words: 4,
+      });
+    });
+
+    it('calculates reading time with different locale', () => {
+      const content = 'Hello 你好 Bonjour';
+      expect(calculateReadingTime(content, {locale: 'zh'})).toMatchObject(
+        segmenterStats({words: 3}),
+      );
+    });
+  });
+});
--- a/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
+++ b/packages/docusaurus-plugin-content-blog/src/blogUtils.ts
@ -9,7 +9,6 @@ import fs from 'fs-extra';
 import path from 'path';
 import _ from 'lodash';
 import logger from '@docusaurus/logger';
-import readingTime from 'reading-time';
 import {
  parseMarkdownFile,
  normalizeUrl,
@ -32,6 +31,7 @@ import {getTagsFile} from '@docusaurus/utils-validation';
 import {validateBlogPostFrontMatter} from './frontMatter';
 import {getBlogPostAuthors} from './authors';
 import {reportAuthorsProblems} from './authorsProblems';
+import {calculateReadingTime} from './readingTime';
 import type {TagsFile} from '@docusaurus/utils';
 import type {LoadContext, ParseFrontMatter} from '@docusaurus/types';
 import type {
@ -210,8 +210,8 @@ async function parseBlogPostMarkdownFile({
  }
 }

-const defaultReadingTime: ReadingTimeFunction = ({content, options}) =>
-  readingTime(content, options).minutes;
+/**
+ * Default reading time estimator: forwards the post content and the optional
+ * settings to `calculateReadingTime` and returns its `minutes` value.
+ */
+export const defaultReadingTime: ReadingTimeFunction = (params) => {
+  const {minutes} = calculateReadingTime(params.content, params.options);
+  return minutes;
+};

 async function processBlogSourceFile(
  blogSourceRelative: string,
--- a/packages/docusaurus-plugin-content-blog/src/readingTime.ts
+++ b/packages/docusaurus-plugin-content-blog/src/readingTime.ts
@ -0,0 +1,50 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+/**
+ * Optional settings read by `calculateReadingTime`.
+ */
+interface ReadingTimeOptions {
+  /**
+   * Reading speed that the word count is divided by. Falls back to
+   * `DEFAULT_WORDS_PER_MINUTE` when omitted.
+   */
+  wordsPerMinute?: number;
+  /**
+   * Locale handed to `Intl.Segmenter` for word segmentation. Falls back to
+   * `DEFAULT_LOCALE` when omitted.
+   */
+  locale?: string;
+}
+
+/**
+ * Statistics returned by `calculateReadingTime`.
+ */
+interface ReadingTimeResult {
+  /** Display label: the minutes rounded up, followed by " min read". */
+  text: string;
+  /** Unrounded estimate: word count divided by words per minute. */
+  minutes: number;
+  /** `minutes` converted to milliseconds and rounded to an integer. */
+  time: number;
+  /** Number of word-like segments found in the content. */
+  words: number;
+}
+
+/** Reading speed used when `options.wordsPerMinute` is not provided. */
+const DEFAULT_WORDS_PER_MINUTE = 200;
+/** Segmentation locale used when `options.locale` is not provided. */
+const DEFAULT_LOCALE = 'en';
+
+/**
+ * Matches a front matter block at the very start of the content: an opening
+ * `---` line, an optional body, and a closing `---` line. Both delimiters must
+ * sit on their own line, LF and CRLF line endings are accepted, and the
+ * closing delimiter may be the last line of the content.
+ */
+const FRONT_MATTER_BLOCK =
+  /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/;
+
+/**
+ * Estimates how long `content` takes to read by counting its word-like
+ * segments with `Intl.Segmenter`.
+ *
+ * A leading front matter block is removed before counting. Nothing else is
+ * stripped, so HTML tag names and code tokens count as words.
+ *
+ * @param content Text to analyze.
+ * @param options Optional `wordsPerMinute` (default
+ * `DEFAULT_WORDS_PER_MINUTE`) and `locale` (default `DEFAULT_LOCALE`). A
+ * `wordsPerMinute` that is not a finite positive number is ignored in favor
+ * of the default.
+ * @returns The word count, the unrounded minutes, the same duration in
+ * milliseconds, and a "<n> min read" label using the minutes rounded up.
+ * @throws RangeError if `locale` is not a valid locale identifier (thrown by
+ * the `Intl.Segmenter` constructor).
+ */
+export function calculateReadingTime(
+  content: string,
+  options: ReadingTimeOptions = {},
+): ReadingTimeResult {
+  const requestedSpeed = options.wordsPerMinute;
+  // Dividing by zero, a negative number or NaN would yield Infinity, negative
+  // or NaN minutes and a label such as "Infinity min read".
+  const wordsPerMinute =
+    typeof requestedSpeed === 'number' &&
+    Number.isFinite(requestedSpeed) &&
+    requestedSpeed > 0
+      ? requestedSpeed
+      : DEFAULT_WORDS_PER_MINUTE;
+  const locale = options.locale ?? DEFAULT_LOCALE;
+  const contentWithoutFrontmatter = content.replace(FRONT_MATTER_BLOCK, '');
+
+  const segmenter = new Intl.Segmenter(locale, {granularity: 'word'});
+
+  let wordCount = 0;
+  for (const segment of segmenter.segment(contentWithoutFrontmatter)) {
+    // Whitespace, punctuation and symbols are segments too, but not word-like.
+    if (segment.isWordLike) {
+      wordCount += 1;
+    }
+  }
+
+  const minutes = wordCount / wordsPerMinute;
+  const time = Math.round(minutes * 60 * 1000);
+  const displayed = Math.ceil(minutes);
+
+  return {
+    text: `${displayed} min read`,
+    minutes,
+    time,
+    words: wordCount,
+  };
+}
--- a/packages/docusaurus-plugin-content-blog/src/types.ts
+++ b/packages/docusaurus-plugin-content-blog/src/types.ts
@ -12,3 +12,14 @@ export type BlogContentPaths = ContentPaths;
 export type BlogMarkdownLoaderOptions = {
  truncateMarker: RegExp;
 };
+
+/**
+ * Optional settings a `ReadingTimeFunction` may receive through its `options`
+ * parameter.
+ */
+export type ReadingTimeOptions = {
+  /** Reading speed, in words per minute. */
+  wordsPerMinute?: number;
+  /** Locale string; presumably the word segmentation locale (confirm against the implementation). */
+  locale?: string;
+};
+
+/**
+ * Signature of a reading time estimator. It receives the post `content`,
+ * optionally its `frontMatter` and `options`, and returns a number.
+ */
+export type ReadingTimeFunction = (params: {
+  content: string;
+  frontMatter?: Record<string, unknown>;
+  options?: ReadingTimeOptions;
+}) => number;
--- a/website/docs/blog.mdx
+++ b/website/docs/blog.mdx
@ -477,7 +477,13 @@ export default {
          // highlight-start
          showReadingTime: true, // When set to false, the "x min read" won't be shown
          readingTime: ({content, frontMatter, defaultReadingTime}) =>
-            defaultReadingTime({content, options: {wordsPerMinute: 300}}),
+            defaultReadingTime({
+              content,
+              options: {
+                wordsPerMinute: 300,
+                locale: 'en-US',
+              },
+            }),
          // highlight-end
        },
      },
@ -488,7 +494,7 @@ export default {

 The `readingTime` callback receives three parameters: the blog content text as a string, front matter as a record of string keys and their values, and the default reading time function. It returns a number (reading time in minutes) or `undefined` (disable reading time for this page).

-The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 300), and `wordBound` as a function from string to boolean. If the string passed to `wordBound` should be a word bound (spaces, tabs, and line breaks by default), the function should return `true`.
+The default reading time function accepts additional options: `wordsPerMinute` as a number (default: 200), and `locale` as a string (default: `'en'`). The `locale` option lets you specify the language used for word segmentation, which is particularly useful for multilingual content and for languages that do not separate words with spaces.

 :::tip

@ -548,7 +554,12 @@ export default {
        blog: {
          // highlight-start
          readingTime: ({content, defaultReadingTime}) =>
-            defaultReadingTime({content, options: {wordsPerMinute: 100}}),
+            defaultReadingTime({
+              content,
+              options: {
+                wordsPerMinute: 100,
+              },
+            }),
          // highlight-end
        },
      },