Replaced readingTime npm with Intl.Segmenter

This commit is contained in:
Shreedhar Bhat 2025-04-12 20:48:16 +05:30
parent 730ce485ba
commit be88a80ba0
5 changed files with 306 additions and 6 deletions

View file

@ -0,0 +1,228 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import readingTime from 'reading-time';
import {defaultReadingTime} from '../blogUtils';
import {calculateReadingTime} from '../readingTime';
/**
 * Two suites live here:
 *  - the outer one smoke-tests the legacy `reading-time` package and the
 *    exported `defaultReadingTime` wrapper (result shape only);
 *  - the nested one pins the exact word counts produced by the
 *    `Intl.Segmenter`-based `calculateReadingTime`.
 */
describe('readingTime implementation', () => {
  // Shape shared by every reading-time result; individual tests override the
  // fields they want to pin to an exact value.
  const anyResult = {
    text: expect.any(String),
    minutes: expect.any(Number),
    time: expect.any(Number),
    words: expect.any(Number),
  };

  it('calculates reading time for simple text', () => {
    const content = 'This is a simple test with 7 words.';
    const result = readingTime(content);
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time for empty content', () => {
    const content = '';
    const result = readingTime(content);
    expect(result).toMatchObject({
      ...anyResult,
      minutes: 0,
      time: 0,
      words: 0,
    });
  });

  it('calculates reading time for content with emojis', () => {
    const content = 'Hello 😊 World 🌍';
    const result = readingTime(content);
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time for content with special characters', () => {
    const content = 'Hello! How are you? This is a test...';
    const result = readingTime(content);
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time for content with multiple languages', () => {
    const content = 'Hello 你好 Bonjour';
    const result = readingTime(content);
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time for content with HTML tags', () => {
    const content = '<p>This is a <strong>test</strong> with HTML</p>';
    const result = readingTime(content);
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time for content with code blocks', () => {
    const content = '```js\nconst x = 1;\n```\nThis is a test';
    const result = readingTime(content);
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time for content with frontmatter', () => {
    const content = '---\ntitle: Test\n---\nThis is a test';
    const result = readingTime(content);
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time for content with custom options', () => {
    const content = 'This is a test';
    const result = readingTime(content, {wordsPerMinute: 100});
    expect(result).toMatchObject(anyResult);
  });

  it('calculates reading time using defaultReadingTime', () => {
    const content = 'This is a test';
    const result = defaultReadingTime({content, options: {}});
    expect(result).toBeGreaterThan(0);
  });

  describe('Intl.Segmenter implementation', () => {
    it('calculates reading time for simple text', () => {
      const content = 'This is a simple test with 7 words.';
      const result = calculateReadingTime(content);
      // The numeral "7" is a word-like segment too, so the total is 8.
      expect(result).toMatchObject({...anyResult, words: 8});
    });

    it('calculates reading time for empty content', () => {
      const content = '';
      const result = calculateReadingTime(content);
      expect(result).toMatchObject({
        text: '0 min read',
        minutes: 0,
        time: 0,
        words: 0,
      });
    });

    it('calculates reading time for content with emojis', () => {
      const content = 'Hello 😊 World 🌍';
      const result = calculateReadingTime(content);
      // Emoji segments are not word-like; only "Hello" and "World" count.
      expect(result).toMatchObject({...anyResult, words: 2});
    });

    it('calculates reading time for content with special characters', () => {
      const content = 'Hello! How are you? This is a test...';
      const result = calculateReadingTime(content);
      // Hello, How, are, you, This, is, a, test — punctuation is ignored.
      expect(result).toMatchObject({...anyResult, words: 8});
    });

    it('calculates reading time for content with multiple languages', () => {
      const content = 'Hello 你好 Bonjour';
      const result = calculateReadingTime(content);
      expect(result).toMatchObject({...anyResult, words: 3});
    });

    it('calculates reading time for content with HTML tags', () => {
      const content = '<p>This is a <strong>test</strong> with HTML</p>';
      const result = calculateReadingTime(content);
      // Markup is not stripped: the tag names (p, strong, strong, p) are
      // counted on top of the 6 words of visible text.
      expect(result).toMatchObject({...anyResult, words: 10});
    });

    it('calculates reading time for content with code blocks', () => {
      const content = '```js\nconst x = 1;\n```\nThis is a test';
      const result = calculateReadingTime(content);
      // Fenced code is not stripped: js, const, x, 1 + the 4 prose words.
      expect(result).toMatchObject({...anyResult, words: 8});
    });

    it('calculates reading time for content with frontmatter', () => {
      const content = '---\ntitle: Test\n---\nThis is a test';
      const result = calculateReadingTime(content);
      // Front matter is removed, leaving "This is a test".
      expect(result).toMatchObject({...anyResult, words: 4});
    });

    it('calculates reading time for content with custom options', () => {
      const content = 'This is a test';
      const result = calculateReadingTime(content, {wordsPerMinute: 100});
      // Pin `minutes` as well so the test fails if the option is ignored.
      expect(result).toMatchObject({
        ...anyResult,
        minutes: 4 / 100,
        words: 4,
      });
    });

    it('calculates reading time with different locale', () => {
      const content = 'Hello 你好 Bonjour';
      const result = calculateReadingTime(content, {locale: 'zh'});
      expect(result).toMatchObject({...anyResult, words: 3});
    });
  });
});

View file

@ -9,7 +9,6 @@ import fs from 'fs-extra';
import path from 'path';
import _ from 'lodash';
import logger from '@docusaurus/logger';
import readingTime from 'reading-time';
import {
parseMarkdownFile,
normalizeUrl,
@ -32,6 +31,7 @@ import {getTagsFile} from '@docusaurus/utils-validation';
import {validateBlogPostFrontMatter} from './frontMatter';
import {getBlogPostAuthors} from './authors';
import {reportAuthorsProblems} from './authorsProblems';
import {calculateReadingTime} from './readingTime';
import type {TagsFile} from '@docusaurus/utils';
import type {LoadContext, ParseFrontMatter} from '@docusaurus/types';
import type {
@ -210,8 +210,8 @@ async function parseBlogPostMarkdownFile({
}
}
const defaultReadingTime: ReadingTimeFunction = ({content, options}) =>
readingTime(content, options).minutes;
/**
 * Built-in reading-time estimator: delegates to `calculateReadingTime` and
 * returns only the (fractional) number of minutes from its result.
 */
export const defaultReadingTime: ReadingTimeFunction = ({content, options}) => {
  const {minutes} = calculateReadingTime(content, options);
  return minutes;
};
async function processBlogSourceFile(
blogSourceRelative: string,

View file

@ -0,0 +1,50 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/** Options accepted by {@link calculateReadingTime}. */
interface ReadingTimeOptions {
  /** Reading speed in words per minute. Defaults to 200. */
  wordsPerMinute?: number;
  /** Locale tag handed to `Intl.Segmenter`. Defaults to `'en'`. */
  locale?: string;
}

/** Result returned by {@link calculateReadingTime}. */
interface ReadingTimeResult {
  /** Human-readable label, e.g. `"3 min read"` (minutes rounded up). */
  text: string;
  /** Fractional reading time in minutes. */
  minutes: number;
  /** Reading time in milliseconds, rounded to the nearest integer. */
  time: number;
  /** Number of word-like segments found in the content. */
  words: number;
}

const DEFAULT_WORDS_PER_MINUTE = 200;
const DEFAULT_LOCALE = 'en';

// Leading front matter block: an opening `---` line, an optional body, and a
// closing `---` line. Both delimiters must sit on their own line, and LF as
// well as CRLF line endings are accepted.
const FRONT_MATTER_REGEX =
  /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/;

/**
 * Estimates how long `content` takes to read.
 *
 * A leading front matter block is removed, the remaining text is split with
 * `Intl.Segmenter` (word granularity) and every word-like segment is counted.
 * Markup such as HTML tags or fenced code is not stripped, so the words it
 * contains are counted too.
 *
 * @param content - Raw text to measure.
 * @param options - Optional reading speed and segmentation locale. A
 * `wordsPerMinute` that is not a finite positive number falls back to the
 * default so the result never contains `Infinity` or `NaN`.
 * @returns The word count together with the reading time as fractional
 * minutes, as milliseconds, and as a display label.
 * @throws RangeError if `options.locale` is not a valid locale tag (raised by
 * the `Intl.Segmenter` constructor).
 */
export function calculateReadingTime(
  content: string,
  options: ReadingTimeOptions = {},
): ReadingTimeResult {
  const requestedSpeed = options.wordsPerMinute;
  // Guard against 0, negative, NaN and infinite speeds: dividing by them would
  // produce Infinity/NaN minutes and a label such as "Infinity min read".
  const wordsPerMinute =
    typeof requestedSpeed === 'number' &&
    Number.isFinite(requestedSpeed) &&
    requestedSpeed > 0
      ? requestedSpeed
      : DEFAULT_WORDS_PER_MINUTE;
  const locale = options.locale ?? DEFAULT_LOCALE;

  const contentWithoutFrontmatter = content.replace(FRONT_MATTER_REGEX, '');

  const segmenter = new Intl.Segmenter(locale, {granularity: 'word'});
  let wordCount = 0;
  for (const segment of segmenter.segment(contentWithoutFrontmatter)) {
    // Whitespace, punctuation and emoji segments are not word-like.
    if (segment.isWordLike) {
      wordCount += 1;
    }
  }

  const minutes = wordCount / wordsPerMinute;
  return {
    text: `${Math.ceil(minutes)} min read`,
    minutes,
    time: Math.round(minutes * 60 * 1000),
    words: wordCount,
  };
}

View file

@ -12,3 +12,14 @@ export type BlogContentPaths = ContentPaths;
export type BlogMarkdownLoaderOptions = {
truncateMarker: RegExp;
};
/**
 * Options forwarded to the default reading-time calculation.
 */
export type ReadingTimeOptions = {
/** Reading speed, in words per minute, used to convert a word count to minutes. */
wordsPerMinute?: number;
/** Locale used for word segmentation of the content. */
locale?: string;
};
/**
 * Signature of a reading-time function: receives the blog post content (and
 * optionally its front matter and calculation options) and returns a number,
 * the reading time in minutes.
 */
export type ReadingTimeFunction = (params: {
content: string;
frontMatter?: Record<string, unknown>;
options?: ReadingTimeOptions;
}) => number;

View file

@ -477,7 +477,13 @@ export default {
// highlight-start
showReadingTime: true, // When set to false, the "x min read" won't be shown
readingTime: ({content, frontMatter, defaultReadingTime}) =>
defaultReadingTime({content, options: {wordsPerMinute: 300}}),
defaultReadingTime({
content,
options: {
wordsPerMinute: 300,
locale: 'en-US',
},
}),
// highlight-end
},
},
@ -488,7 +494,7 @@ export default {
The `readingTime` callback receives three parameters: the blog content text as a string, front matter as a record of string keys and their values, and the default reading time function. It returns a number (reading time in minutes) or `undefined` (disable reading time for this page).
The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 300), and `wordBound` as a function from string to boolean. If the string passed to `wordBound` should be a word bound (spaces, tabs, and line breaks by default), the function should return `true`.
The default reading time is able to accept additional options: `wordsPerMinute` as a number (default: 200), and `locale` as a string (default: `'en'`). The `locale` option allows you to specify the language used for word segmentation, which is particularly useful for multilingual content.
:::tip
@ -548,7 +554,12 @@ export default {
blog: {
// highlight-start
readingTime: ({content, defaultReadingTime}) =>
defaultReadingTime({content, options: {wordsPerMinute: 100}}),
defaultReadingTime({
content,
options: {
wordsPerMinute: 100,
},
}),
// highlight-end
},
},