const fs = require('fs')
		const { join } = require('path')

		// Bundled dictionary files, read synchronously at module load from this
		// file's directory. No encoding is passed, so each export is a raw Buffer.
		module.exports.dict = fs.readFileSync(join(__dirname, 'dict.txt'))
		module.exports.idf = fs.readFileSync(join(__dirname, 'idf.txt'))

dict.txt

Sorry, the diff of this file is too big to display

idf.txt

Sorry, the diff of this file is too big to display

+125

-13

index.d.ts

		/* auto-generated by NAPI-RS */
		/* eslint-disable */
		export declare function cut(sentence: string \| Uint8Array, hmm?: boolean \| undefined \| null): string[]
		export declare class Jieba {
		/** Create a new instance with empty dict */
		constructor()
		/**
		* Create a new instance with dict
		*
		* With the default dict, you can use `dict` from `@node-rs/jieba/dict`:
		* ```js
		* import { Jieba } from '@node-rs/jieba'
		* import { dict } from '@node-rs/jieba/dict'
		*
		* const jieba = Jieba.withDict(dict)
		* ```
		*
		* ## Params
		*
		* `dict`: raw bytes of the dictionary
		*/
		static withDict(dict: Uint8Array): Jieba
		/**
		* Load dictionary after initialization
		*
		* ## Params
		*
		* `dict`: raw bytes of the dictionary
		*/
		loadDict(dict: Uint8Array): void
		/**
		* Cut the input text
		*
		* ## Params
		*
		* `sentence`: input text
		*
		* `hmm`: enable HMM or not
		*/
		cut(sentence: string \| Uint8Array, hmm?: boolean \| undefined \| null): string[]
		/**
		* Cut the input text asynchronously
		*
		* ## Params
		*
		* `sentence`: input text
		*
		* `hmm`: enable HMM or not
		*
		* `signal`: optional `AbortSignal` (presumably cancels the pending task;
		* confirm against the NAPI-RS runtime)
		*
		* The result is typed as `Promise<unknown>`, so callers must narrow it
		* themselves (presumably it resolves to the same `string[]` as `cut`;
		* confirm against the native implementation).
		*/
		cutAsync(sentence: string \| Uint8Array, hmm?: boolean \| undefined \| null, signal?: AbortSignal \| undefined \| null): Promise<unknown>
		/**
		* Cut the input text, return all possible words
		*
		* ## Params
		*
		* `sentence`: input text
		*/
		cutAll(sentence: string \| Uint8Array): string[]
		/**
		* Cut the input text in search mode
		*
		* ## Params
		*
		* `sentence`: input text
		*
		* `hmm`: enable HMM or not
		*/
		cutForSearch(sentence: string \| Uint8Array, hmm?: boolean \| undefined \| null): string[]
		/**
		* Tag the input text
		*
		* ## Params
		*
		* `sentence`: input text
		*
		* `hmm`: enable HMM or not
		*/
		tag(sentence: string \| Uint8Array, hmm?: boolean \| undefined \| null): Array<TaggedWord>
		}

		export declare function cutAll(sentence: string \| Uint8Array): string[]
		export declare class TfIdf {
		/**
		* Create a new instance with dict
		*
		* With the default IDF dict, you can use `idf` from `@node-rs/jieba/dict`:
		* ```js
		* import { TfIdf } from '@node-rs/jieba'
		* import { idf } from '@node-rs/jieba/dict'
		*
		* const tfidf = TfIdf.withDict(idf)
		* ```
		*/
		static withDict(dict: Uint8Array): TfIdf
		/** Creates a TfIdf instance. */
		constructor()
		/**
		* Merges entries from `dict` into the `idf_dict`.
		* ```js
		* import { Jieba, TfIdf } from '@node-rs/jieba';
		*
		* import { dict, idf } from '@node-rs/jieba/dict';
		*
		* // Create default Jieba instance
		* const jieba = Jieba.withDict(dict);
		*
		* // Create TfIdf instance and load initial dictionary
		* let initIdf = "生化学 13.900677652\n";
		* const tfidf = new TfIdf();
		* tfidf.loadDict(Buffer.from(initIdf));
		*
		* // Extract keywords with initial dictionary
		* const text = "生化学不是光化学的,";
		* const topK = tfidf.extractKeywords(jieba, text, 3);
		* // Result would be like:
		* // [
		* // { keyword: '不是', weight: 4.6335592173333335 },
		* // { keyword: '光化学', weight: 4.6335592173333335 },
		* // { keyword: '生化学', weight: 4.6335592173333335 }
		* // ]
		*
		* // Load new dictionary with different weights
		* let newIdf = "光化学 99.123456789\n";
		* tfidf.loadDict(Buffer.from(newIdf));
		*
		* // Extract keywords again with updated dictionary
		* const newTopK = tfidf.extractKeywords(jieba, text, 3);
		* // Result would be like:
		* // [
		* // { keyword: '不是', weight: 33.041152263 },
		* // { keyword: '光化学', weight: 33.041152263 },
		* // { keyword: '生化学', weight: 4.6335592173333335 }
		* // ]
		* ```
		*/
		loadDict(dict: Uint8Array): void
		/** Sets the keyword extraction configuration (see `KeywordExtractConfig`). */
		setConfig(config: KeywordExtractConfig): void
		/**
		* Uses TF-IDF algorithm to extract the `top_k` keywords from `sentence`.
		*
		* If `allowed_pos` is not empty, then only terms matching those parts of
		* speech are considered.
		*
		* ## Params
		*
		* `jieba`: `Jieba` instance passed to the extractor (presumably used to
		* segment `sentence`; confirm against the native implementation)
		*
		* `sentence`: input text
		*
		* `topK`: number of keywords to extract
		*
		* `allowedPos`: optional list of part-of-speech tags to keep
		*/
		extractKeywords(jieba: Jieba, sentence: string, topK: number, allowedPos?: Array<string> \| undefined \| null): Array<Keyword>
		}

		export declare function cutForSearch(sentence: string \| Uint8Array, hmm?: boolean \| undefined \| null): string[]

		export declare function extract(sentence: string \| Uint8Array, topn: number, allowedPos?: string \| undefined \| null): Array<Keyword>

		export interface Keyword {
		@@ -16,10 +123,15 @@ keyword: string

		export declare function load(): void
		/**
		* Configuration for keyword extraction: holds filter criteria as well as
		* segmentation settings for use by keyword extraction implementations.
		* Every field is optional.
		*/
		export interface KeywordExtractConfig {
		/**
		* Stop-word set (presumably segments found in it are never reported as
		* keywords; confirm against the native implementation)
		*/
		stopWords?: Set<string> \| undefined
		/** Any segments less than this length will not be considered a Keyword */
		minKeywordLength?: number
		/** If true, fall back to hmm model if segment cannot be found in the dictionary */
		useHmm?: boolean
		}

		export declare function loadDict(dict: Uint8Array): void

		export declare function loadTFIDFDict(dict: Uint8Array): void

		export declare function tag(sentence: string \| Uint8Array, hmm?: boolean \| undefined \| null): Array<TaggedWord>

		export interface TaggedWord {
		@@ -26,0 +138,0 @@ tag: string

+6

-9

index.js

		// prettier-ignore
		/* eslint-disable */
		// @ts-nocheck
		/* auto-generated by NAPI-RS */

		const { readFileSync } = require('fs')
		const { createRequire } = require('node:module')
		require = createRequire(__filename)

		const { readFileSync } = require('node:fs')
		let nativeBinding = null
		@@ -364,9 +367,3 @@ const loadErrors = []

		module.exports.cut = nativeBinding.cut
		module.exports.cutAll = nativeBinding.cutAll
		module.exports.cutForSearch = nativeBinding.cutForSearch
		module.exports.extract = nativeBinding.extract
		module.exports.load = nativeBinding.load
		module.exports.loadDict = nativeBinding.loadDict
		module.exports.loadTFIDFDict = nativeBinding.loadTFIDFDict
		module.exports.tag = nativeBinding.tag
		module.exports.Jieba = nativeBinding.Jieba
		module.exports.TfIdf = nativeBinding.TfIdf

+27

-21

package.json

		{
		"name": "@node-rs/jieba",
		"version": "1.10.4",
		"version": "2.0.1",
		"description": "Fastest Chinese word segmentation in Node.js",
		@@ -23,3 +23,7 @@ "keywords": [
		"browser.js",
		"LICENSE"
		"LICENSE",
		"dict.txt",
		"idf.txt",
		"dict.js",
		"dict.d.ts"
		],
		@@ -58,7 +62,7 @@ "napi": {
		"artifacts": "napi artifacts -d ../../artifacts",
		"bench": "cross-env NODE_ENV=production node benchmark/jieba.js",
		"bench": "cross-env NODE_ENV=production node --import @oxc-node/core/register benchmark/jieba.ts",
		"build": "napi build --platform --release",
		"build:debug": "napi build --platform",
		"prepublishOnly": "napi prepublish",
		"version": "napi version && git add npm"
		"version": "napi version"
		},
		@@ -69,4 +73,6 @@ "bugs": {
		"devDependencies": {
		"@napi-rs/cli": "^3.0.0-alpha.63",
		"nodejieba": "^3.0.0"
		"@napi-rs/cli": "^3.0.0-alpha.64",
		"cross-env": "^7.0.3",
		"nodejieba": "^3.0.0",
		"tinybench": "^3.0.0"
		},
		@@ -77,19 +83,19 @@ "funding": {
		},
		"gitHead": "59fddf62f9c0eaa21443a540cbd6d900d0eb2672",
		"gitHead": "83ca124b3e3968c945708136d6ff68d0e0549582",
		"optionalDependencies": {
		"@node-rs/jieba-darwin-x64": "1.10.4",
		"@node-rs/jieba-darwin-arm64": "1.10.4",
		"@node-rs/jieba-win32-x64-msvc": "1.10.4",
		"@node-rs/jieba-linux-x64-gnu": "1.10.4",
		"@node-rs/jieba-android-arm64": "1.10.4",
		"@node-rs/jieba-linux-arm64-gnu": "1.10.4",
		"@node-rs/jieba-linux-arm64-musl": "1.10.4",
		"@node-rs/jieba-win32-arm64-msvc": "1.10.4",
		"@node-rs/jieba-linux-arm-gnueabihf": "1.10.4",
		"@node-rs/jieba-linux-x64-musl": "1.10.4",
		"@node-rs/jieba-freebsd-x64": "1.10.4",
		"@node-rs/jieba-win32-ia32-msvc": "1.10.4",
		"@node-rs/jieba-android-arm-eabi": "1.10.4",
		"@node-rs/jieba-wasm32-wasi": "1.10.4"
		"@node-rs/jieba-darwin-x64": "2.0.1",
		"@node-rs/jieba-darwin-arm64": "2.0.1",
		"@node-rs/jieba-win32-x64-msvc": "2.0.1",
		"@node-rs/jieba-linux-x64-gnu": "2.0.1",
		"@node-rs/jieba-android-arm64": "2.0.1",
		"@node-rs/jieba-linux-arm64-gnu": "2.0.1",
		"@node-rs/jieba-linux-arm64-musl": "2.0.1",
		"@node-rs/jieba-win32-arm64-msvc": "2.0.1",
		"@node-rs/jieba-linux-arm-gnueabihf": "2.0.1",
		"@node-rs/jieba-linux-x64-musl": "2.0.1",
		"@node-rs/jieba-freebsd-x64": "2.0.1",
		"@node-rs/jieba-win32-ia32-msvc": "2.0.1",
		"@node-rs/jieba-android-arm-eabi": "2.0.1",
		"@node-rs/jieba-wasm32-wasi": "2.0.1"
		}
		}

+42

-45

README.md

		@@ -17,47 +17,42 @@ # `@node-rs/jieba`
		```bash
		@node-rs/jieba x 3,763 ops/sec ±1.18% (92 runs sampled)
		nodejieba x 2,783 ops/sec ±0.67% (91 runs sampled)
		Cut 1184 words bench suite: Fastest is @node-rs/jieba

		@node-rs/jieba x 16.10 ops/sec ±1.58% (44 runs sampled)
		nodejieba x 9.81 ops/sec ±2.39% (29 runs sampled)
		Cut 246568 words bench suite: Fastest is @node-rs/jieba

		@node-rs/jieba x 1,739 ops/sec ±0.87% (92 runs sampled)
		nodejieba x 931 ops/sec ±1.31% (89 runs sampled)
		Tag 1184 words bench suite: Fastest is @node-rs/jieba

		@node-rs/jieba x 6.19 ops/sec ±2.01% (20 runs sampled)
		nodejieba x 3.06 ops/sec ±5.39% (12 runs sampled)
		Tag 246568 words bench suite: Fastest is @node-rs/jieba
		Benchmark Cut 1184 words result
		┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
		│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
		├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
		│ 0 │ '@node-rs/jieba' │ '8,246' │ 121266.9342871014 │ '±0.17%' │ 4124 │
		│ 1 │ 'nodejieba' │ '6,392' │ 156439.52799499547 │ '±0.20%' │ 3197 │
		└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
		Benchmark Cut 246568 words result
		┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
		│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
		├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
		│ 0 │ '@node-rs/jieba' │ '32' │ 30760703.470588237 │ '±3.01%' │ 17 │
		│ 1 │ 'nodejieba' │ '19' │ 51275112.699999996 │ '±2.68%' │ 10 │
		└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
		Benchmark Tag 1184 words result
		┌─────────┬──────────────────┬─────────┬───────────────────┬──────────┬─────────┐
		│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
		├─────────┼──────────────────┼─────────┼───────────────────┼──────────┼─────────┤
		│ 0 │ '@node-rs/jieba' │ '3,174' │ 315048.8916876547 │ '±0.20%' │ 1588 │
		│ 1 │ 'nodejieba' │ '2,672' │ 374213.8870605615 │ '±0.23%' │ 1337 │
		└─────────┴──────────────────┴─────────┴───────────────────┴──────────┴─────────┘
		Benchmark Tag 246568 words result
		┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
		│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
		├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
		│ 0 │ '@node-rs/jieba' │ '11' │ 84886341.7999999 │ '±5.74%' │ 10 │
		│ 1 │ 'nodejieba' │ '7' │ 125781083.30000004 │ '±4.75%' │ 10 │
		└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
		```

		## Support matrix

		\| \| node12 \| node14 \| node16 \| node18 \|
		\| ---------------- \| ------ \| ------ \| ------ \| ------ \|
		\| Windows x64 \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Windows x32 \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Windows arm64 \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| macOS x64 \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| macOS arm64 \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Linux x64 gnu \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Linux x64 musl \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Linux arm gnu \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Linux arm64 gnu \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Linux arm64 musl \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Android arm64 \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| Android armv7 \| ✓ \| ✓ \| ✓ \| ✓ \|
		\| FreeBSD x64 \| ✓ \| ✓ \| ✓ \| ✓ \|

		## Usage

		```javascript
		const { load, cut } = require('@node-rs/jieba')
		import { Jieba } from '@node-rs/jieba'
		import { dict } from '@node-rs/jieba/dict'

		load()
		// loadDict(fs.readFileSync(...))
		// loadTFIDFDict(fs.readFileSync(...))
		// load jieba with the default dict
		const jieba = Jieba.withDict(dict)

		cut('我们中出了一个叛徒', false)
		console.info(jieba.cut('我们中出了一个叛徒', false))

		@@ -68,7 +63,10 @@ // ["我们", "中", "出", "了", "一个", "叛徒"]
		```javascript
		const { load, cut } = require('@node-rs/jieba')
		import { Jieba, TfIdf } from '@node-rs/jieba'
		import { dict, idf } from '@node-rs/jieba/dict'

		load()
		const jieba = Jieba.withDict(dict)
		const tfIdf = TfIdf.withDict(idf)

		extract(
		tfIdf.extractKeywords(
		jieba,
		'今天纽约的天气真好啊，京华大酒店的张尧经理吃了一只北京烤鸭。后天纽约的天气不好，昨天纽约的天气也不好，北京烤鸭真好吃',
		@@ -88,11 +86,10 @@ 3,
		```javascript
		const { loadDict, cut } = require('@node-rs/jieba')
		import { Jieba } from '@node-rs/jieba'
		const customDict = ['哪行 50', '干一行 51', '行一行 52', '行行 53']

		const dictBuffer = Buffer.from(customDict.join('\n'), 'utf-8')
		// loadDict doc: https://github.com/fxsjy/jieba?tab=readme-ov-file#%E8%BD%BD%E5%85%A5%E8%AF%8D%E5%85%B8
		loadDict(dictBuffer)
		const jieba = Jieba.withDict(dictBuffer)

		const text = '人要是行干一行行一行，一行行行行行，行行行干哪行都行'
		const output = cut(text, false)
		const output = jieba.cut(text, false)
		console.log('分词结果⤵️\n', output.join('/'))
		@@ -99,0 +96,0 @@ // Before: 人/要是/行/干/一行行/一行/，/一行行/行/行/行/，/行/行/行/干/哪/行/都行

@node-rs/jieba - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics