Latest Threat Research:SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains.Details
Socket
Book a DemoInstallSign in
Socket

@node-rs/jieba

Package Overview
Dependencies
Maintainers
2
Versions
40
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@node-rs/jieba - npm Package Compare versions

Comparing version
1.10.4
to
2.0.1
+2
dict.d.ts
export const dict: Uint8Array
export const idf: Uint8Array
// Reads the two text files that sit next to this module and exposes their
// raw contents as the `dict` and `idf` exports. Both reads are synchronous
// and happen once, when the module is first required.
const nodePath = require('path')
const nodeFs = require('fs')

const dictPath = nodePath.join(__dirname, 'dict.txt')
const idfPath = nodePath.join(__dirname, 'idf.txt')

module.exports.dict = nodeFs.readFileSync(dictPath)
module.exports.idf = nodeFs.readFileSync(idfPath)

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

+125
-13
/* auto-generated by NAPI-RS */
/* eslint-disable */
export declare function cut(sentence: string | Uint8Array, hmm?: boolean | undefined | null): string[]
/**
* Segmenter instance. It can be created empty or from a dictionary, can load
* a dictionary afterwards, and exposes the `cut*` and `tag` operations below.
*/
export declare class Jieba {
/** Create a new instance with empty dict */
constructor()
/**
* Create a new instance with dict
*
* With the default dict, you can use `dict` from `@node-rs/jieba/dict`:
* ```js
* import { Jieba } from '@node-rs/jieba'
* import { dict } from '@node-rs/jieba/dict'
*
* const jieba = Jieba.withDict(dict)
* ```
*/
static withDict(dict: Uint8Array): Jieba
/**
* Load dictionary after initialization
*
* ## Params
*
* `dict`: dictionary contents as bytes
*/
loadDict(dict: Uint8Array): void
/**
* Cut the input text
*
* ## Params
*
* `sentence`: input text, as a string or as bytes
*
* `hmm`: enable HMM or not
*
* ## Returns
*
* The resulting words as an array of strings
*/
cut(sentence: string | Uint8Array, hmm?: boolean | undefined | null): string[]
/**
* Cut the input text asynchronously
*
* ## Params
*
* `sentence`: input text, as a string or as bytes
*
* `hmm`: enable HMM or not
*
* `signal`: optional `AbortSignal`
* (NOTE(review): presumably cancels the pending work - confirm)
*
* ## Returns
*
* A promise typed as `Promise<unknown>`, so callers must narrow the value
* after awaiting it.
* NOTE(review): presumably resolves to the same `string[]` as `cut` - confirm.
*/
cutAsync(sentence: string | Uint8Array, hmm?: boolean | undefined | null, signal?: AbortSignal | undefined | null): Promise<unknown>
/**
* Cut the input text, return all possible words
*
* ## Params
*
* `sentence`: input text, as a string or as bytes
*/
cutAll(sentence: string | Uint8Array): string[]
/**
* Cut the input text in search mode
*
* ## Params
*
* `sentence`: input text, as a string or as bytes
*
* `hmm`: enable HMM or not
*/
cutForSearch(sentence: string | Uint8Array, hmm?: boolean | undefined | null): string[]
/**
* Tag the input text
*
* ## Params
*
* `sentence`: input text, as a string or as bytes
*
* `hmm`: enable HMM or not
*
* ## Returns
*
* One `TaggedWord` entry per resulting word
*/
tag(sentence: string | Uint8Array, hmm?: boolean | undefined | null): Array<TaggedWord>
}
export declare function cutAll(sentence: string | Uint8Array): string[]
/**
* TF-IDF keyword extractor. It can be created empty or from a dictionary,
* can merge further dictionary entries, accepts a `KeywordExtractConfig`,
* and extracts `Keyword`s from a sentence using a supplied `Jieba` instance.
*/
export declare class TfIdf {
/**
* Create a new instance with dict
*
* With the default data, you can use `idf` from `@node-rs/jieba/dict`:
* ```js
* import { TfIdf } from '@node-rs/jieba'
* import { idf } from '@node-rs/jieba/dict'
*
* const tfIdf = TfIdf.withDict(idf)
* ```
*/
static withDict(dict: Uint8Array): TfIdf
/** Creates a TfIdf instance. */
constructor()
/**
* Merges entries from `dict` into the `idf_dict`.
* ```js
* import { Jieba, TfIdf } from '@node-rs/jieba';
*
* import { dict, idf } from '@node-rs/jieba/dict';
*
* // Create default Jieba instance
* const jieba = Jieba.withDict(dict);
*
* // Create TfIdf instance and load initial dictionary
* let initIdf = "生化学 13.900677652\n";
* const tfidf = new TfIdf();
* tfidf.loadDict(Buffer.from(initIdf));
*
* // Extract keywords with initial dictionary
* const text = "生化学不是光化学的,";
* const topK = tfidf.extractKeywords(jieba, text, 3);
* // Result would be like:
* // [
* // { keyword: '不是', weight: 4.6335592173333335 },
* // { keyword: '光化学', weight: 4.6335592173333335 },
* // { keyword: '生化学', weight: 4.6335592173333335 }
* // ]
*
* // Load new dictionary with different weights
* let newIdf = "光化学 99.123456789\n";
* tfidf.loadDict(Buffer.from(newIdf));
*
* // Extract keywords again with updated dictionary
* const newTopK = tfidf.extractKeywords(jieba, text, 3);
* // Result would be like:
* // [
* // { keyword: '不是', weight: 33.041152263 },
* // { keyword: '光化学', weight: 33.041152263 },
* // { keyword: '生化学', weight: 4.6335592173333335 }
* // ]
* ```
*/
loadDict(dict: Uint8Array): void
/**
* Sets the `KeywordExtractConfig` for this instance.
* NOTE(review): presumably affects subsequent `extractKeywords` calls - confirm.
*/
setConfig(config: KeywordExtractConfig): void
/**
* Uses TF-IDF algorithm to extract the `topK` keywords from `sentence`.
*
* If `allowedPos` is not empty, then only terms matching those parts of
* speech are considered.
*/
extractKeywords(jieba: Jieba, sentence: string, topK: number, allowedPos?: Array<string> | undefined | null): Array<Keyword>
}
export declare function cutForSearch(sentence: string | Uint8Array, hmm?: boolean | undefined | null): string[]
export declare function extract(sentence: string | Uint8Array, topn: number, allowedPos?: string | undefined | null): Array<Keyword>
export interface Keyword {

@@ -16,10 +123,15 @@ keyword: string

export declare function load(): void
/**
* Configuration for keyword extraction: holds filter criteria as well as
* segmentation configuration for use by keyword extraction
* implementations. Every field is optional.
*/
export interface KeywordExtractConfig {
/**
* Set of stop words.
* NOTE(review): presumably these words are excluded from extracted keywords - confirm.
*/
stopWords?: Set<string> | undefined
/** Any segments less than this length will not be considered a Keyword */
minKeywordLength?: number
/** If true, fall back to the HMM model if a segment cannot be found in the dictionary */
useHmm?: boolean
}
export declare function loadDict(dict: Uint8Array): void
export declare function loadTFIDFDict(dict: Uint8Array): void
export declare function tag(sentence: string | Uint8Array, hmm?: boolean | undefined | null): Array<TaggedWord>
export interface TaggedWord {

@@ -26,0 +138,0 @@ tag: string

+6
-9
// prettier-ignore
/* eslint-disable */
// @ts-nocheck
/* auto-generated by NAPI-RS */
const { readFileSync } = require('fs')
const { createRequire } = require('node:module')
require = createRequire(__filename)
const { readFileSync } = require('node:fs')
let nativeBinding = null

@@ -364,9 +367,3 @@ const loadErrors = []

module.exports.cut = nativeBinding.cut
module.exports.cutAll = nativeBinding.cutAll
module.exports.cutForSearch = nativeBinding.cutForSearch
module.exports.extract = nativeBinding.extract
module.exports.load = nativeBinding.load
module.exports.loadDict = nativeBinding.loadDict
module.exports.loadTFIDFDict = nativeBinding.loadTFIDFDict
module.exports.tag = nativeBinding.tag
module.exports.Jieba = nativeBinding.Jieba
module.exports.TfIdf = nativeBinding.TfIdf
{
"name": "@node-rs/jieba",
"version": "1.10.4",
"version": "2.0.1",
"description": "Fastest Chinese word segmentation in Node.js",

@@ -23,3 +23,7 @@ "keywords": [

"browser.js",
"LICENSE"
"LICENSE",
"dict.txt",
"idf.txt",
"dict.js",
"dict.d.ts"
],

@@ -58,7 +62,7 @@ "napi": {

"artifacts": "napi artifacts -d ../../artifacts",
"bench": "cross-env NODE_ENV=production node benchmark/jieba.js",
"bench": "cross-env NODE_ENV=production node --import @oxc-node/core/register benchmark/jieba.ts",
"build": "napi build --platform --release",
"build:debug": "napi build --platform",
"prepublishOnly": "napi prepublish",
"version": "napi version && git add npm"
"version": "napi version"
},

@@ -69,4 +73,6 @@ "bugs": {

"devDependencies": {
"@napi-rs/cli": "^3.0.0-alpha.63",
"nodejieba": "^3.0.0"
"@napi-rs/cli": "^3.0.0-alpha.64",
"cross-env": "^7.0.3",
"nodejieba": "^3.0.0",
"tinybench": "^3.0.0"
},

@@ -77,19 +83,19 @@ "funding": {

},
"gitHead": "59fddf62f9c0eaa21443a540cbd6d900d0eb2672",
"gitHead": "83ca124b3e3968c945708136d6ff68d0e0549582",
"optionalDependencies": {
"@node-rs/jieba-darwin-x64": "1.10.4",
"@node-rs/jieba-darwin-arm64": "1.10.4",
"@node-rs/jieba-win32-x64-msvc": "1.10.4",
"@node-rs/jieba-linux-x64-gnu": "1.10.4",
"@node-rs/jieba-android-arm64": "1.10.4",
"@node-rs/jieba-linux-arm64-gnu": "1.10.4",
"@node-rs/jieba-linux-arm64-musl": "1.10.4",
"@node-rs/jieba-win32-arm64-msvc": "1.10.4",
"@node-rs/jieba-linux-arm-gnueabihf": "1.10.4",
"@node-rs/jieba-linux-x64-musl": "1.10.4",
"@node-rs/jieba-freebsd-x64": "1.10.4",
"@node-rs/jieba-win32-ia32-msvc": "1.10.4",
"@node-rs/jieba-android-arm-eabi": "1.10.4",
"@node-rs/jieba-wasm32-wasi": "1.10.4"
"@node-rs/jieba-darwin-x64": "2.0.1",
"@node-rs/jieba-darwin-arm64": "2.0.1",
"@node-rs/jieba-win32-x64-msvc": "2.0.1",
"@node-rs/jieba-linux-x64-gnu": "2.0.1",
"@node-rs/jieba-android-arm64": "2.0.1",
"@node-rs/jieba-linux-arm64-gnu": "2.0.1",
"@node-rs/jieba-linux-arm64-musl": "2.0.1",
"@node-rs/jieba-win32-arm64-msvc": "2.0.1",
"@node-rs/jieba-linux-arm-gnueabihf": "2.0.1",
"@node-rs/jieba-linux-x64-musl": "2.0.1",
"@node-rs/jieba-freebsd-x64": "2.0.1",
"@node-rs/jieba-win32-ia32-msvc": "2.0.1",
"@node-rs/jieba-android-arm-eabi": "2.0.1",
"@node-rs/jieba-wasm32-wasi": "2.0.1"
}
}
+42
-45

@@ -17,47 +17,42 @@ # `@node-rs/jieba`

```bash
@node-rs/jieba x 3,763 ops/sec ±1.18% (92 runs sampled)
nodejieba x 2,783 ops/sec ±0.67% (91 runs sampled)
Cut 1184 words bench suite: Fastest is @node-rs/jieba
@node-rs/jieba x 16.10 ops/sec ±1.58% (44 runs sampled)
nodejieba x 9.81 ops/sec ±2.39% (29 runs sampled)
Cut 246568 words bench suite: Fastest is @node-rs/jieba
@node-rs/jieba x 1,739 ops/sec ±0.87% (92 runs sampled)
nodejieba x 931 ops/sec ±1.31% (89 runs sampled)
Tag 1184 words bench suite: Fastest is @node-rs/jieba
@node-rs/jieba x 6.19 ops/sec ±2.01% (20 runs sampled)
nodejieba x 3.06 ops/sec ±5.39% (12 runs sampled)
Tag 246568 words bench suite: Fastest is @node-rs/jieba
Benchmark Cut 1184 words result
┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
│ 0 │ '@node-rs/jieba' │ '8,246' │ 121266.9342871014 │ '±0.17%' │ 4124 │
│ 1 │ 'nodejieba' │ '6,392' │ 156439.52799499547 │ '±0.20%' │ 3197 │
└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
Benchmark Cut 246568 words result
┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
│ 0 │ '@node-rs/jieba' │ '32' │ 30760703.470588237 │ '±3.01%' │ 17 │
│ 1 │ 'nodejieba' │ '19' │ 51275112.699999996 │ '±2.68%' │ 10 │
└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
Benchmark Tag 1184 words result
┌─────────┬──────────────────┬─────────┬───────────────────┬──────────┬─────────┐
│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
├─────────┼──────────────────┼─────────┼───────────────────┼──────────┼─────────┤
│ 0 │ '@node-rs/jieba' │ '3,174' │ 315048.8916876547 │ '±0.20%' │ 1588 │
│ 1 │ 'nodejieba' │ '2,672' │ 374213.8870605615 │ '±0.23%' │ 1337 │
└─────────┴──────────────────┴─────────┴───────────────────┴──────────┴─────────┘
Benchmark Tag 246568 words result
┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
│ (index) │ Task Name │ ops/sec │ Average Time (ns) │ Margin │ Samples │
├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
│ 0 │ '@node-rs/jieba' │ '11' │ 84886341.7999999 │ '±5.74%' │ 10 │
│ 1 │ 'nodejieba' │ '7' │ 125781083.30000004 │ '±4.75%' │ 10 │
└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
```
## Support matrix
| | node12 | node14 | node16 | node18 |
| ---------------- | ------ | ------ | ------ | ------ |
| Windows x64 | ✓ | ✓ | ✓ | ✓ |
| Windows x32 | ✓ | ✓ | ✓ | ✓ |
| Windows arm64 | ✓ | ✓ | ✓ | ✓ |
| macOS x64 | ✓ | ✓ | ✓ | ✓ |
| macOS arm64 | ✓ | ✓ | ✓ | ✓ |
| Linux x64 gnu | ✓ | ✓ | ✓ | ✓ |
| Linux x64 musl | ✓ | ✓ | ✓ | ✓ |
| Linux arm gnu | ✓ | ✓ | ✓ | ✓ |
| Linux arm64 gnu | ✓ | ✓ | ✓ | ✓ |
| Linux arm64 musl | ✓ | ✓ | ✓ | ✓ |
| Android arm64 | ✓ | ✓ | ✓ | ✓ |
| Android armv7 | ✓ | ✓ | ✓ | ✓ |
| FreeBSD x64 | ✓ | ✓ | ✓ | ✓ |
## Usage
```javascript
const { load, cut } = require('@node-rs/jieba')
import { Jieba } from '@node-rs/jieba'
import { dict } from '@node-rs/jieba/dict'
load()
// loadDict(fs.readFileSync(...))
// loadTFIDFDict(fs.readFileSync(...))
// load jieba with the default dict
const jieba = Jieba.withDict(dict)
cut('我们中出了一个叛徒', false)
console.info(jieba.cut('我们中出了一个叛徒', false))

@@ -68,7 +63,10 @@ // ["我们", "中", "出", "了", "一个", "叛徒"]

```javascript
const { load, cut } = require('@node-rs/jieba')
import { Jieba, TfIdf } from '@node-rs/jieba'
import { dict, idf } from '@node-rs/jieba/dict'
load()
const jieba = Jieba.withDict(dict)
const tfIdf = TfIdf.withDict(idf)
extract(
tfIdf.extractKeywords(
jieba,
'今天纽约的天气真好啊,京华大酒店的张尧经理吃了一只北京烤鸭。后天纽约的天气不好,昨天纽约的天气也不好,北京烤鸭真好吃',

@@ -88,11 +86,10 @@ 3,

```javascript
const { loadDict, cut } = require('@node-rs/jieba')
import { Jieba } from '@node-rs/jieba'
const customDict = ['哪行 50', '干一行 51', '行一行 52', '行行 53']
const dictBuffer = Buffer.from(customDict.join('\n'), 'utf-8')
// loadDict doc: https://github.com/fxsjy/jieba?tab=readme-ov-file#%E8%BD%BD%E5%85%A5%E8%AF%8D%E5%85%B8
loadDict(dictBuffer)
const jieba = Jieba.withDict(dictBuffer)
const text = '人要是行干一行行一行,一行行行行行,行行行干哪行都行'
const output = cut(text, false)
const output = jieba.cut(text, false)
console.log('分词结果⤵️\n', output.join('/'))

@@ -99,0 +96,0 @@ // Before: 人/要是/行/干/一行行/一行/,/一行行/行/行/行/,/行/行/行/干/哪/行/都行