alfaaz
Advanced tools
Comparing version 1.0.5 to 1.1.0
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.countLines = exports.countWords = void 0; | ||
const languages_1 = require("./languages"); | ||
const CHINESE_MAX_CODE_POINT = 205743; | ||
@@ -28,35 +29,9 @@ const CHINESE_MIN_CODE_POINT = 11904; | ||
const NEWLINE = "\n"; | ||
insertCharsIntoMap(" ", "\n", "\t", "\v", "*", "/", "&", ":", ";", ".", ",", "?", "="); | ||
// CJK Unified Ideographs 4E00-9FFF Common | ||
insertRangeIntoMap(19968, 40959); | ||
// CJK Unified Ideographs Extension A 3400-4DBF Rare | ||
insertRangeIntoMap(13312, 19903); | ||
// CJK Unified Ideographs Extension B 20000-2A6DF Rare, historic | ||
insertRangeIntoMap(131072, 173791); | ||
// CJK Unified Ideographs Extension C 2A700–2B73F Rare, historic | ||
insertRangeIntoMap(173824, 177983); | ||
// CJK Unified Ideographs Extension D 2B740–2B81F Uncommon, some in current use | ||
insertRangeIntoMap(177984, 178207); | ||
// CJK Unified Ideographs Extension E 2B820–2CEAF Rare, historic | ||
insertRangeIntoMap(178208, 183983); | ||
// CJK Unified Ideographs Extension F 2CEB0–2EBEF Rare, historic | ||
insertRangeIntoMap(183984, 191471); | ||
// CJK Unified Ideographs Extension G 30000–3134F Rare, historic | ||
insertRangeIntoMap(196608, 201551); | ||
// CJK Unified Ideographs Extension H 31350–323AF Rare, historic | ||
insertRangeIntoMap(201552, 205743); | ||
// CJK Compatibility Ideographs F900-FAFF Duplicates, unifiable variants, corporate characters | ||
insertRangeIntoMap(63744, 64255); | ||
// CJK Compatibility Ideographs Supplement 2F800-2FA1F Unifiable variants | ||
insertRangeIntoMap(194560, 195103); | ||
// CJK Radicals / Kangxi Radicals 2F00–2FDF | ||
insertRangeIntoMap(12032, 12255); | ||
// CJK Radicals Supplement 2E80–2EFF | ||
insertRangeIntoMap(11904, 12031); | ||
// CJK Symbols and Punctuation 3000–303F | ||
insertRangeIntoMap(12288, 12351); | ||
// CJK Compatibility 3300-33FF | ||
insertRangeIntoMap(13056, 13311); | ||
// CJK Compatibility Forms FE30-FE4F | ||
insertRangeIntoMap(65072, 65103); | ||
insertCharsIntoMap(" ", "\n", "\t", "\v", "*", "/", "&", ":", ";", ".", ",", "?", "=", "\u0F0B", // Tibetan uses [U+0F0B TIBETAN MARK INTERSYLLABIC TSHEG] (pronounced tsek) to signal the end of a syllable. | ||
"\u1361", // Ethiopic text uses the traditional wordspace character [U+1361 ETHIOPIC WORDSPACE] to indicate word boundaries | ||
"\u200b" // ZERO-WIDTH-SPACE can also be considered a word boundary | ||
); | ||
for (const range of languages_1.UNICODE_RANGES) { | ||
insertRangeIntoMap(range[0], range[1]); | ||
} | ||
function countWords(str) { | ||
@@ -69,5 +44,8 @@ let count = 0; | ||
const bitIndex = charCode % BYTE_SIZE; | ||
const isMatch = (BITMAP[byteIndex] >> bitIndex) & 1; | ||
const byteAtIndex = BITMAP[byteIndex]; | ||
const isMatch = ((byteAtIndex >> bitIndex) & 1) === 1; | ||
// 255 means this is probably a Unicode range match in which case | ||
// we should ignore the value of shouldCount | ||
// @ts-ignore allow JS to naturally coerce boolean into a number | ||
count += isMatch && (shouldCount || charCode > CHINESE_MIN_CODE_POINT); | ||
count += isMatch && (shouldCount || byteAtIndex === 255); | ||
shouldCount = !isMatch; | ||
@@ -74,0 +52,0 @@ } |
{ | ||
"name": "alfaaz", | ||
"version": "1.0.5", | ||
"version": "1.1.0", | ||
"description": "The fastest multilingual word counter that can count millions of words per second.", | ||
@@ -5,0 +5,0 @@ "main": "dist/index.js", |
@@ -117,5 +117,5 @@ <p align="center"> | ||
Counting words is not an uncommon need. Having a fast word counter can greatly increase your productivity. Ultimately the goal is to make all software operate with you having to wait for it. | ||
Counting words is not an uncommon need. Having a fast word counter can greatly increase your productivity. Ultimately the goal is to make all software operate without you having to wait for it. | ||
At its current speed, `alfaaz` can handle millions of words per second. | ||
At its current speed, `alfaaz` can easily handle millions of words per second. | ||
@@ -170,3 +170,3 @@ ## What's the secret sauce? | ||
const LENGTH = 32 / BYTE_SIZE; | ||
const bitmap = new Uint8Array(LENGTH); | ||
const bitmap = new Uint8Array(LENGTH + 1); | ||
@@ -189,3 +189,3 @@ const charCode = 32; | ||
count += (BITMAP[byteIndex] >> bitIndex) & 1; | ||
count += (bitmap[byteIndex] >> bitIndex) & 1; | ||
} | ||
@@ -192,0 +192,0 @@ ``` |
17555
23
160