cspell-trie-lib
Advanced tools
Comparing version 8.6.1 to 8.7.0
import type { BuilderCursor } from './BuilderCursor.js'; | ||
export declare function insertWordsAtCursor(cursor: BuilderCursor, words: Iterable<string>): void; | ||
export declare function commonStringPrefixLen(a: string, b: string): number; | ||
//# sourceMappingURL=cursor-util.d.ts.map |
export function insertWordsAtCursor(cursor, words) { | ||
let prevWord = ''; | ||
for (const word of words) { | ||
const pLen = commonPrefixLen(prevWord, word); | ||
const pLen = commonStrPrefix(prevWord, word); | ||
const stepBack = prevWord.length - pLen; | ||
cursor.backStep(stepBack); | ||
for (let i = pLen; i < word.length; ++i) { | ||
const wLen = word.length; | ||
for (let i = pLen; i < wLen; ++i) { | ||
cursor.insertChar(word[i]); | ||
@@ -15,3 +16,3 @@ } | ||
} | ||
function commonPrefixLen(a, b) { | ||
export function commonStringPrefixLen(a, b) { | ||
let i = 0; | ||
@@ -21,4 +22,18 @@ for (i = 0; i < a.length && a[i] === b[i]; ++i) { | ||
} | ||
if (i) { | ||
// detect second half of a surrogate pair and backup. | ||
const c = a.charCodeAt(i) & 0xffff; | ||
if (c >= 0xdc00 && c <= 0xdfff) { | ||
--i; | ||
} | ||
} | ||
return i; | ||
} | ||
function commonStrPrefix(a, b) { | ||
let i = 0; | ||
for (i = 0; i < a.length && a[i] === b[i]; ++i) { | ||
/* empty */ | ||
} | ||
return i; | ||
} | ||
//# sourceMappingURL=cursor-util.js.map |
@@ -5,2 +5,12 @@ import type { PartialTrieOptions, TrieOptions } from '../trie.js'; | ||
export interface TrieBuilder<T extends TrieData> { | ||
/** | ||
* Use this method to convert a word into an array of characters. | ||
* Since `[...word]` is not equal to `word.split('')` or `word[i]` in some cases, | ||
* this method is used to ensure that the characters are split correctly. | ||
* @see [String.codePointAt](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/codePointAt) | ||
* @see [String.charCodeAt](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/charCodeAt) | ||
* @param word - The word to convert into an array of characters. | ||
* @returns An array of characters, one for each character in the word. | ||
*/ | ||
wordToCharacters(word: string): string[]; | ||
getCursor(): BuilderCursor; | ||
@@ -7,0 +17,0 @@ build(): T; |
@@ -189,6 +189,6 @@ import { CASE_INSENSITIVE_PREFIX, COMPOUND_FIX, FORBID_PREFIX } from '../constants.js'; | ||
function walk(root, word) { | ||
const w = word; | ||
const w = [...word]; | ||
let n = root; | ||
let i = 0; | ||
while (n && i < word.length) { | ||
while (n && i < w.length) { | ||
const h = w[i++]; | ||
@@ -195,0 +195,0 @@ n = n.get(h); |
@@ -5,5 +5,5 @@ import type { ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; | ||
import { TrieBlob } from './TrieBlob.js'; | ||
export declare function createTrieBlob(words: string[], options?: PartialTrieInfo): TrieBlob; | ||
export declare function createTrieBlob(words: readonly string[], options?: PartialTrieInfo): TrieBlob; | ||
export declare function createTrieBlobFromITrieNodeRoot(root: ITrieNodeRoot): TrieBlob; | ||
export declare function createTrieBlobFromTrieData(trie: TrieData): TrieBlob; | ||
//# sourceMappingURL=createTrieBlob.d.ts.map |
@@ -9,11 +9,14 @@ import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; | ||
private nodes; | ||
private charIndex; | ||
private _charIndex; | ||
readonly bitMasksInfo: FastTrieBlobBitMaskInfo; | ||
private charToIndexMap; | ||
private _charToIndexMap; | ||
private _readonly; | ||
private _forbidIdx; | ||
private _iTrieRoot; | ||
wordToCharacters: (word: string) => readonly string[]; | ||
readonly info: Readonly<TrieInfo>; | ||
private constructor(); | ||
private lookUpCharIndex; | ||
private _lookUpCharIndex; | ||
private wordToNodeCharIndexSequence; | ||
private letterToNodeCharIndexSequence; | ||
has(word: string): boolean; | ||
@@ -25,2 +28,7 @@ private _has; | ||
freeze(): this; | ||
toJSON(): { | ||
info: Readonly<TrieInfo>; | ||
nodes: NodeElement[]; | ||
charIndex: readonly string[]; | ||
}; | ||
static create(data: FastTrieBlobInternals, options?: PartialTrieInfo): FastTrieBlob; | ||
@@ -39,4 +47,18 @@ static toITrieNodeRoot(trie: FastTrieBlob): ITrieNodeRoot; | ||
get size(): number; | ||
private _lookupChar; | ||
private _lookupCharIndexNode; | ||
/** Search from nodeIdx for the node index representing the character. */ | ||
private _searchNodeForChar; | ||
get charIndex(): readonly string[]; | ||
static fromTrieBlob(trie: TrieBlob): FastTrieBlob; | ||
} | ||
interface NodeElement { | ||
id: number; | ||
eow: boolean; | ||
n: number; | ||
c: { | ||
c: number | string; | ||
i: number; | ||
}[]; | ||
} | ||
export {}; | ||
//# sourceMappingURL=FastTrieBlob.d.ts.map |
@@ -6,23 +6,32 @@ import { findNode } from '../ITrieNode/trie-util.js'; | ||
import { FastTrieBlobIRoot } from './FastTrieBlobIRoot.js'; | ||
import { NumberSequenceByteDecoderAccumulator } from './NumberSequenceByteDecoderAccumulator.js'; | ||
import { TrieBlob } from './TrieBlob.js'; | ||
export class FastTrieBlob { | ||
nodes; | ||
charIndex; | ||
_charIndex; | ||
bitMasksInfo; | ||
charToIndexMap; | ||
_charToIndexMap; | ||
_readonly = false; | ||
_forbidIdx; | ||
_iTrieRoot; | ||
wordToCharacters; | ||
info; | ||
constructor(nodes, charIndex, bitMasksInfo, options) { | ||
constructor(nodes, _charIndex, bitMasksInfo, options) { | ||
this.nodes = nodes; | ||
this.charIndex = charIndex; | ||
this._charIndex = _charIndex; | ||
this.bitMasksInfo = bitMasksInfo; | ||
this.info = mergeOptionalWithDefaults(options); | ||
this.charToIndexMap = createCharToIndexMap(charIndex); | ||
this._forbidIdx = this._lookupChar(0, this.info.forbiddenWordPrefix); | ||
this.wordToCharacters = (word) => [...word]; | ||
this._charToIndexMap = createCharToIndexMap(_charIndex); | ||
this._forbidIdx = this._searchNodeForChar(0, this.info.forbiddenWordPrefix); | ||
} | ||
lookUpCharIndex(char) { | ||
return this.charToIndexMap[char] ?? -1; | ||
_lookUpCharIndex(char) { | ||
return this._charToIndexMap[char] ?? -1; | ||
} | ||
wordToNodeCharIndexSequence(word) { | ||
return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), (c) => this._lookUpCharIndex(c)); | ||
} | ||
letterToNodeCharIndexSequence(letter) { | ||
return TrieBlob.toCharIndexSequence(this._lookUpCharIndex(letter)); | ||
} | ||
has(word) { | ||
@@ -36,6 +45,7 @@ return this._has(0, word); | ||
const nodes = this.nodes; | ||
const len = word.length; | ||
const charIndexes = this.wordToNodeCharIndexSequence(word); | ||
const len = charIndexes.length; | ||
let node = nodes[nodeIdx]; | ||
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) { | ||
const letterIdx = this.lookUpCharIndex(word[p]); | ||
const letterIdx = charIndexes[p]; | ||
const count = node.length; | ||
@@ -61,6 +71,7 @@ let i = count - 1; | ||
const nodes = this.nodes; | ||
const stack = [{ nodeIdx: 0, pos: 0, word: '' }]; | ||
const accumulator = NumberSequenceByteDecoderAccumulator.create(); | ||
const stack = [{ nodeIdx: 0, pos: 0, word: '', accumulator }]; | ||
let depth = 0; | ||
while (depth >= 0) { | ||
const { nodeIdx, pos, word } = stack[depth]; | ||
const { nodeIdx, pos, word, accumulator } = stack[depth]; | ||
const node = nodes[nodeIdx]; | ||
@@ -77,3 +88,5 @@ if (!pos && node[0] & NodeMaskEOW) { | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
const letter = this.charIndex[charIdx]; | ||
const acc = accumulator.clone(); | ||
const letterIdx = acc.decode(charIdx); | ||
const letter = (letterIdx && this._charIndex[letterIdx]) || ''; | ||
++depth; | ||
@@ -84,2 +97,3 @@ stack[depth] = { | ||
word: word + letter, | ||
accumulator: acc, | ||
}; | ||
@@ -119,3 +133,3 @@ } | ||
} | ||
return new TrieBlob(binNodes, this.charIndex, this.info); | ||
return new TrieBlob(binNodes, this._charIndex, this.info); | ||
} | ||
@@ -129,2 +143,9 @@ isReadonly() { | ||
} | ||
toJSON() { | ||
return { | ||
info: this.info, | ||
nodes: nodesToJson(this.nodes), | ||
charIndex: this._charIndex, | ||
}; | ||
} | ||
static create(data, options) { | ||
@@ -134,3 +155,3 @@ return new FastTrieBlob(data.nodes, data.charIndex, extractInfo(data), options); | ||
static toITrieNodeRoot(trie) { | ||
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie.charIndex, trie.charToIndexMap, trie.bitMasksInfo), 0, trie.info); | ||
return new FastTrieBlobIRoot(new FastTrieBlobInternals(trie.nodes, trie._charIndex, trie._charToIndexMap, trie.bitMasksInfo), 0, trie.info); | ||
} | ||
@@ -164,3 +185,3 @@ static NodeMaskEOW = TrieBlob.NodeMaskEOW; | ||
} | ||
_lookupChar(nodeIdx, char) { | ||
_lookupCharIndexNode(nodeIdx, charIndex) { | ||
const NodeMaskChildCharIndex = this.bitMasksInfo.NodeMaskChildCharIndex; | ||
@@ -170,3 +191,3 @@ const NodeChildRefShift = this.bitMasksInfo.NodeChildRefShift; | ||
const node = nodes[nodeIdx]; | ||
const letterIdx = this.lookUpCharIndex(char); | ||
const letterIdx = charIndex; | ||
const count = node.length; | ||
@@ -181,2 +202,50 @@ let i = count - 1; | ||
} | ||
/** Search from nodeIdx for the node index representing the character. */ | ||
_searchNodeForChar(nodeIdx, char) { | ||
const charIndexes = this.letterToNodeCharIndexSequence(char); | ||
let idx = nodeIdx; | ||
for (let i = 0; i < charIndexes.length; ++i) { | ||
idx = this._lookupCharIndexNode(idx, charIndexes[i]); | ||
if (!idx) | ||
return 0; | ||
} | ||
return idx; | ||
} | ||
get charIndex() { | ||
return [...this._charIndex]; | ||
} | ||
static fromTrieBlob(trie) { | ||
const bitMasksInfo = { | ||
NodeMaskEOW: TrieBlob.NodeMaskEOW, | ||
NodeMaskChildCharIndex: TrieBlob.NodeMaskChildCharIndex, | ||
NodeChildRefShift: TrieBlob.NodeChildRefShift, | ||
}; | ||
const trieNodesBin = TrieBlob.nodesView(trie); | ||
const nodeOffsets = []; | ||
for (let offset = 0; offset < trieNodesBin.length; offset += (trieNodesBin[offset] & TrieBlob.NodeMaskNumChildren) + 1) { | ||
nodeOffsets.push(offset); | ||
} | ||
const offsetToNodeIndex = new Map(nodeOffsets.map((offset, i) => [offset, i])); | ||
const nodes = new Array(nodeOffsets.length); | ||
for (let i = 0; i < nodes.length; ++i) { | ||
const offset = nodeOffsets[i]; | ||
const n = trieNodesBin[offset]; | ||
const eow = n & TrieBlob.NodeMaskEOW; | ||
const count = n & TrieBlob.NodeMaskNumChildren; | ||
const node = new Array(count + 1); | ||
node[0] = eow; | ||
nodes[i] = node; | ||
for (let j = 1; j <= count; ++j) { | ||
const n = trieNodesBin[offset + j]; | ||
const charIndex = n & TrieBlob.NodeMaskChildCharIndex; | ||
const nodeIndex = n >>> TrieBlob.NodeChildRefShift; | ||
const idx = offsetToNodeIndex.get(nodeIndex); | ||
if (idx === undefined) { | ||
throw new Error(`Invalid node index ${nodeIndex}`); | ||
} | ||
node[j] = (idx << TrieBlob.NodeChildRefShift) | charIndex; | ||
} | ||
} | ||
return new FastTrieBlob(nodes, trie.charIndex, bitMasksInfo, trie.info); | ||
} | ||
} | ||
@@ -192,2 +261,14 @@ function createCharToIndexMap(charIndex) { | ||
} | ||
function nodesToJson(nodes) { | ||
function nodeElement(node, index) { | ||
const eow = !!(node[0] & TrieBlob.NodeMaskEOW); | ||
const children = node.slice(1).map((n) => ({ | ||
c: ('00' + (n & TrieBlob.NodeMaskChildCharIndex).toString(16)).slice(-2), | ||
i: n >>> TrieBlob.NodeChildRefShift, | ||
})); | ||
return { id: index, eow, n: node.length, c: children }; | ||
} | ||
const elements = nodes.map((n, i) => nodeElement(n, i)); | ||
return elements; | ||
} | ||
//# sourceMappingURL=FastTrieBlob.js.map |
@@ -14,2 +14,3 @@ import type { BuilderCursor, TrieBuilder } from '../Builder/index.js'; | ||
private _options; | ||
wordToCharacters: (word: string) => string[]; | ||
readonly bitMasksInfo: FastTrieBlobBitMaskInfo; | ||
@@ -20,2 +21,4 @@ constructor(options?: PartialTrieInfo, bitMasksInfo?: FastTrieBlobBitMaskInfo); | ||
private getCharIndex; | ||
private wordToNodeCharIndexSequence; | ||
private letterToNodeCharIndexSequence; | ||
insert(word: string | Iterable<string> | string[]): this; | ||
@@ -29,3 +32,3 @@ getCursor(): BuilderCursor; | ||
build(): FastTrieBlob; | ||
static fromWordList(words: string[] | Iterable<string>, options?: PartialTrieInfo): FastTrieBlob; | ||
static fromWordList(words: readonly string[] | Iterable<string>, options?: PartialTrieInfo): FastTrieBlob; | ||
static fromTrieRoot(root: TrieRoot): FastTrieBlob; | ||
@@ -32,0 +35,0 @@ static NodeMaskEOW: number; |
import { assert } from '../utils/assert.js'; | ||
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js'; | ||
import { assertValidUtf16Character } from '../utils/text.js'; | ||
import { FastTrieBlob } from './FastTrieBlob.js'; | ||
@@ -15,2 +16,3 @@ import { FastTrieBlobInternals } from './FastTrieBlobInternals.js'; | ||
_options; | ||
wordToCharacters = (word) => [...word]; | ||
bitMasksInfo; | ||
@@ -41,2 +43,8 @@ constructor(options, bitMasksInfo = FastTrieBlobBuilder.DefaultBitMaskInfo) { | ||
} | ||
wordToNodeCharIndexSequence(word) { | ||
return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), (c) => this.getCharIndex(c)); | ||
} | ||
letterToNodeCharIndexSequence(letter) { | ||
return TrieBlob.toCharIndexSequence(this.getCharIndex(letter)); | ||
} | ||
insert(word) { | ||
@@ -81,6 +89,39 @@ if (this.isReadonly()) { | ||
const nodes = this.nodes; | ||
const stack = [{ nodeIdx: 0, pos: 0 }]; | ||
const stack = [{ nodeIdx: 0, pos: 0, dCount: 1, ps: '' }]; | ||
let nodeIdx = 0; | ||
let depth = 0; | ||
const insertChar = (char) => { | ||
const cc = char.charCodeAt(0) & 0xfc00; | ||
// Work with partial surrogate pairs. | ||
if (cc === 0xd800 && char.length == 1) { | ||
// We have a high surrogate | ||
const s = stack[depth]; | ||
const ns = stack[++depth]; | ||
if (ns) { | ||
ns.nodeIdx = s.nodeIdx; | ||
ns.pos = s.pos; | ||
ns.dCount = 1; | ||
ns.ps = char; | ||
} | ||
else { | ||
stack[depth] = { nodeIdx: s.nodeIdx, pos: s.pos, dCount: 1, ps: char }; | ||
} | ||
return; | ||
} | ||
if (stack[depth].ps) { | ||
char = stack[depth].ps + char; | ||
assertValidUtf16Character(char); | ||
} | ||
const indexSeq = this.letterToNodeCharIndexSequence(char); | ||
for (let i = 0; i < indexSeq.length; ++i) { | ||
insertCharIndexes(indexSeq[i], i + 1); | ||
} | ||
}; | ||
/** | ||
* A single character can result in multiple nodes being created | ||
* because it takes multiple bytes to represent a character. | ||
* @param charIndex - partial character index. | ||
* @param char - the source character | ||
*/ | ||
const insertCharIndexes = (charIndex, dCount) => { | ||
// console.warn('i %o at %o', char, nodeIdx); | ||
@@ -97,3 +138,3 @@ if (nodes[nodeIdx] && Object.isFrozen(nodes[nodeIdx])) { | ||
nodes[nodeIdx] = node; | ||
const letterIdx = this.getCharIndex(char); | ||
const letterIdx = charIndex; | ||
const hasIdx = childPos(node, letterIdx); | ||
@@ -107,5 +148,7 @@ const childIdx = hasIdx ? node[hasIdx] >>> NodeChildRefShift : nodes.length; | ||
s.pos = pos; | ||
s.dCount = dCount; | ||
s.ps = ''; | ||
} | ||
else { | ||
stack[depth] = { nodeIdx, pos }; | ||
stack[depth] = { nodeIdx, pos, dCount, ps: '' }; | ||
} | ||
@@ -149,3 +192,5 @@ nodeIdx = childIdx; | ||
assert(num <= depth && num > 0); | ||
depth -= num; | ||
for (; num > 0; --num) { | ||
depth -= stack[depth].dCount; | ||
} | ||
nodeIdx = stack[depth + 1].nodeIdx; | ||
@@ -170,6 +215,7 @@ }; | ||
const nodes = this.nodes; | ||
const len = word.length; | ||
const charIndexes = this.wordToNodeCharIndexSequence(word); | ||
const len = charIndexes.length; | ||
let nodeIdx = 0; | ||
for (let p = 0; p < len; ++p) { | ||
const letterIdx = this.getCharIndex(word[p]); | ||
const letterIdx = charIndexes[p]; | ||
const node = nodes[nodeIdx]; | ||
@@ -207,7 +253,8 @@ const count = node.length; | ||
const nodes = this.nodes; | ||
const len = word.length; | ||
const charIndexes = this.wordToNodeCharIndexSequence(word); | ||
const len = charIndexes.length; | ||
let nodeIdx = 0; | ||
let node = nodes[nodeIdx]; | ||
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) { | ||
const letterIdx = this.charToIndexMap[word[p]]; | ||
const letterIdx = charIndexes[p]; | ||
const count = node.length; | ||
@@ -245,2 +292,3 @@ let i = count - 1; | ||
const NodeChildRefShift = bitMasksInfo.NodeChildRefShift; | ||
const NodeCharIndexMask = bitMasksInfo.NodeMaskChildCharIndex; | ||
const NodeMaskEOW = bitMasksInfo.NodeMaskEOW; | ||
@@ -265,11 +313,33 @@ const tf = new FastTrieBlobBuilder(undefined, bitMasksInfo); | ||
const children = Object.entries(n.c); | ||
node.length = children.length + 1; | ||
for (let p = 0; p < children.length; ++p) { | ||
const [char, childNode] = children[p]; | ||
const letterIdx = tf.getCharIndex(char); | ||
const childIdx = walk(childNode); | ||
node[p + 1] = (childIdx << NodeChildRefShift) | letterIdx; | ||
addCharToNode(node, char, childNode); | ||
} | ||
return nodeIdx; | ||
} | ||
function resolveChild(node, charIndex) { | ||
let i = 1; | ||
for (i = 1; i < node.length && (node[i] & NodeCharIndexMask) !== charIndex; ++i) { | ||
// empty | ||
} | ||
return i; | ||
} | ||
function addCharToNode(node, char, n) { | ||
const indexSeq = tf.letterToNodeCharIndexSequence(char); | ||
for (const idx of indexSeq.slice(0, -1)) { | ||
const pos = resolveChild(node, idx); | ||
if (pos < node.length) { | ||
node = tf.nodes[node[pos] >>> NodeChildRefShift]; | ||
} | ||
else { | ||
const next = [0]; | ||
const nodeIdx = tf.nodes.push(next) - 1; | ||
node[pos] = (nodeIdx << NodeChildRefShift) | idx; | ||
node = next; | ||
} | ||
} | ||
const letterIdx = indexSeq[indexSeq.length - 1]; | ||
const i = node.push(letterIdx) - 1; | ||
node[i] = (walk(n) << NodeChildRefShift) | letterIdx; | ||
} | ||
walk(root); | ||
@@ -276,0 +346,0 @@ return tf.build(); |
import type { FastTrieBlobBitMaskInfo } from './FastTrieBlobBitMaskInfo.js'; | ||
export declare class FastTrieBlobInternals implements FastTrieBlobBitMaskInfo { | ||
readonly nodes: number[][]; | ||
readonly charIndex: string[]; | ||
readonly charIndex: readonly string[]; | ||
readonly charToIndexMap: Readonly<Record<string, number>>; | ||
@@ -9,4 +9,5 @@ readonly NodeMaskEOW: number; | ||
readonly NodeChildRefShift: number; | ||
constructor(nodes: number[][], charIndex: string[], charToIndexMap: Readonly<Record<string, number>>, maskInfo: FastTrieBlobBitMaskInfo); | ||
readonly isIndexDecoderNeeded: boolean; | ||
constructor(nodes: number[][], charIndex: readonly string[], charToIndexMap: Readonly<Record<string, number>>, maskInfo: FastTrieBlobBitMaskInfo); | ||
} | ||
//# sourceMappingURL=FastTrieBlobInternals.d.ts.map |
@@ -0,1 +1,2 @@ | ||
import { NumberSequenceByteEncoderDecoder } from './NumberSequenceByteDecoderAccumulator.js'; | ||
export class FastTrieBlobInternals { | ||
@@ -8,2 +9,3 @@ nodes; | ||
NodeChildRefShift; | ||
isIndexDecoderNeeded; | ||
constructor(nodes, charIndex, charToIndexMap, maskInfo) { | ||
@@ -17,4 +19,5 @@ this.nodes = nodes; | ||
this.NodeChildRefShift = NodeChildRefShift; | ||
this.isIndexDecoderNeeded = charIndex.length > NumberSequenceByteEncoderDecoder.MaxCharIndex; | ||
} | ||
} | ||
//# sourceMappingURL=FastTrieBlobInternals.js.map |
import type { ITrieNode, ITrieNodeId, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; | ||
import type { TrieInfo } from '../ITrieNode/TrieInfo.js'; | ||
import type { FastTrieBlobInternals } from './FastTrieBlobInternals.js'; | ||
type Node = readonly number[]; | ||
type NodeIndex = number; | ||
declare class FastTrieBlobINode implements ITrieNode { | ||
readonly trie: FastTrieBlobInternals; | ||
readonly nodeIdx: number; | ||
readonly nodeIdx: NodeIndex; | ||
readonly id: number; | ||
readonly size: number; | ||
readonly node: number[]; | ||
readonly node: Node; | ||
readonly eow: boolean; | ||
charToIdx: Record<string, number> | undefined; | ||
private _keys; | ||
constructor(trie: FastTrieBlobInternals, nodeIdx: number); | ||
private _count; | ||
private _size; | ||
private _chained; | ||
private _nodesEntries; | ||
private _entries; | ||
private _values; | ||
protected charToIdx: Readonly<Record<string, NodeIndex>> | undefined; | ||
constructor(trie: FastTrieBlobInternals, nodeIdx: NodeIndex); | ||
/** get keys to children */ | ||
keys(): readonly string[]; | ||
/** get keys to children */ | ||
private calcKeys; | ||
values(): readonly ITrieNode[]; | ||
@@ -25,2 +31,6 @@ entries(): readonly (readonly [string, ITrieNode])[]; | ||
getCharToIdxMap(): Record<string, number>; | ||
private containsChainedIndexes; | ||
private getNodesEntries; | ||
private walkChainedIndexes; | ||
get size(): number; | ||
} | ||
@@ -27,0 +37,0 @@ export declare class FastTrieBlobIRoot extends FastTrieBlobINode implements ITrieNodeRoot { |
@@ -0,3 +1,6 @@ | ||
import { NumberSequenceByteDecoderAccumulator, NumberSequenceByteEncoderDecoder, } from './NumberSequenceByteDecoderAccumulator.js'; | ||
const SpecialCharIndexMask = NumberSequenceByteEncoderDecoder.SpecialCharIndexMask; | ||
const EmptyKeys = Object.freeze([]); | ||
const EmptyNodes = Object.freeze([]); | ||
const EmptyEntries = Object.freeze([]); | ||
class FastTrieBlobINode { | ||
@@ -7,7 +10,12 @@ trie; | ||
id; | ||
size; | ||
node; | ||
eow; | ||
_keys; | ||
_count; | ||
_size; | ||
_chained; | ||
_nodesEntries; | ||
_entries; | ||
_values; | ||
charToIdx; | ||
_keys; | ||
constructor(trie, nodeIdx) { | ||
@@ -18,44 +26,31 @@ this.trie = trie; | ||
this.node = node; | ||
const value = node[0]; | ||
this.eow = !!(value & trie.NodeMaskEOW); | ||
this.size = node.length - 1; | ||
this.eow = !!(node[0] & trie.NodeMaskEOW); | ||
this._count = node.length - 1; | ||
this.id = nodeIdx; | ||
} | ||
/** get keys to children */ | ||
keys() { | ||
return (this._keys ??= this.calcKeys()); | ||
} | ||
/** get keys to children */ | ||
calcKeys() { | ||
if (!this.size) | ||
if (this._keys) | ||
return this._keys; | ||
if (!this._count) | ||
return EmptyKeys; | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const charIndex = this.trie.charIndex; | ||
const keys = Array(this.size); | ||
const len = this.size; | ||
const node = this.trie.nodes[this.nodeIdx]; | ||
for (let i = 0; i < len; ++i) { | ||
const entry = node[i + 1]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
keys[i] = charIndex[charIdx]; | ||
} | ||
return Object.freeze(keys); | ||
this._keys = this.getNodesEntries().map(([key]) => key); | ||
return this._keys; | ||
} | ||
values() { | ||
if (!this.size) | ||
if (!this._count) | ||
return EmptyNodes; | ||
const nodes = Array(this.size); | ||
for (let i = 0; i < this.size; ++i) { | ||
nodes[i] = this.child(i); | ||
} | ||
return nodes; | ||
if (this._values) | ||
return this._values; | ||
this._values = this.entries().map(([, value]) => value); | ||
return this._values; | ||
} | ||
entries() { | ||
const keys = this.keys(); | ||
const values = this.values(); | ||
const len = keys.length; | ||
const entries = Array(len); | ||
for (let i = 0; i < len; ++i) { | ||
entries[i] = [keys[i], values[i]]; | ||
} | ||
return entries; | ||
if (this._entries) | ||
return this._entries; | ||
if (!this._count) | ||
return EmptyEntries; | ||
const entries = this.getNodesEntries(); | ||
this._entries = entries.map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value)]); | ||
return this._entries; | ||
} | ||
@@ -74,8 +69,11 @@ /** get child ITrieNode */ | ||
hasChildren() { | ||
return this.size > 0; | ||
return this._count > 0; | ||
} | ||
child(keyIdx) { | ||
const node = this.trie.nodes[this.nodeIdx]; | ||
const nodeIdx = node[keyIdx + 1] >>> this.trie.NodeChildRefShift; | ||
return new FastTrieBlobINode(this.trie, nodeIdx); | ||
if (!this._values && !this.containsChainedIndexes()) { | ||
const n = this.node[keyIdx + 1]; | ||
const nodeIdx = n >>> this.trie.NodeChildRefShift; | ||
return new FastTrieBlobINode(this.trie, nodeIdx); | ||
} | ||
return this.values()[keyIdx]; | ||
} | ||
@@ -94,2 +92,94 @@ getCharToIdxMap() { | ||
} | ||
containsChainedIndexes() { | ||
if (this._chained !== undefined) | ||
return this._chained; | ||
if (!this._count || !this.trie.isIndexDecoderNeeded) { | ||
this._chained = false; | ||
return false; | ||
} | ||
// scan the node to see if there are encoded entries. | ||
let found = false; | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const len = this._count; | ||
const node = this.node; | ||
for (let i = 1; i <= len && !found; ++i) { | ||
const entry = node[i]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
found = (charIdx & SpecialCharIndexMask) === SpecialCharIndexMask; | ||
} | ||
this._chained = !!found; | ||
return this._chained; | ||
} | ||
getNodesEntries() { | ||
if (this._nodesEntries) | ||
return this._nodesEntries; | ||
if (!this.containsChainedIndexes()) { | ||
const entries = Array(this._count); | ||
const nodes = this.node; | ||
const charIndex = this.trie.charIndex; | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const RefShift = this.trie.NodeChildRefShift; | ||
for (let i = 0; i < this._count; ++i) { | ||
const entry = nodes[i + 1]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
entries[i] = [charIndex[charIdx], entry >>> RefShift]; | ||
} | ||
this._nodesEntries = entries; | ||
return entries; | ||
} | ||
this._nodesEntries = this.walkChainedIndexes(); | ||
return this._nodesEntries; | ||
} | ||
walkChainedIndexes() { | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const NodeChildRefShift = this.trie.NodeChildRefShift; | ||
const nodes = this.trie.nodes; | ||
const acc = NumberSequenceByteDecoderAccumulator.create(); | ||
const stack = [{ n: this.node, c: 1, acc }]; | ||
let depth = 0; | ||
/** there is at least this._count number of entries, more if there are nested indexes. */ | ||
const entries = Array(this._count); | ||
let eIdx = 0; | ||
const charIndex = this.trie.charIndex; | ||
while (depth >= 0) { | ||
const s = stack[depth]; | ||
const { n: node, c: off } = s; | ||
if (off >= node.length) { | ||
--depth; | ||
continue; | ||
} | ||
++s.c; | ||
const entry = node[off]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
const acc = s.acc.clone(); | ||
const letterIdx = acc.decode(charIdx); | ||
if (letterIdx !== undefined) { | ||
const char = charIndex[letterIdx]; | ||
const nodeIdx = entry >>> NodeChildRefShift; | ||
entries[eIdx++] = [char, nodeIdx]; | ||
continue; | ||
} | ||
const idx = entry >>> NodeChildRefShift; | ||
const ss = stack[++depth]; | ||
if (ss) { | ||
ss.n = nodes[idx]; | ||
ss.c = 1; | ||
ss.acc = acc; | ||
} | ||
else { | ||
stack[depth] = { n: nodes[idx], c: 1, acc }; | ||
} | ||
} | ||
return entries; | ||
} | ||
get size() { | ||
if (this._size === undefined) { | ||
if (!this.containsChainedIndexes()) { | ||
this._size = this._count; | ||
return this._size; | ||
} | ||
this._size = this.getNodesEntries().length; | ||
} | ||
return this._size; | ||
} | ||
} | ||
@@ -96,0 +186,0 @@ export class FastTrieBlobIRoot extends FastTrieBlobINode { |
@@ -6,3 +6,3 @@ import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; | ||
protected nodes: Uint32Array; | ||
protected charIndex: string[]; | ||
readonly charIndex: readonly string[]; | ||
protected charToIndexMap: Record<string, number>; | ||
@@ -13,3 +13,7 @@ readonly info: Readonly<TrieInfo>; | ||
private _iTrieRoot; | ||
constructor(nodes: Uint32Array, charIndex: string[], info: PartialTrieInfo); | ||
wordToCharacters: (word: string) => string[]; | ||
constructor(nodes: Uint32Array, charIndex: readonly string[], info: PartialTrieInfo); | ||
private _lookUpCharIndex; | ||
private wordToNodeCharIndexSequence; | ||
private letterToNodeCharIndexSequence; | ||
has(word: string): boolean; | ||
@@ -22,9 +26,22 @@ isForbiddenWord(word: string): boolean; | ||
private _has; | ||
/** | ||
* Find the node index for the given character. | ||
* @param nodeIdx - node index to start the search | ||
* @param char - character to look for | ||
* @returns | ||
*/ | ||
private _lookupNode; | ||
/** | ||
* Find the node index for the given character. | ||
* @param nodeIdx - node index to start the search | ||
* @param char - character to look for | ||
* @returns | ||
*/ | ||
private _lookupNodeByCharIndexSeq; | ||
words(): Iterable<string>; | ||
get size(): number; | ||
toJSON(): { | ||
charIndex: string[]; | ||
options: Readonly<TrieInfo>; | ||
nodes: string[]; | ||
nodes: NodeElement[]; | ||
charIndex: readonly string[]; | ||
}; | ||
@@ -37,4 +54,40 @@ encodeBin(): Uint8Array; | ||
static NodeChildRefShift: number; | ||
/** | ||
* Only 8 bits are reserved for the character index. | ||
* The max index is {@link TrieBlob.SpecialCharIndexMask} - 1. | ||
* Node chaining is used to reference higher character indexes. | ||
* - @see {@link TrieBlob.SpecialCharIndexMask} | ||
* - @see {@link TrieBlob.MaxCharIndex} | ||
*/ | ||
static NodeMaskChildCharIndex: number; | ||
/** SpecialCharIndexMask is used to indicate a node chain */ | ||
static SpecialCharIndexMask: number; | ||
static MaxCharIndex: number; | ||
/** | ||
* SpecialCharIndex8bit is used to indicate a node chain. Where the final character index is 248 + the index found in the next node. | ||
*/ | ||
static SpecialCharIndex8bit: number; | ||
static SpecialCharIndex16bit: number; | ||
static SpecialCharIndex24bit: number; | ||
/** | ||
* Since it is only possible to store single byte indexes, a multi-byte index is stored as a sequence of indexes chained between nodes. | ||
* @param charIndex - character index to convert to a sequence of indexes | ||
* @returns encoded index values. | ||
*/ | ||
static toCharIndexSequence(charIndex: number): number[]; | ||
static fromCharIndexSequence(charIndexes: Iterable<number>): Iterable<number>; | ||
static charactersToCharIndexSequence(chars: readonly string[], charToIndexMap: Readonly<Record<string, number>> | ((char: string) => number)): number[]; | ||
static charIndexSequenceToCharacters(charIndexSequence: number[], charIndex: readonly string[] | Readonly<Record<number, string>>): readonly string[]; | ||
static nodesView(trie: TrieBlob): Readonly<Uint32Array>; | ||
} | ||
interface NodeElement { | ||
id: number; | ||
eow: boolean; | ||
c: { | ||
c: number | string; | ||
o: number; | ||
}[]; | ||
n: number; | ||
} | ||
export {}; | ||
//# sourceMappingURL=TrieBlob.d.ts.map |
import { defaultTrieInfo } from '../constants.js'; | ||
import { findNode } from '../ITrieNode/trie-util.js'; | ||
import { mergeOptionalWithDefaults } from '../utils/mergeOptionalWithDefaults.js'; | ||
import { NumberSequenceByteDecoderAccumulator, NumberSequenceByteEncoderDecoder, } from './NumberSequenceByteDecoderAccumulator.js'; | ||
import { TrieBlobInternals, TrieBlobIRoot } from './TrieBlobIRoot.js'; | ||
@@ -38,2 +39,3 @@ const NodeHeaderNumChildrenBits = 8; | ||
_iTrieRoot; | ||
wordToCharacters; | ||
constructor(nodes, charIndex, info) { | ||
@@ -43,3 +45,5 @@ this.nodes = nodes; | ||
this.info = mergeOptionalWithDefaults(info); | ||
this.wordToCharacters = (word) => [...word]; | ||
this.charToIndexMap = Object.create(null); | ||
Object.freeze(this.charIndex); | ||
for (let i = 0; i < charIndex.length; ++i) { | ||
@@ -52,2 +56,11 @@ const char = charIndex[i]; | ||
} | ||
_lookUpCharIndex = (char) => { | ||
return this.charToIndexMap[char] || 0; | ||
}; | ||
wordToNodeCharIndexSequence(word) { | ||
return TrieBlob.charactersToCharIndexSequence(this.wordToCharacters(word), this._lookUpCharIndex); | ||
} | ||
letterToNodeCharIndexSequence(letter) { | ||
return TrieBlob.toCharIndexSequence(this._lookUpCharIndex(letter)); | ||
} | ||
has(word) { | ||
@@ -82,7 +95,7 @@ return this._has(0, word); | ||
const nodes = this.nodes; | ||
const len = word.length; | ||
const charToIndexMap = this.charToIndexMap; | ||
const wordIndexes = this.wordToNodeCharIndexSequence(word); | ||
const len = wordIndexes.length; | ||
let node = nodes[nodeIdx]; | ||
for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) { | ||
const letterIdx = charToIndexMap[word[p]]; | ||
const letterIdx = wordIndexes[p]; | ||
const count = node & NodeMaskNumChildren; | ||
@@ -101,3 +114,29 @@ let i = count; | ||
} | ||
/** | ||
* Find the node index for the given character. | ||
* @param nodeIdx - node index to start the search | ||
* @param char - character to look for | ||
* @returns | ||
*/ | ||
_lookupNode(nodeIdx, char) { | ||
const indexSeq = this.letterToNodeCharIndexSequence(char); | ||
const len = indexSeq.length; | ||
if (!len) | ||
return undefined; | ||
let currNodeIdx = nodeIdx; | ||
for (let i = 0; i < len; ++i) { | ||
currNodeIdx = this._lookupNodeByCharIndexSeq(currNodeIdx, indexSeq[i]); | ||
if (currNodeIdx === undefined) { | ||
return undefined; | ||
} | ||
} | ||
return currNodeIdx; | ||
} | ||
/** | ||
* Find the node index for the given character. | ||
* @param nodeIdx - node index to start the search | ||
* @param char - character to look for | ||
* @returns | ||
*/ | ||
_lookupNodeByCharIndexSeq(nodeIdx, index) { | ||
const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; | ||
@@ -107,5 +146,4 @@ const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex; | ||
const nodes = this.nodes; | ||
const charToIndexMap = this.charToIndexMap; | ||
const node = nodes[nodeIdx]; | ||
const letterIdx = charToIndexMap[char]; | ||
const letterIdx = index; | ||
const count = node & NodeMaskNumChildren; | ||
@@ -126,6 +164,8 @@ let i = count; | ||
const nodes = this.nodes; | ||
const stack = [{ nodeIdx: 0, pos: 0, word: '' }]; | ||
const stack = [ | ||
{ nodeIdx: 0, pos: 0, word: '', acc: NumberSequenceByteDecoderAccumulator.create() }, | ||
]; | ||
let depth = 0; | ||
while (depth >= 0) { | ||
const { nodeIdx, pos, word } = stack[depth]; | ||
const { nodeIdx, pos, word, acc } = stack[depth]; | ||
const node = nodes[nodeIdx]; | ||
@@ -143,4 +183,5 @@ // pos is 0 when first entering a node | ||
const entry = nodes[nodeIdx + nextPos]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
const letter = this.charIndex[charIdx]; | ||
const nAcc = acc.clone(); | ||
const charIdx = nAcc.decode(entry & NodeMaskChildCharIndex); | ||
const letter = (charIdx && this.charIndex[charIdx]) || ''; | ||
++depth; | ||
@@ -151,2 +192,3 @@ stack[depth] = { | ||
word: word + letter, | ||
acc: nAcc, | ||
}; | ||
@@ -171,5 +213,5 @@ } | ||
return { | ||
options: this.info, | ||
nodes: nodesToJson(this.nodes), | ||
charIndex: this.charIndex, | ||
options: this.info, | ||
nodes: splitString(Buffer.from(this.nodes.buffer, 128).toString('base64')), | ||
}; | ||
@@ -179,3 +221,3 @@ } | ||
const charIndex = Buffer.from(this.charIndex.join('\n')); | ||
const charIndexLen = (charIndex.byteLength + 3) & ~3; | ||
const charIndexLen = (charIndex.byteLength + 3) & ~3; // round up to the nearest 4 byte boundary. | ||
const nodeOffset = HEADER_SIZE + charIndexLen; | ||
@@ -196,3 +238,4 @@ const size = nodeOffset + this.nodes.length * 4; | ||
buffer.set(nodeData, nodeOffset); | ||
// dumpBin(nodeData); | ||
// console.log('encodeBin: %o', this.toJSON()); | ||
// console.log('encodeBin: buf %o nodes %o', buffer, this.nodes); | ||
return buffer; | ||
@@ -220,4 +263,6 @@ } | ||
.split('\n'); | ||
const nodes = new Uint32Array(blob.buffer).subarray(offsetNodes / 4, offsetNodes / 4 + lenNodes); | ||
return new TrieBlob(nodes, charIndex, defaultTrieInfo); | ||
const nodes = new Uint32Array(blob.buffer, offsetNodes, lenNodes); | ||
const trieBlob = new TrieBlob(nodes, charIndex, defaultTrieInfo); | ||
// console.log('decodeBin: %o', trieBlob.toJSON()); | ||
return trieBlob; | ||
} | ||
@@ -228,3 +273,41 @@ static NodeMaskEOW = 0x00000100; | ||
static NodeChildRefShift = 8; | ||
/** | ||
* Only 8 bits are reserved for the character index. | ||
* The max index is {@link TrieBlob.SpecialCharIndexMask} - 1. | ||
* Node chaining is used to reference higher character indexes. | ||
* - @see {@link TrieBlob.SpecialCharIndexMask} | ||
* - @see {@link TrieBlob.MaxCharIndex} | ||
*/ | ||
static NodeMaskChildCharIndex = 0x000000ff; | ||
/** SpecialCharIndexMask is used to indicate a node chain */ | ||
static SpecialCharIndexMask = 0xf8; | ||
static MaxCharIndex = this.SpecialCharIndexMask - 1; | ||
/** | ||
* SpecialCharIndex8bit is used to indicate a node chain. Where the final character index is 248 + the index found in the next node. | ||
*/ | ||
static SpecialCharIndex8bit = this.SpecialCharIndexMask | 0x01; | ||
static SpecialCharIndex16bit = this.SpecialCharIndexMask | 0x02; | ||
static SpecialCharIndex24bit = this.SpecialCharIndexMask | 0x03; | ||
/** | ||
* Since it is only possible to store single byte indexes, a multi-byte index is stored as a sequence of indexes chained between nodes. | ||
* @param charIndex - character index to convert to a sequence of indexes | ||
* @returns encoded index values. | ||
*/ | ||
static toCharIndexSequence(charIndex) { | ||
return NumberSequenceByteEncoderDecoder.encode(charIndex); | ||
} | ||
static fromCharIndexSequence(charIndexes) { | ||
return NumberSequenceByteEncoderDecoder.decodeSequence(charIndexes); | ||
} | ||
static charactersToCharIndexSequence(chars, charToIndexMap) { | ||
const fn = typeof charToIndexMap === 'function' ? charToIndexMap : (c) => charToIndexMap[c]; | ||
return chars.map(fn).flatMap((c) => this.toCharIndexSequence(c)); | ||
} | ||
static charIndexSequenceToCharacters(charIndexSequence, charIndex) { | ||
const chars = [...this.fromCharIndexSequence(charIndexSequence)].map((c) => charIndex[c]); | ||
return chars; | ||
} | ||
static nodesView(trie) { | ||
return new Uint32Array(trie.nodes); | ||
} | ||
} | ||
@@ -251,9 +334,25 @@ function isLittleEndian() { | ||
} | ||
function splitString(s, len = 64) { | ||
const splits = []; | ||
for (let i = 0; i < s.length; i += len) { | ||
splits.push(s.slice(i, i + len)); | ||
function nodesToJson(nodes) { | ||
function nodeElement(offset) { | ||
const node = nodes[offset]; | ||
const numChildren = node & TrieBlob.NodeMaskNumChildren; | ||
const eow = !!(node & TrieBlob.NodeMaskEOW); | ||
const children = []; | ||
for (let i = 1; i <= numChildren; ++i) { | ||
children.push({ | ||
c: ('00' + (nodes[offset + i] & TrieBlob.NodeMaskChildCharIndex).toString(16)).slice(-2), | ||
o: nodes[offset + i] >>> TrieBlob.NodeChildRefShift, | ||
}); | ||
} | ||
return { id: offset, eow, n: offset + numChildren + 1, c: children }; | ||
} | ||
return splits; | ||
const elements = []; | ||
let offset = 0; | ||
while (offset < nodes.length) { | ||
const e = nodeElement(offset); | ||
elements.push(e); | ||
offset = e.n; | ||
} | ||
return elements; | ||
} | ||
//# sourceMappingURL=TrieBlob.js.map |
@@ -9,5 +9,7 @@ import type { ITrieNode, ITrieNodeId, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; | ||
} | ||
type Node = number; | ||
type NodeIndex = number; | ||
export declare class TrieBlobInternals implements BitMaskInfo { | ||
readonly nodes: Uint32Array; | ||
readonly charIndex: string[]; | ||
readonly charIndex: readonly string[]; | ||
readonly charToIndexMap: Readonly<Record<string, number>>; | ||
@@ -18,14 +20,20 @@ readonly NodeMaskEOW: number; | ||
readonly NodeChildRefShift: number; | ||
constructor(nodes: Uint32Array, charIndex: string[], charToIndexMap: Readonly<Record<string, number>>, maskInfo: BitMaskInfo); | ||
readonly isIndexDecoderNeeded: boolean; | ||
constructor(nodes: Uint32Array, charIndex: readonly string[], charToIndexMap: Readonly<Record<string, number>>, maskInfo: BitMaskInfo); | ||
} | ||
declare class TrieBlobINode implements ITrieNode { | ||
readonly trie: TrieBlobInternals; | ||
readonly nodeIdx: number; | ||
readonly nodeIdx: NodeIndex; | ||
readonly id: number; | ||
readonly size: number; | ||
readonly node: number; | ||
readonly node: Node; | ||
readonly eow: boolean; | ||
private _keys; | ||
charToIdx: Record<string, number> | undefined; | ||
constructor(trie: TrieBlobInternals, nodeIdx: number); | ||
private _count; | ||
private _size; | ||
private _chained; | ||
private _nodesEntries; | ||
private _entries; | ||
private _values; | ||
protected charToIdx: Readonly<Record<string, number>> | undefined; | ||
constructor(trie: TrieBlobInternals, nodeIdx: NodeIndex); | ||
/** get keys to children */ | ||
@@ -41,2 +49,6 @@ keys(): readonly string[]; | ||
getCharToIdxMap(): Record<string, number>; | ||
private containsChainedIndexes; | ||
private getNodesEntries; | ||
private walkChainedIndexes; | ||
get size(): number; | ||
} | ||
@@ -43,0 +55,0 @@ export declare class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot { |
@@ -0,1 +1,3 @@ | ||
import { NumberSequenceByteDecoderAccumulator, NumberSequenceByteEncoderDecoder, } from './NumberSequenceByteDecoderAccumulator.js'; | ||
const SpecialCharIndexMask = NumberSequenceByteEncoderDecoder.SpecialCharIndexMask; | ||
export class TrieBlobInternals { | ||
@@ -9,2 +11,3 @@ nodes; | ||
NodeChildRefShift; | ||
isIndexDecoderNeeded; | ||
constructor(nodes, charIndex, charToIndexMap, maskInfo) { | ||
@@ -19,2 +22,3 @@ this.nodes = nodes; | ||
this.NodeChildRefShift = NodeChildRefShift; | ||
this.isIndexDecoderNeeded = charIndex.length > NumberSequenceByteEncoderDecoder.MaxCharIndex; | ||
} | ||
@@ -24,2 +28,3 @@ } | ||
const EmptyNodes = Object.freeze([]); | ||
const EmptyEntries = Object.freeze([]); | ||
class TrieBlobINode { | ||
@@ -29,6 +34,11 @@ trie; | ||
id; | ||
size; | ||
node; | ||
eow; | ||
_keys; | ||
_count; | ||
_size; | ||
_chained; | ||
_nodesEntries; | ||
_entries; | ||
_values; | ||
charToIdx; | ||
@@ -41,3 +51,3 @@ constructor(trie, nodeIdx) { | ||
this.eow = !!(node & trie.NodeMaskEOW); | ||
this.size = node & trie.NodeMaskNumChildren; | ||
this._count = node & trie.NodeMaskNumChildren; | ||
this.id = nodeIdx; | ||
@@ -49,35 +59,23 @@ } | ||
return this._keys; | ||
if (!this.size) | ||
if (!this._count) | ||
return EmptyKeys; | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const charIndex = this.trie.charIndex; | ||
const keys = Array(this.size); | ||
const offset = this.nodeIdx + 1; | ||
const len = this.size; | ||
for (let i = 0; i < len; ++i) { | ||
const entry = this.trie.nodes[i + offset]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
keys[i] = charIndex[charIdx]; | ||
} | ||
this._keys = keys; | ||
return keys; | ||
this._keys = this.getNodesEntries().map(([key]) => key); | ||
return this._keys; | ||
} | ||
values() { | ||
if (!this.size) | ||
if (!this._count) | ||
return EmptyNodes; | ||
const nodes = Array(this.size); | ||
for (let i = 0; i < this.size; ++i) { | ||
nodes[i] = this.child(i); | ||
} | ||
return nodes; | ||
if (this._values) | ||
return this._values; | ||
this._values = this.entries().map(([, value]) => value); | ||
return this._values; | ||
} | ||
entries() { | ||
const keys = this.keys(); | ||
const values = this.values(); | ||
const len = keys.length; | ||
const entries = Array(len); | ||
for (let i = 0; i < len; ++i) { | ||
entries[i] = [keys[i], values[i]]; | ||
} | ||
return entries; | ||
if (this._entries) | ||
return this._entries; | ||
if (!this._count) | ||
return EmptyEntries; | ||
const entries = this.getNodesEntries(); | ||
this._entries = entries.map(([key, value]) => [key, new TrieBlobINode(this.trie, value)]); | ||
return this._entries; | ||
} | ||
@@ -96,8 +94,11 @@ /** get child ITrieNode */ | ||
hasChildren() { | ||
return this.size > 0; | ||
return this._count > 0; | ||
} | ||
child(keyIdx) { | ||
const n = this.trie.nodes[this.nodeIdx + keyIdx + 1]; | ||
const nodeIdx = n >>> this.trie.NodeChildRefShift; | ||
return new TrieBlobINode(this.trie, nodeIdx); | ||
if (!this._values && !this.containsChainedIndexes()) { | ||
const n = this.trie.nodes[this.nodeIdx + keyIdx + 1]; | ||
const nodeIdx = n >>> this.trie.NodeChildRefShift; | ||
return new TrieBlobINode(this.trie, nodeIdx); | ||
} | ||
return this.values()[keyIdx]; | ||
} | ||
@@ -116,2 +117,97 @@ getCharToIdxMap() { | ||
} | ||
containsChainedIndexes() { | ||
if (this._chained !== undefined) | ||
return this._chained; | ||
if (!this._count || !this.trie.isIndexDecoderNeeded) { | ||
this._chained = false; | ||
return false; | ||
} | ||
// scan the node to see if there are encoded entries. | ||
let found = false; | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const offset = this.nodeIdx + 1; | ||
const nodes = this.trie.nodes; | ||
const len = this._count; | ||
for (let i = 0; i < len && !found; ++i) { | ||
const entry = nodes[i + offset]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
found = (charIdx & SpecialCharIndexMask) === SpecialCharIndexMask; | ||
} | ||
this._chained = !!found; | ||
return this._chained; | ||
} | ||
getNodesEntries() { | ||
if (this._nodesEntries) | ||
return this._nodesEntries; | ||
if (!this.containsChainedIndexes()) { | ||
const entries = Array(this._count); | ||
const nodes = this.trie.nodes; | ||
const offset = this.nodeIdx + 1; | ||
const charIndex = this.trie.charIndex; | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const RefShift = this.trie.NodeChildRefShift; | ||
for (let i = 0; i < this._count; ++i) { | ||
const entry = nodes[offset + i]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
entries[i] = [charIndex[charIdx], entry >>> RefShift]; | ||
} | ||
this._nodesEntries = entries; | ||
return entries; | ||
} | ||
this._nodesEntries = this.walkChainedIndexes(); | ||
return this._nodesEntries; | ||
} | ||
walkChainedIndexes() { | ||
const NodeMaskChildCharIndex = this.trie.NodeMaskChildCharIndex; | ||
const NodeChildRefShift = this.trie.NodeChildRefShift; | ||
const NodeMaskNumChildren = this.trie.NodeMaskNumChildren; | ||
const nodes = this.trie.nodes; | ||
const acc = NumberSequenceByteDecoderAccumulator.create(); | ||
const stack = [{ nodeIdx: this.nodeIdx + 1, lastIdx: this.nodeIdx + this._count, acc }]; | ||
let depth = 0; | ||
const entries = Array(this._count); | ||
let eIdx = 0; | ||
const charIndex = this.trie.charIndex; | ||
while (depth >= 0) { | ||
const s = stack[depth]; | ||
const { nodeIdx, lastIdx } = s; | ||
if (nodeIdx > lastIdx) { | ||
--depth; | ||
continue; | ||
} | ||
++s.nodeIdx; | ||
const entry = nodes[nodeIdx]; | ||
const charIdx = entry & NodeMaskChildCharIndex; | ||
const acc = s.acc.clone(); | ||
const letterIdx = acc.decode(charIdx); | ||
if (letterIdx !== undefined) { | ||
const char = charIndex[letterIdx]; | ||
const nodeIdx = entry >>> NodeChildRefShift; | ||
entries[eIdx++] = [char, nodeIdx]; | ||
continue; | ||
} | ||
const idx = entry >>> NodeChildRefShift; | ||
const lIdx = idx + (nodes[idx] & NodeMaskNumChildren); | ||
const ss = stack[++depth]; | ||
if (ss) { | ||
ss.nodeIdx = idx + 1; | ||
ss.lastIdx = lIdx; | ||
ss.acc = acc; | ||
} | ||
else { | ||
stack[depth] = { nodeIdx: idx + 1, lastIdx: lIdx, acc }; | ||
} | ||
} | ||
return entries; | ||
} | ||
get size() { | ||
if (this._size === undefined) { | ||
if (!this.containsChainedIndexes()) { | ||
this._size = this._count; | ||
return this._size; | ||
} | ||
this._size = this.getNodesEntries().length; | ||
} | ||
return this._size; | ||
} | ||
} | ||
@@ -118,0 +214,0 @@ export class TrieBlobIRoot extends TrieBlobINode { |
@@ -5,2 +5,4 @@ import type { ITrieNode, ITrieNodeRoot } from './ITrieNode/ITrieNode.js'; | ||
info: Readonly<TrieInfo>; | ||
/** Method used to split words into individual characters. */ | ||
wordToCharacters(word: string): readonly string[]; | ||
words(): Iterable<string>; | ||
@@ -7,0 +9,0 @@ getRoot(): ITrieNodeRoot; |
@@ -8,2 +8,3 @@ import { type BuilderCursor, type TrieBuilder } from '../Builder/index.js'; | ||
root: TrieRoot; | ||
wordToCharacters: (word: string) => string[]; | ||
setOptions(options: Readonly<PartialTrieOptions>): Readonly<TrieOptions>; | ||
@@ -10,0 +11,0 @@ build(): TrieNodeTrie; |
@@ -10,2 +10,3 @@ import { insertWordsAtCursor } from '../Builder/index.js'; | ||
root = { ...defaultTrieInfo, c: Object.create(null) }; | ||
wordToCharacters = (word) => word.split(''); | ||
setOptions(options) { | ||
@@ -12,0 +13,0 @@ const opts = mergeOptionalWithDefaults(options, this.root); |
@@ -11,2 +11,3 @@ import type { ITrieNode, ITrieNodeRoot } from '../ITrieNode/ITrieNode.js'; | ||
constructor(root: TrieRoot); | ||
wordToCharacters: (word: string) => string[]; | ||
get iTrieRoot(): ITrieNodeRoot; | ||
@@ -13,0 +14,0 @@ getRoot(): ITrieNodeRoot; |
@@ -16,2 +16,3 @@ import { consolidate } from '../consolidate.js'; | ||
} | ||
wordToCharacters = (word) => word.split(''); | ||
get iTrieRoot() { | ||
@@ -18,0 +19,0 @@ return this._iTrieRoot || (this._iTrieRoot = trieRootToITrieRoot(this.root)); |
@@ -53,2 +53,4 @@ /** | ||
export declare function stripNonAccents(characters: string): string; | ||
export declare function isValidUtf16Character(char: string): boolean; | ||
export declare function assertValidUtf16Character(char: string): void; | ||
//# sourceMappingURL=text.d.ts.map |
@@ -102,2 +102,29 @@ /** | ||
} | ||
export function isValidUtf16Character(char) { | ||
const len = char.length; | ||
const code = char.charCodeAt(0) & 0xfc00; | ||
const valid = (len === 1 && (code & 0xf800) !== 0xd800) || | ||
(len === 2 && (code & 0xfc00) === 0xd800 && (char.charCodeAt(1) & 0xfc00) === 0xdc00); | ||
return valid; | ||
} | ||
export function assertValidUtf16Character(char) { | ||
if (!isValidUtf16Character(char)) { | ||
const len = char.length; | ||
const codes = char | ||
.slice(0, 2) | ||
.split('') | ||
.map((c) => '0x' + ('0000' + c.charCodeAt(0).toString(16)).slice(-4)); | ||
let message; | ||
if (len == 1) { | ||
message = `Invalid utf16 character, lone surrogate: ${codes[0]}`; | ||
} | ||
else if (len == 2) { | ||
message = `Invalid utf16 character, not a valid surrogate pair: [${codes.join(', ')}]`; | ||
} | ||
else { | ||
message = `Invalid utf16 character, must be a single character, found: ${len}`; | ||
} | ||
throw new Error(message); | ||
} | ||
} | ||
//# sourceMappingURL=text.js.map |
{ | ||
"name": "cspell-trie-lib", | ||
"version": "8.6.1", | ||
"version": "8.7.0", | ||
"description": "Trie Data Structure to support cspell.", | ||
@@ -51,4 +51,4 @@ "type": "module", | ||
"dependencies": { | ||
"@cspell/cspell-pipe": "8.6.1", | ||
"@cspell/cspell-types": "8.6.1", | ||
"@cspell/cspell-pipe": "8.7.0", | ||
"@cspell/cspell-types": "8.7.0", | ||
"gensequence": "^7.0.0" | ||
@@ -65,3 +65,3 @@ }, | ||
}, | ||
"gitHead": "9279c50f26a32327e38fb1480ac1b823aa898a5f" | ||
"gitHead": "5318079ed11fe77e981287ecf1c40d6f28dd91ed" | ||
} |
422366
219
11252
+ Added@cspell/cspell-pipe@8.7.0(transitive)
+ Added@cspell/cspell-types@8.7.0(transitive)
- Removed@cspell/cspell-pipe@8.6.1(transitive)
- Removed@cspell/cspell-types@8.6.1(transitive)
Updated@cspell/cspell-pipe@8.7.0
Updated@cspell/cspell-types@8.7.0