cspell-tools
Advanced tools
Comparing version 4.1.13 to 5.0.1-alpha.3
@@ -0,1 +1,14 @@ | ||
# Change Log | ||
All notable changes to this project will be documented in this file. | ||
See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. | ||
## [5.0.1-alpha.0](https://github.com/streetsidesoftware/cspell/compare/cspell-tools@4.1.12...cspell-tools@5.0.1-alpha.0) (2020-02-20) | ||
**Note:** Version bump only for package cspell-tools | ||
# Release Notes | ||
@@ -28,2 +41,2 @@ | ||
## 1.0.7 | ||
* update packages | ||
* update packages |
@@ -22,2 +22,5 @@ "use strict"; | ||
compiler.setLogger(log); | ||
function collect(value, previous) { | ||
return previous.concat([value]); | ||
} | ||
function run(program, argv) { | ||
@@ -36,6 +39,9 @@ program.exitOverride(); | ||
.option('-s, --no-split', 'Treat each line as a dictionary entry, do not split') | ||
.option('-x, --experimental <flag>', 'Experimental flags, used for testing new concepts. Flags: compound', collect, []) | ||
.option('--no-sort', 'Do not sort the result') | ||
.action((src, options) => { | ||
const experimental = new Set(options.experimental); | ||
const skipNormalization = experimental.has('compound'); | ||
const result = processAction(src, '.txt', options, async (src, dst) => { | ||
return compiler_1.compileWordList(src, dst, { splitWords: options.split, sort: options.sort }).then(() => src); | ||
return compiler_1.compileWordList(src, dst, { splitWords: options.split, sort: options.sort, skipNormalization }).then(() => src); | ||
}); | ||
@@ -51,6 +57,10 @@ resolve(result); | ||
.option('-n, --no-compress', 'By default the files are Gzipped, this will turn that off.') | ||
.option('-x, --experimental <flag>', 'Experimental flags, used for testing new concepts. Flags: compound', collect, []) | ||
.option('--trie3', '[Beta] Use file format trie3') | ||
.action((src, options) => { | ||
const experimental = new Set(options.experimental); | ||
const skipNormalization = experimental.has('compound'); | ||
const compileOptions = Object.assign(Object.assign({}, options), { skipNormalization }); | ||
const result = processAction(src, '.trie', options, async (words, dst) => { | ||
return compiler_1.compileTrie(words, dst, options); | ||
return compiler_1.compileTrie(words, dst, compileOptions); | ||
}); | ||
@@ -73,3 +83,3 @@ resolve(result); | ||
function parseNumber(s) { | ||
const n = parseInt((s !== null && s !== void 0 ? s : '')); | ||
const n = parseInt(s !== null && s !== void 0 ? s : ''); | ||
return isNaN(n) ? undefined : n; | ||
@@ -81,3 +91,5 @@ } | ||
const maxDepth = parseNumber(options.max_depth); | ||
const readerOptions = { maxDepth }; | ||
const experimental = new Set(options.experimental); | ||
const useAnnotation = experimental.has('compound'); | ||
const readerOptions = { maxDepth, useAnnotation }; | ||
const globResults = await Promise.all(src.map(s => globP(s))); | ||
@@ -107,8 +119,8 @@ const filesToProcess = gensequence_1.genSequence(globResults) | ||
const outFileName = toFilename(filename, ext); | ||
const dir = (destination !== null && destination !== void 0 ? destination : path.dirname(filename)); | ||
const dir = destination !== null && destination !== void 0 ? destination : path.dirname(filename); | ||
return path.join(dir, outFileName); | ||
} | ||
function toMergeTargetFile(filename, destination, ext) { | ||
const outFileName = toFilename(filename, ext); | ||
return path.resolve((destination !== null && destination !== void 0 ? destination : '.'), outFileName); | ||
const outFileName = path.join(path.dirname(filename), toFilename(filename, ext)); | ||
return path.resolve(destination !== null && destination !== void 0 ? destination : '.', outFileName); | ||
} | ||
@@ -115,0 +127,0 @@ async function processFilesIndividually(action, filesToProcess, srcToTarget) { |
import { Sequence } from 'gensequence'; | ||
/**
 * Options accepted by `createReader` and `readHunspellFiles`.
 */
export interface ReaderOptions { | ||
/** When truthy, `createReader` iterates the reader's `annotatedWords`; otherwise it iterates `rawWords`. */
useAnnotation?: boolean; | ||
/** NOTE(review): filled from `parseNumber(options.max_depth)` by the caller; how the readers apply it is not visible here - confirm. */
maxDepth?: number; | ||
} | ||
export declare type AnnotatedWord = string; | ||
export interface Reader { | ||
interface BaseReader { | ||
size: number; | ||
[Symbol.iterator]: () => Sequence<string>; | ||
annotatedWords: () => Sequence<AnnotatedWord>; | ||
rawWords: () => Sequence<string>; | ||
} | ||
export interface Reader extends BaseReader { | ||
[Symbol.iterator]: () => Sequence<string>; | ||
} | ||
export declare function createReader(filename: string, options: ReaderOptions): Promise<Reader>; | ||
export declare function createArrayReader(lines: string[]): Reader; | ||
export declare function readHunspellFiles(filename: string, options: ReaderOptions): Promise<Reader>; | ||
export declare function createArrayReader(lines: string[]): BaseReader; | ||
export declare function readHunspellFiles(filename: string, options: ReaderOptions): Promise<BaseReader>; | ||
export {}; |
@@ -9,6 +9,2 @@ "use strict"; | ||
const regHunspellFile = /\.(dic|aff)$/i; | ||
const COMPOUND = '+'; | ||
const OPTIONAL_COMPOUND = '*'; | ||
const NORMALIZED = '~'; | ||
const FORBID = '!'; | ||
// cspell:word dedupe | ||
@@ -22,3 +18,3 @@ const DEDUPE_SIZE = 1000; | ||
]; | ||
function createReader(filename, options) { | ||
function findMatchingReader(filename, options) { | ||
for (const reader of readers) { | ||
@@ -29,10 +25,16 @@ if (reader.test.test(filename)) { | ||
} | ||
return textFileReader(filename); | ||
return textFileReader(filename, options); | ||
} | ||
/**
 * Builds a reader for `filename` and makes it iterable.
 *
 * The underlying reader comes from `findMatchingReader`. Its default iterator
 * is then set to `annotatedWords` when `options.useAnnotation` is truthy and to
 * `rawWords` otherwise.
 *
 * @param filename - path of the word source to read.
 * @param options - reader options; `useAnnotation` selects the iterator.
 * @returns a promise for the same reader object, extended with `[Symbol.iterator]`.
 */
async function createReader(filename, options) {
    const reader = await findMatchingReader(filename, options);
    const iterate = options.useAnnotation ? reader.annotatedWords : reader.rawWords;
    // Object.assign mutates and returns `reader`, so the result keeps its original members.
    return Object.assign(reader, { [Symbol.iterator]: iterate });
}
exports.createReader = createReader; | ||
function createArrayReader(lines) { | ||
const rawWords = () => gensequence_1.genSequence(lines); | ||
const annotatedWords = () => gensequence_1.genSequence(lines).pipe(_mapText, dedupeAndSort); | ||
return { | ||
size: lines.length, | ||
[Symbol.iterator]: () => gensequence_1.genSequence(lines), | ||
annotatedWords() { return gensequence_1.genSequence(lines).pipe(_mapText).pipe(dedupeAndSort); }, | ||
annotatedWords, | ||
rawWords, | ||
}; | ||
@@ -47,7 +49,7 @@ } | ||
const normalizeAndDedupe = gensequence_1.operators.pipe(_stripCaseAndAccents, dedupeAndSort); | ||
const rawWords = () => reader.seqWords(); | ||
return { | ||
size: reader.dic.length, | ||
// seqWords is used for backwards compatibility. | ||
[Symbol.iterator]: () => reader.seqWords(), | ||
annotatedWords() { return reader.seqAffWords().pipe(_mapAffWords).pipe(normalizeAndDedupe); }, | ||
rawWords, | ||
}; | ||
@@ -57,15 +59,20 @@ } | ||
async function trieFileReader(filename) { | ||
const trieRoot = cspell_trie_lib_1.importTrie(await textFileReader(filename)); | ||
const trieRoot = cspell_trie_lib_1.importTrie(await readTextFile(filename)); | ||
const trie = new cspell_trie_lib_1.Trie(trieRoot); | ||
const rawWords = () => trie.words(); | ||
return { | ||
get size() { return trie.size(); }, | ||
[Symbol.iterator]: () => trie.words(), | ||
annotatedWords() { return trie.words(); }, | ||
annotatedWords: rawWords, | ||
rawWords, | ||
}; | ||
} | ||
async function textFileReader(filename) { | ||
const content = await fs.readFile(filename) | ||
function readTextFile(filename) { | ||
const lines = fs.readFile(filename) | ||
.then(buffer => (/\.gz$/).test(filename) ? zlib.gunzipSync(buffer) : buffer) | ||
.then(buffer => buffer.toString('UTF-8')); | ||
const lines = content.split('\n'); | ||
.then(buffer => buffer.toString('UTF-8')) | ||
.then(content => content.split(/\r?\n/g)); | ||
return lines; | ||
} | ||
async function textFileReader(filename, options) { | ||
const lines = await readTextFile(filename); | ||
return createArrayReader(lines); | ||
@@ -83,3 +90,3 @@ } | ||
for (const line of lines) { | ||
if (line[0] !== OPTIONAL_COMPOUND) { | ||
if (line[0] !== cspell_trie_lib_1.OPTIONAL_COMPOUND) { | ||
yield line; | ||
@@ -90,3 +97,3 @@ continue; | ||
yield w; | ||
yield COMPOUND + w; | ||
yield cspell_trie_lib_1.COMPOUND + w; | ||
} | ||
@@ -96,3 +103,3 @@ } | ||
for (const line of lines) { | ||
if (line[line.length - 1] !== OPTIONAL_COMPOUND) { | ||
if (line[line.length - 1] !== cspell_trie_lib_1.OPTIONAL_COMPOUND) { | ||
yield line; | ||
@@ -103,3 +110,3 @@ continue; | ||
yield w; | ||
yield w + COMPOUND; | ||
yield w + cspell_trie_lib_1.COMPOUND; | ||
} | ||
@@ -110,8 +117,9 @@ } | ||
for (const word of words) { | ||
yield word; | ||
if (regNotLower.test(word)) { | ||
// convert to lower case and strip accents. | ||
const n = word.toLowerCase().normalize('NFD').replace(/[\u0300-\u036f]/g, ''); | ||
yield NORMALIZED + n; | ||
} | ||
// Words are normalized to the compact format: e + ` => è | ||
yield word.normalize(); | ||
// convert to lower case and strip accents. | ||
const n = word.toLowerCase().normalize('NFD').replace(/[\u0300-\u036f]/g, ''); | ||
// All words are added for case-insensitive searches. | ||
// It is a space / speed trade-off. In this case, speed is more important. | ||
yield cspell_trie_lib_1.NORMALIZED + n; | ||
} | ||
@@ -137,4 +145,4 @@ } | ||
const { word, flags } = affWord; | ||
const compound = flags.isCompoundForbidden ? '' : COMPOUND; | ||
const forbid = flags.isForbiddenWord ? FORBID : ''; | ||
const compound = flags.isCompoundForbidden ? '' : cspell_trie_lib_1.COMPOUND; | ||
const forbid = flags.isForbiddenWord ? cspell_trie_lib_1.FORBID : ''; | ||
if (!forbid) { | ||
@@ -141,0 +149,0 @@ if (flags.canBeCompoundBegin) |
@@ -5,5 +5,9 @@ import { Sequence } from 'gensequence'; | ||
export declare function setLogger(logger?: Logger): void; | ||
declare type Normalizer = (lines: Sequence<string>) => Sequence<string>; | ||
export declare function normalizeWords(lines: Sequence<string>): Sequence<string>; | ||
export declare function lineToWords(line: string): Sequence<string>; | ||
interface CompileWordListOptions { | ||
/**
 * Options common to `CompileWordListOptions` and `CompileTrieOptions`.
 */
export interface CompileOptions { | ||
/** When truthy, the compile functions pass the words through unchanged instead of running their normalization step. */
skipNormalization?: boolean; | ||
} | ||
export interface CompileWordListOptions extends CompileOptions { | ||
splitWords: boolean; | ||
@@ -13,4 +17,4 @@ sort: boolean; | ||
export declare function compileWordList(words: Sequence<string>, destFilename: string, options: CompileWordListOptions): Promise<void>; | ||
export declare function normalizeWordsToTrie(words: Sequence<string>): Trie.TrieNode; | ||
export interface CompileTrieOptions { | ||
export declare function normalizeWordsToTrie(words: Sequence<string>, normalizer?: Normalizer): Trie.TrieRoot; | ||
export interface CompileTrieOptions extends CompileOptions { | ||
base?: number; | ||
@@ -17,0 +21,0 @@ trie3?: boolean; |
@@ -16,3 +16,3 @@ "use strict"; | ||
function setLogger(logger) { | ||
log = (logger !== null && logger !== void 0 ? logger : defaultLogger); | ||
log = logger !== null && logger !== void 0 ? logger : defaultLogger; | ||
} | ||
@@ -52,3 +52,5 @@ exports.setLogger = setLogger; | ||
const pDir = fs_extra_1.mkdirp(destDir); | ||
const compile = options.splitWords ? compileWordListWithSplitSeq : compileSimpleWordListSeq; | ||
const compile = options.skipNormalization ? a => a | ||
: options.splitWords ? compileWordListWithSplitSeq | ||
: compileSimpleWordListSeq; | ||
const seq = compile(words) | ||
@@ -71,5 +73,4 @@ .filter(a => !!a) | ||
} | ||
function normalizeWordsToTrie(words) { | ||
const trie = Trie.buildTrie(normalizeWords(words)); | ||
return trie.root; | ||
function normalizeWordsToTrie(words, normalizer = normalizeWords) { | ||
return Trie.buildTrie(normalizer(words)).root; | ||
} | ||
@@ -81,7 +82,8 @@ exports.normalizeWordsToTrie = normalizeWordsToTrie; | ||
log('Reading Words into Trie'); | ||
const base = (_a = options.base, (_a !== null && _a !== void 0 ? _a : 32)); | ||
const base = (_a = options.base) !== null && _a !== void 0 ? _a : 32; | ||
const version = options.trie3 ? 3 : 1; | ||
const destDir = path.dirname(destFilename); | ||
const pDir = fs_extra_1.mkdirp(destDir); | ||
const root = normalizeWordsToTrie(words); | ||
const normalizer = options.skipNormalization ? a => a : normalizeWords; | ||
const root = normalizeWordsToTrie(words, normalizer); | ||
log('Reduce duplicate word endings'); | ||
@@ -88,0 +90,0 @@ const trie = exports.consolidate(root); |
{ | ||
"name": "cspell-tools", | ||
"version": "4.1.13", | ||
"version": "5.0.1-alpha.3", | ||
"description": "Tools to assist with the development of cSpell", | ||
@@ -43,7 +43,7 @@ "typings": "dist/index.d.ts", | ||
"commander": "^4.0.1", | ||
"cspell-io": "^4.0.20", | ||
"cspell-trie-lib": "^4.1.8", | ||
"cspell-util-bundle": "^4.0.10", | ||
"fs-extra": "^8.1.0", | ||
"gensequence": "^3.0.3", | ||
"cspell-io": "^5.0.1-alpha.3", | ||
"cspell-trie-lib": "^5.0.1-alpha.3", | ||
"cspell-util-bundle": "^5.0.1-alpha.3", | ||
"fs-extra": "^9.0.0", | ||
"gensequence": "^3.1.1", | ||
"glob": "^7.1.6", | ||
@@ -75,3 +75,3 @@ "hunspell-reader": "^3.2.0", | ||
}, | ||
"gitHead": "06ec0485b49bc73f175931467b8eaf6ee78391df" | ||
"gitHead": "efceae2801b83dd00d976926ddaf29069ac97a23" | ||
} |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
25759
497
1
+ Added @cspell/cspell-pipe@5.21.2 (transitive)
+ Added at-least-node@1.0.0 (transitive)
+ Added cspell-io@5.21.2 (transitive)
+ Added cspell-trie-lib@5.21.2 (transitive)
+ Added cspell-util-bundle@5.1.3 (transitive)
+ Added fs-extra@10.1.0, fs-extra@9.1.0 (transitive)
+ Added jsonfile@6.1.0 (transitive)
+ Added universalify@2.0.1 (transitive)
- Removed cspell-io@4.1.7 (transitive)
- Removed cspell-trie-lib@4.2.8 (transitive)
- Removed cspell-util-bundle@4.1.11 (transitive)
- Removed iconv-lite@0.6.3 (transitive)
- Removed iterable-to-stream@1.0.1 (transitive)
Updated cspell-io@^5.0.1-alpha.3
Updated fs-extra@^9.0.0
Updated gensequence@^3.1.1