hunspell-reader - npm Package Compare versions

dist/IterableHunspellReader.d.ts

dist/IterableHunspellReader.js

dist/IterableHunspellReader.js.map

dist/types.d.ts

dist/types.js

dist/types.js.map

src/IterableHunspellReader.ts

src/types.ts

6

CHANGELOG.md

		# Release Notes

		## 2.1.0
		- Add an Iterable Reader, which works much better for very large dictionaries.

		## 2.0.0
		- Move to RxJs 6 and Node 8

		## 1.2.1
		@@ -4,0 +10,0 @@ - Update packages.

2

dist/app.d.ts

		@@ -1,2 +0,2 @@
		#!/usr/bin/env node --max_old_space_size=8192
		#!/usr/bin/env node
		export {};

95

dist/app.js

		@@ -1,15 +0,21 @@
		#!/usr/bin/env node --max_old_space_size=8192
		#!/usr/bin/env node
		"use strict";
		var __awaiter = (this && this.__awaiter) \|\| function (thisArg, _arguments, P, generator) {
		return new (P \|\| (P = Promise))(function (resolve, reject) {
		function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
		function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
		function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); }
		step((generator = generator.apply(thisArg, _arguments \|\| [])).next());
		});
		};
		Object.defineProperty(exports, "__esModule", { value: true });
		const commander = require("commander");
		const HunspellReader_1 = require("./HunspellReader");
		const IterableHunspellReader_1 = require("./IterableHunspellReader");
		const fs = require("fs");
		const rxjs_stream_1 = require("rxjs-stream");
		const fs_extra_1 = require("fs-extra");
		const rxjs_1 = require("rxjs");
		const operators_1 = require("rxjs/operators");
		const path = require("path");
		// import * as monitor from './monitor';
		const util_1 = require("./util");
		const gensequence_1 = require("gensequence");
		const uniqueHistorySize = 500000;
		const packageInfo = require('../package.json');
		const version = packageInfo['version'];
		let displayHelp = true;
		commander
		@@ -22,47 +28,46 @@ .version(version);
		.option('-u, --unique', 'make sure the words are unique.')
		.option('-i, --ignore_case', 'used with --unique and --sort')
		.option('-l, --lower_case', 'output in lower case')
		.option('-T, --no-transform', 'Do not apply the prefix and suffix transforms. Root words only.')
		.description('Output all the words in the <hunspell.dic> file.')
		.action((hunspellDicFilename, options) => {
		const { sort = false, unique = false, ignore_case: ignoreCase = false, output: outputFile, lower_case: lowerCase = false, transform = true, } = options;
		notify('Write words', !!outputFile);
		notify(`Sort: ${yesNo(sort)}`, !!outputFile);
		notify(`Unique: ${yesNo(unique)}`, !!outputFile);
		notify(`Ignore Case: ${yesNo(ignoreCase)}`, !!outputFile);
		const pOutputStream = createWriteStream(outputFile);
		const baseFile = hunspellDicFilename.replace(/(\.dic)?$/, '');
		const dicFile = baseFile + '.dic';
		const affFile = baseFile + '.aff';
		notify(`Dic file: ${dicFile}`, !!outputFile);
		notify(`Aff file: ${affFile}`, !!outputFile);
		notify(`Generating Words`, !!outputFile);
		const pReader = HunspellReader_1.HunspellReader.createFromFiles(affFile, dicFile);
		const pWordReader = transform ? pReader.then(reader => reader.readWords()) : pReader.then(reader => reader.readRootWords());
		const wordsRx = rxjs_1.from(pWordReader).pipe(operators_1.map(words => words.pipe(operators_1.map(a => a.trim()), operators_1.filter(a => !!a))), operators_1.map(wordsRx => unique ? makeUnique(wordsRx, ignoreCase) : wordsRx), operators_1.map(wordsRx => sort ? sortWordList(wordsRx, ignoreCase) : wordsRx), operators_1.map(wordsRx => lowerCase ? wordsRx.pipe(operators_1.map(a => a.toLowerCase())) : wordsRx), operators_1.flatMap(words => words), operators_1.map(word => word + '\n'));
		pOutputStream.then(writeStream => {
		rxjs_stream_1.rxToStream(wordsRx.pipe(operators_1.bufferCount(1024), operators_1.map(words => words.join('')))).pipe(writeStream);
		.action(function (hunspellDicFilename, options) {
		return __awaiter(this, void 0, void 0, function* () {
		displayHelp = false;
		const { sort = false, unique = false, output: outputFile, lower_case: lowerCase = false, transform = true, } = options;
		const log = (msg) => notify(msg, !!outputFile);
		log('Write words');
		log(`Sort: ${yesNo(sort)}`);
		log(`Unique: ${yesNo(unique)}`);
		const baseFile = hunspellDicFilename.replace(/\.(dic\|aff)$/, '');
		const dicFile = baseFile + '.dic';
		const affFile = baseFile + '.aff';
		log(`Dic file: ${dicFile}`);
		log(`Aff file: ${affFile}`);
		log(`Generating Words...`);
		const reader = yield IterableHunspellReader_1.IterableHunspellReader.createFromFiles(affFile, dicFile);
		const seqWords = transform ? reader.seqWords() : reader.seqRootWords();
		const normalize = lowerCase ? (a) => a.toLowerCase() : (a) => a;
		const filterUnique = unique ? util_1.uniqueFilter(uniqueHistorySize) : (_) => true;
		const fd = outputFile ? fs.openSync(outputFile, 'w') : 1;
		const words = seqWords
		.map(a => a.trim())
		.filter(a => !!a)
		.map(normalize)
		.map(a => a + '\n')
		.filter(filterUnique);
		if (sort) {
		log('Sorting...');
		const data = words.toArray().sort().join('');
		fs.writeSync(fd, data);
		}
		else {
		gensequence_1.genSequence(util_1.batch(words, 1000)).forEach(w => fs.writeSync(fd, w.join('')));
		}
		fs.closeSync(fd);
		log('Done.');
		});
		});
		commander.parse(process.argv);
		if (!commander.args.length) {
		if (displayHelp) {
		commander.help();
		}
		/**
		 * Obtain the stream the word list is written to.
		 *
		 * @param filename  Optional output path.
		 * @returns A promise for `process.stdout` when no filename is given; otherwise a promise for a
		 *          file write stream, created after the parent directory has been made with `mkdirp`.
		 */
		function createWriteStream(filename) {
		    if (!filename) {
		        return Promise.resolve(process.stdout);
		    }
		    const parentDir = path.dirname(filename);
		    return fs_extra_1.mkdirp(parentDir).then(() => fs.createWriteStream(filename));
		}
		/**
		 * Sort a stream of words.
		 *
		 * The whole stream is collected with `toArray`, sorted, and then passed through `flatMap`.
		 *
		 * @param words       Observable of words.
		 * @param ignoreCase  When truthy, words are compared by their lower-cased form.
		 * @returns The result of piping `words` through `toArray` and a sorting `flatMap`.
		 */
		function sortWordList(words, ignoreCase) {
		    function compareStrings(left, right) {
		        if (left < right) {
		            return -1;
		        }
		        return left > right ? 1 : 0;
		    }
		    function compareIgnoringCase(left, right) {
		        return compareStrings(left.toLowerCase(), right.toLowerCase());
		    }
		    const comparator = ignoreCase ? compareIgnoringCase : compareStrings;
		    const collectAll = operators_1.toArray();
		    const emitSorted = operators_1.flatMap(list => list.sort(comparator));
		    return words.pipe(collectAll, emitSorted);
		}
		/**
		 * Drop words that have already been emitted.
		 *
		 * Every word that passes the filter is remembered in a set that grows for the life of the stream.
		 *
		 * @param words       Observable of words.
		 * @param ignoreCase  When truthy, words are compared by their lower-cased form.
		 * @returns The result of piping `words` through a membership `filter` and a recording `tap`.
		 */
		function makeUnique(words, ignoreCase) {
		    const seen = new Set();
		    const toKey = (word) => (ignoreCase ? word.toLowerCase() : word);
		    const isUnseen = (word) => !seen.has(toKey(word));
		    const remember = (word) => seen.add(toKey(word));
		    return words.pipe(operators_1.filter(isUnseen), operators_1.tap(remember));
		}
		function notify(message, useStdOut = true) {
		@@ -69,0 +74,0 @@ if (useStdOut) {

5

dist/HunspellReader.d.ts

		import { Aff, AffWord } from './aff';
		import { Observable } from 'rxjs';
		/** A word paired with its rule string. */
		export interface WordInfo {
		// NOTE(review): presumably the word text as read from the dictionary — confirm against the reader.
		word: string;
		// NOTE(review): presumably the affix rule flags attached to the word — confirm against the reader.
		rules: string;
		}
		import { WordInfo } from './types';
		export interface HunspellSrcInfo {
		@@ -8,0 +5,0 @@ aff: Aff;

1

dist/index.d.ts

		export * from './HunspellReader';
		export * from './IterableHunspellReader';

1

dist/index.js

		@@ -7,2 +7,3 @@ "use strict";
		__export(require("./HunspellReader"));
		__export(require("./IterableHunspellReader"));
		//# sourceMappingURL=index.js.map

2

dist/util.d.ts

		export declare function hrTimeToSeconds([seconds, nanoseconds]: number[]): number;
		export declare function uniqueFilter<T>(historySize: number): (i: T) => boolean;
		export declare function batch<T>(i: Iterable<T>, size: number): Iterable<T[]>;

34

dist/util.js

		@@ -7,2 +7,36 @@ "use strict";
		exports.hrTimeToSeconds = hrTimeToSeconds;
		/**
		 * Build a stateful predicate that reports whether a value has not been seen recently.
		 *
		 * Two sets are kept: an active set that receives every value not already in it, and a previous
		 * set that is only consulted. Once the active set reaches `historySize` entries, the previous set
		 * is emptied and the two swap roles. A value seen longer ago than the retained history can
		 * therefore be reported as new again.
		 *
		 * @param historySize  Number of entries the active set may reach before the sets are rotated.
		 * @returns A function returning `true` when the value is in neither set, `false` otherwise.
		 */
		function uniqueFilter(historySize) {
		    let active = new Set();
		    let previous = new Set();
		    return (value) => {
		        if (active.has(value)) {
		            return false;
		        }
		        const isNew = !previous.has(value);
		        active.add(value);
		        if (active.size >= historySize) {
		            // Rotate: forget the oldest generation and start filling it afresh.
		            previous.clear();
		            [active, previous] = [previous, active];
		        }
		        return isNew;
		    };
		}
		exports.uniqueFilter = uniqueFilter;
		/**
		 * Group the items of an iterable into arrays of `size` items.
		 *
		 * @param i     Source iterable.
		 * @param size  Number of items per yielded array.
		 * @returns A generator yielding arrays of `size` items; any remaining items are yielded last as
		 *          a shorter array, and nothing is yielded for an empty source.
		 */
		function* batch(i, size) {
		    let chunk = [];
		    for (const item of i) {
		        chunk.push(item);
		        if (chunk.length !== size) {
		            continue;
		        }
		        yield chunk;
		        chunk = [];
		    }
		    if (chunk.length > 0) {
		        yield chunk;
		    }
		}
		exports.batch = batch;
		//# sourceMappingURL=util.js.map

22

package.json

		{
		"name": "hunspell-reader",
		"version": "2.0.4",
		"version": "2.1.0",
		"description": "A library for reading Hunspell Dictionary Files",
		@@ -39,19 +39,19 @@ "bin": "./dist/app.js",
		"devDependencies": {
		"@types/chai": "^4.1.6",
		"@types/chai": "^4.1.7",
		"@types/fs-extra": "^5.0.4",
		"@types/mocha": "^5.2.5",
		"@types/node": "^8.10.34",
		"@types/node": "^8.10.39",
		"chai": "^4.2.0",
		"coveralls": "^3.0.2",
		"mocha": "^5.2.0",
		"nyc": "^13.0.1",
		"rimraf": "^2.6.2",
		"ts-node": "^6.2.0",
		"typescript": "^3.1.1"
		"nyc": "^13.1.0",
		"rimraf": "^2.6.3",
		"ts-node": "^7.0.1",
		"typescript": "^3.2.2"
		},
		"dependencies": {
		"commander": "^2.18.0",
		"cspell-lib": "^3.0.4",
		"fs-extra": "^7.0.0",
		"gensequence": "^2.1.1",
		"commander": "^2.19.0",
		"cspell-lib": "^3.0.5",
		"fs-extra": "^7.0.1",
		"gensequence": "^2.1.2",
		"rxjs": "^6.3.3",
		@@ -58,0 +58,0 @@ "rxjs-stream": "^3.0.1"

3

src/aff.ts

		@@ -5,2 +5,3 @@ import * as util from 'util';
		import * as GS from 'gensequence';
		import { Dictionary } from './types';

		@@ -60,3 +61,3 @@ // cSpell:enableCompoundWords
		export interface AffInfo extends AffTransformFlags {
		SET?: string;
		SET?: string; // Characterset encoding of the .aff and .dic file
		TRY?: string;
		@@ -63,0 +64,0 @@ KEY?: string;

104

src/app.ts

		@@ -1,16 +0,17 @@
		#!/usr/bin/env node --max_old_space_size=8192
		#!/usr/bin/env node

		// cSpell:ignore findup
		import * as commander from 'commander';
		import { HunspellReader } from './HunspellReader';
		import { IterableHunspellReader } from './IterableHunspellReader';
		import * as fs from 'fs';
		import {rxToStream} from 'rxjs-stream';
		import {mkdirp} from 'fs-extra';
		import {from, Observable} from 'rxjs';
		import {map, flatMap, filter, bufferCount, tap, toArray} from 'rxjs/operators';
		import * as path from 'path';
		// import * as monitor from './monitor';
		import { uniqueFilter, batch } from './util';
		import { genSequence } from 'gensequence';

		const uniqueHistorySize = 500000;

		const packageInfo = require('../package.json');
		const version = packageInfo['version'];

		let displayHelp = true;

		commander
		@@ -24,11 +25,10 @@ .version(version);
		.option('-u, --unique', 'make sure the words are unique.')
		.option('-i, --ignore_case', 'used with --unique and --sort')
		.option('-l, --lower_case', 'output in lower case')
		.option('-T, --no-transform', 'Do not apply the prefix and suffix transforms. Root words only.')
		.description('Output all the words in the <hunspell.dic> file.')
		.action((hunspellDicFilename, options) => {
		.action(async function(hunspellDicFilename, options) {
		displayHelp = false;
		const {
		sort = false,
		unique = false,
		ignore_case: ignoreCase = false,
		output: outputFile,
		@@ -38,31 +38,35 @@ lower_case: lowerCase = false,
		} = options;
		notify('Write words', !!outputFile);
		notify(`Sort: ${yesNo(sort)}`, !!outputFile);
		notify(`Unique: ${yesNo(unique)}`, !!outputFile);
		notify(`Ignore Case: ${yesNo(ignoreCase)}`, !!outputFile);
		const pOutputStream = createWriteStream(outputFile);
		const baseFile = hunspellDicFilename.replace(/(\.dic)?$/, '');
		const log = (msg: string) => notify(msg, !!outputFile);
		log('Write words');
		log(`Sort: ${yesNo(sort)}`);
		log(`Unique: ${yesNo(unique)}`);
		const baseFile = hunspellDicFilename.replace(/\.(dic\|aff)$/, '');
		const dicFile = baseFile + '.dic';
		const affFile = baseFile + '.aff';
		notify(`Dic file: ${dicFile}`, !!outputFile);
		notify(`Aff file: ${affFile}`, !!outputFile);
		notify(`Generating Words`, !!outputFile);
		const pReader = HunspellReader.createFromFiles(affFile, dicFile);
		const pWordReader = transform ? pReader.then(reader => reader.readWords()) : pReader.then(reader => reader.readRootWords());
		log(`Dic file: ${dicFile}`);
		log(`Aff file: ${affFile}`);
		log(`Generating Words...`);
		const reader = await IterableHunspellReader.createFromFiles(affFile, dicFile);
		const seqWords = transform ? reader.seqWords() : reader.seqRootWords();
		const normalize = lowerCase ? (a: string) => a.toLowerCase() : (a: string) => a;
		const filterUnique = unique ? uniqueFilter(uniqueHistorySize) : (_: string) => true;
		const fd = outputFile ? fs.openSync(outputFile, 'w') : 1;

		const wordsRx = from(pWordReader).pipe(
		map(words => words.pipe(
		map(a => a.trim()),
		filter(a => !!a),
		)),
		map(wordsRx => unique ? makeUnique(wordsRx, ignoreCase) : wordsRx),
		map(wordsRx => sort ? sortWordList(wordsRx, ignoreCase) : wordsRx),
		map(wordsRx => lowerCase ? wordsRx.pipe(map(a => a.toLowerCase())) : wordsRx),
		flatMap(words => words),
		map(word => word + '\n'),
		);
		const words = seqWords
		.map(a => a.trim())
		.filter(a => !!a)
		.map(normalize)
		.map(a => a + '\n')
		.filter(filterUnique);

		pOutputStream.then(writeStream => {
		rxToStream(wordsRx.pipe(bufferCount(1024),map(words => words.join('')))).pipe(writeStream);
		});
		if (sort) {
		log('Sorting...');
		const data = words.toArray().sort().join('');
		fs.writeSync(fd, data);
		} else {
		genSequence(batch(words, 1000)).forEach(w => fs.writeSync(fd, w.join('')));
		}

		fs.closeSync(fd);
		log('Done.');
		});
		@@ -72,32 +76,6 @@

		if (!commander.args.length) {
		if (displayHelp) {
		commander.help();
		}

		/**
		 * Obtain the stream the word list is written to.
		 *
		 * @param filename - Optional output path.
		 * @returns A promise for `process.stdout` when no filename is given; otherwise a promise for a
		 *          file write stream, created after the parent directory has been made with `mkdirp`.
		 */
		function createWriteStream(filename?: string): Promise<NodeJS.WritableStream> {
		    if (!filename) {
		        return Promise.resolve(process.stdout);
		    }
		    const parentDir = path.dirname(filename);
		    return mkdirp(parentDir).then(() => fs.createWriteStream(filename));
		}

		/**
		 * Sort a stream of words.
		 *
		 * The whole stream is collected with `toArray`, sorted, and then passed through `flatMap`.
		 *
		 * @param words - Observable of words.
		 * @param ignoreCase - When true, words are compared by their lower-cased form.
		 * @returns The result of piping `words` through `toArray` and a sorting `flatMap`.
		 */
		function sortWordList(words: Observable<string>, ignoreCase: boolean) {
		    const compareStrings = (left: string, right: string): number => {
		        if (left < right) {
		            return -1;
		        }
		        return left > right ? 1 : 0;
		    };
		    const compareIgnoringCase = (left: string, right: string): number =>
		        compareStrings(left.toLowerCase(), right.toLowerCase());
		    const comparator = ignoreCase ? compareIgnoringCase : compareStrings;
		    return words.pipe(
		        toArray(),
		        flatMap(list => list.sort(comparator)),
		    );
		}

		/**
		 * Drop words that have already been emitted.
		 *
		 * Every word that passes the filter is remembered in a set that grows for the life of the stream.
		 *
		 * @param words - Observable of words.
		 * @param ignoreCase - When true, words are compared by their lower-cased form.
		 * @returns The result of piping `words` through a membership `filter` and a recording `tap`.
		 */
		function makeUnique(words: Observable<string>, ignoreCase: boolean) {
		    const seen = new Set<string>();
		    const toKey = (word: string): string => (ignoreCase ? word.toLowerCase() : word);
		    return words.pipe(
		        filter(word => !seen.has(toKey(word))),
		        tap(word => seen.add(toKey(word))),
		    );
		}

		function notify(message: any, useStdOut = true) {
		@@ -104,0 +82,0 @@ if (useStdOut) {

1

src/converter.ts

		@@ -0,1 +1,2 @@
		import { Dictionary } from './types';

		@@ -2,0 +3,0 @@ export interface ConvItem {

8

src/HunspellReader.ts

		@@ -7,9 +7,4 @@ import {parseAffFileToAff} from './affReader';
		import * as monitor from './monitor';
		import { WordInfo } from './types';

		/** A word paired with its rule string. */
		export interface WordInfo {
		// NOTE(review): presumably the word text as read from the dictionary — confirm against the reader.
		word: string;
		// NOTE(review): presumably the affix rule flags attached to the word — confirm against the reader.
		rules: string;
		}


		export interface HunspellSrcInfo {
		@@ -28,3 +23,2 @@ aff: Aff;


		/**
		@@ -31,0 +25,0 @@ * @internal

1

src/index.ts

		export * from './HunspellReader';
		export * from './IterableHunspellReader';

34

src/util.ts

		@@ -6,1 +6,35 @@
		}

		/**
		 * Build a stateful predicate that reports whether a value has not been seen recently.
		 *
		 * Two sets are kept: an active set that receives every value not already in it, and a previous
		 * set that is only consulted. Once the active set reaches `historySize` entries, the previous set
		 * is emptied and the two swap roles. A value seen longer ago than the retained history can
		 * therefore be reported as new again.
		 *
		 * @param historySize - Number of entries the active set may reach before the sets are rotated.
		 * @returns A function returning `true` when the value is in neither set, `false` otherwise.
		 */
		export function uniqueFilter<T>(historySize: number): (i: T) => boolean {
		    let active = new Set<T>();
		    let previous = new Set<T>();
		    return (value: T) => {
		        if (active.has(value)) {
		            return false;
		        }
		        const isNew = !previous.has(value);
		        active.add(value);
		        if (active.size >= historySize) {
		            // Rotate: forget the oldest generation and start filling it afresh.
		            previous.clear();
		            [active, previous] = [previous, active];
		        }
		        return isNew;
		    };
		}

		/**
		 * Group the items of an iterable into arrays of `size` items.
		 *
		 * @param i - Source iterable.
		 * @param size - Number of items per yielded array.
		 * @returns An iterable of arrays of `size` items; any remaining items are yielded last as a
		 *          shorter array, and nothing is yielded for an empty source.
		 */
		export function *batch<T>(i: Iterable<T>, size: number): Iterable<T[]> {
		    let chunk: T[] = [];
		    for (const item of i) {
		        chunk.push(item);
		        if (chunk.length !== size) {
		            continue;
		        }
		        yield chunk;
		        chunk = [];
		    }

		    if (chunk.length > 0) {
		        yield chunk;
		    }
		}

src/globals.d.ts

dist/aff.js.map