nlptoolkit-ngram
Advanced tools
Comparing version 1.0.3 to 1.0.4
@@ -5,2 +5,6 @@ import { SimpleSmoothing } from "./SimpleSmoothing"; | ||
private readonly delta; | ||
/** | ||
* Constructor for Laplace smoothing. Sets the delta. | ||
* @param delta Delta value in Laplace smoothing. | ||
*/ | ||
constructor(delta?: number); | ||
@@ -7,0 +11,0 @@ /** |
@@ -15,2 +15,6 @@ (function (factory) { | ||
class LaplaceSmoothing extends SimpleSmoothing_1.SimpleSmoothing { | ||
/** | ||
* Constructor for Laplace smoothing. Sets the delta. | ||
* @param delta Delta value in Laplace smoothing. | ||
*/ | ||
constructor(delta = 1.0) { | ||
@@ -17,0 +21,0 @@ super(); |
@@ -5,6 +5,22 @@ export declare class MultipleFile { | ||
private contents; | ||
/** | ||
* Constructor for {@link MultipleFile} class. Initializes the buffer reader with the first input file | ||
* from the fileNameList. MultipleFile supports simple multipart file system, where a text file is divided | ||
* into multiple files. | ||
* @param args A list of files given as dynamic parameters. | ||
*/ | ||
constructor(...args: Array<any>); | ||
/** | ||
* Reads a single line from the current file. If the end of the current file has been reached,
* the next file is opened and a single line is read from that file. If all files have been read,
* the method returns null.
* @returns Line read from the current file.
*/ | ||
readLine(): string; | ||
/** | ||
* Checks whether there is more input to be read, either in the current file or in a later file.
* @returns True if the current file is not the last file in the file list or the current file has more lines to be read, false otherwise.
*/ | ||
hasNextLine(): boolean; | ||
readCorpus(): Array<Array<string>>; | ||
} |
@@ -15,2 +15,8 @@ (function (factory) { | ||
class MultipleFile { | ||
/** | ||
* Constructor for {@link MultipleFile} class. Initializes the buffer reader with the first input file | ||
* from the fileNameList. MultipleFile supports simple multipart file system, where a text file is divided | ||
* into multiple files. | ||
* @param args A list of files given as dynamic parameters. | ||
*/ | ||
constructor(...args) { | ||
@@ -21,2 +27,8 @@ this.fileIndex = 0; | ||
} | ||
/** | ||
* Reads a single line from the current file. If the end of the current file has been reached,
* the next file is opened and a single line is read from that file. If all files have been read,
* the method returns null.
* @returns Line read from the current file.
*/ | ||
readLine() { | ||
@@ -29,2 +41,6 @@ if (!this.contents.hasNextLine()) { | ||
} | ||
/** | ||
* Checks whether there is more input to be read, either in the current file or in a later file.
* @returns True if the current file is not the last file in the file list or the current file has more lines to be read, false otherwise.
*/ | ||
hasNextLine() { | ||
@@ -31,0 +47,0 @@ return this.fileIndex != this.fileNameList.length - 1 || this.contents.hasNextLine(); |
@@ -20,3 +20,12 @@ import { NGramNode } from "./NGramNode"; | ||
constructor(...args: Array<any>); | ||
/** | ||
* Reads the header from the input file. | ||
* @param multipleFile Input file | ||
*/ | ||
readHeader(multipleFile: MultipleFile): void; | ||
/** | ||
* Merges the current NGram with the given NGram. If the N values of the two NGrams are not the same,
* no merge is performed. Otherwise the vocabulary is merged first, then the NGram trees.
* @param toBeMerged NGram to be merged with. | ||
*/ | ||
merge(toBeMerged: NGram<Symbol>): void; | ||
@@ -192,3 +201,8 @@ /** | ||
setAdjustedProbability(countsOfCounts: Array<number>, height: number, pZero: number): void; | ||
/** | ||
* Prunes NGram according to the given threshold. All nodes having a probability less than the threshold will be | ||
* pruned. | ||
* @param threshold Probability threshold used for pruning. | ||
*/ | ||
prune(threshold: number): void; | ||
} |
@@ -41,2 +41,6 @@ (function (factory) { | ||
} | ||
/** | ||
* Reads the header from the input file. | ||
* @param multipleFile Input file | ||
*/ | ||
readHeader(multipleFile) { | ||
@@ -60,2 +64,7 @@ let line = multipleFile.readLine(); | ||
} | ||
/** | ||
* Merges the current NGram with the given NGram. If the N values of the two NGrams are not the same,
* no merge is performed. Otherwise the vocabulary is merged first, then the NGram trees.
* @param toBeMerged NGram to be merged with. | ||
*/ | ||
merge(toBeMerged) { | ||
@@ -417,2 +426,7 @@ if (this.N != toBeMerged.getN()) { | ||
} | ||
/** | ||
* Prunes NGram according to the given threshold. All nodes having a probability less than the threshold will be | ||
* pruned. | ||
* @param threshold Probability threshold used for pruning. | ||
*/ | ||
prune(threshold) { | ||
@@ -419,0 +433,0 @@ if (threshold > 0.0 && threshold <= 1.0) { |
@@ -10,2 +10,4 @@ import { MultipleFile } from "./MultipleFile"; | ||
private unknown; | ||
/**
 * Initializes the node with the given symbol and sets its count to 0.
 * @param symbol Symbol to be stored in this node.
 */
constructor1(symbol: any): void;
/**
 * Initializes the node, and recursively its child nodes, by reading them from the given input file.
 * @param symbol When falsy, the symbol of the node is read from the input file.
 * @param multipleFile Input file from which the node is read.
 */
constructor2(symbol: any, multipleFile: MultipleFile): void;
/** | ||
@@ -18,2 +20,6 @@ * Constructor of {@link NGramNode} | ||
constructor(symbol: any, multipleFile?: MultipleFile); | ||
/** | ||
* Merges this NGramNode with the corresponding NGramNode in another NGram. | ||
* @param toBeMerged Parallel NGramNode of the parallel NGram tree. | ||
*/ | ||
merge(toBeMerged: NGramNode<Symbol>): void; | ||
@@ -139,3 +145,9 @@ /** | ||
generateNextString(s: Array<Symbol>, index: number): Symbol; | ||
/** | ||
* Prunes the NGramNode according to the given threshold. Removes the child(ren) whose probability is less than the | ||
* threshold. | ||
* @param threshold Threshold for pruning the NGram tree. | ||
* @param N N in N-Gram. | ||
*/ | ||
prune(threshold: number, N: number): void; | ||
} |
@@ -25,27 +25,37 @@ (function (factory) { | ||
if (multipleFile == undefined) { | ||
this.symbol = symbol; | ||
this.count = 0; | ||
this.constructor1(symbol); | ||
} | ||
else { | ||
if (!symbol) { | ||
this.symbol = multipleFile.readLine().trim(); | ||
this.constructor2(symbol, multipleFile); | ||
} | ||
} | ||
/**
 * Initializes this node with the given symbol and sets its count to 0.
 * @param symbol Symbol to be stored in this node.
 */
constructor1(symbol) {
this.symbol = symbol;
this.count = 0;
}
constructor2(symbol, multipleFile) { | ||
if (!symbol) { | ||
this.symbol = multipleFile.readLine().trim(); | ||
} | ||
let line = multipleFile.readLine().trim(); | ||
let items = line.split(" "); | ||
if (items.length != 4) { | ||
Console.log("Error in line -> " + line); | ||
} | ||
this.count = Number(items[0]); | ||
this.probability = Number(items[1]); | ||
this.probabilityOfUnseen = Number(items[2]); | ||
let numberOfChildren = Number(items[3]); | ||
if (numberOfChildren > 0) { | ||
this.children = new Map(); | ||
for (let i = 0; i < numberOfChildren; i++) { | ||
let childNode = new NGramNode(false, multipleFile); | ||
this.children.set(childNode.symbol, childNode); | ||
} | ||
let line = multipleFile.readLine().trim(); | ||
let items = line.split(" "); | ||
if (items.length != 4) { | ||
Console.log("Error in line -> " + line); | ||
} | ||
this.count = Number(items[0]); | ||
this.probability = Number(items[1]); | ||
this.probabilityOfUnseen = Number(items[2]); | ||
let numberOfChildren = Number(items[3]); | ||
if (numberOfChildren > 0) { | ||
this.children = new Map(); | ||
for (let i = 0; i < numberOfChildren; i++) { | ||
let childNode = new NGramNode(false, multipleFile); | ||
this.children.set(childNode.symbol, childNode); | ||
} | ||
} | ||
} | ||
} | ||
/** | ||
* Merges this NGramNode with the corresponding NGramNode in another NGram. | ||
* @param toBeMerged Parallel NGramNode of the parallel NGram tree. | ||
*/ | ||
merge(toBeMerged) { | ||
@@ -389,2 +399,8 @@ if (this.children != undefined) { | ||
} | ||
/** | ||
* Prunes the NGramNode according to the given threshold. Removes the child(ren) whose probability is less than the | ||
* threshold. | ||
* @param threshold Threshold for pruning the NGram tree. | ||
* @param N N in N-Gram. | ||
*/ | ||
prune(threshold, N) { | ||
@@ -391,0 +407,0 @@ if (N == 0) { |
import { SimpleSmoothing } from "./SimpleSmoothing"; | ||
import { NGram } from "./NGram"; | ||
export declare class NoSmoothing<Symbol> extends SimpleSmoothing<Symbol> { | ||
/** | ||
* Calculates the N-Gram probabilities with no smoothing | ||
* @param nGram N-Gram for which no smoothing is done. | ||
* @param level Height of the NGram node. | ||
*/ | ||
setProbabilitiesWithLevel(nGram: NGram<Symbol>, level: number): void; | ||
} |
@@ -15,2 +15,7 @@ (function (factory) { | ||
class NoSmoothing extends SimpleSmoothing_1.SimpleSmoothing { | ||
/** | ||
* Calculates the N-Gram probabilities with no smoothing | ||
* @param nGram N-Gram for which no smoothing is done. | ||
* @param level Height of the NGram node. | ||
*/ | ||
setProbabilitiesWithLevel(nGram, level) { | ||
@@ -17,0 +22,0 @@ nGram.setProbabilityWithPseudoCount(0.0, level); |
import { NoSmoothing } from "./NoSmoothing"; | ||
import { NGram } from "./NGram"; | ||
export declare class NoSmoothingWithDictionary<Symbol> extends NoSmoothing<Symbol> { | ||
private dictionary; | ||
private readonly dictionary; | ||
/** | ||
@@ -6,0 +6,0 @@ * Constructor of {@link NoSmoothingWithDictionary} |
@@ -5,3 +5,3 @@ import { NoSmoothing } from "./NoSmoothing"; | ||
private dictionary; | ||
private probability; | ||
private readonly probability; | ||
/** | ||
@@ -8,0 +8,0 @@ * Constructor of {@link NoSmoothingWithNonRareWords} |
import { NGram } from "./NGram"; | ||
export declare abstract class SimpleSmoothing<Symbol> { | ||
abstract setProbabilitiesWithLevel(nGram: NGram<Symbol>, level: number): void; | ||
/** | ||
* Calculates the N-Gram probabilities with simple smoothing. | ||
* @param nGram N-Gram for which simple smoothing calculation is done. | ||
*/ | ||
setProbabilities(nGram: NGram<Symbol>): void; | ||
} |
@@ -14,2 +14,6 @@ (function (factory) { | ||
class SimpleSmoothing { | ||
/** | ||
* Calculates the N-Gram probabilities with simple smoothing. | ||
* @param nGram N-Gram for which simple smoothing calculation is done. | ||
*/ | ||
setProbabilities(nGram) { | ||
@@ -16,0 +20,0 @@ this.setProbabilitiesWithLevel(nGram, nGram.getN()); |
{ | ||
"name": "nlptoolkit-ngram", | ||
"version": "1.0.3", | ||
"version": "1.0.4", | ||
"description": "NGram library", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -27,3 +27,3 @@ N-Gram | ||
You can also see [Python](https://github.com/starlangsoftware/NGram-Py), [Java](https://github.com/starlangsoftware/NGram), | ||
[C++](https://github.com/starlangsoftware/NGram-CPP), [Swift](https://github.com/starlangsoftware/NGram-Swift), | ||
[C++](https://github.com/starlangsoftware/NGram-CPP), [C](https://github.com/starlangsoftware/NGram-C), [Swift](https://github.com/starlangsoftware/NGram-Swift), | ||
[Cython](https://github.com/starlangsoftware/NGram-Cy) or [C#](https://github.com/starlangsoftware/NGram-CS) repository. | ||
@@ -30,0 +30,0 @@ |
@@ -8,2 +8,6 @@ import {SimpleSmoothing} from "./SimpleSmoothing"; | ||
/** | ||
* Constructor for Laplace smoothing. Sets the delta. | ||
* @param delta Delta value in Laplace smoothing. | ||
*/ | ||
constructor(delta: number = 1.0) { | ||
@@ -10,0 +14,0 @@ super(); |
@@ -9,2 +9,8 @@ import {FileContents} from "nlptoolkit-util/dist/FileContents"; | ||
/** | ||
* Constructor for {@link MultipleFile} class. Initializes the buffer reader with the first input file | ||
* from the fileNameList. MultipleFile supports simple multipart file system, where a text file is divided | ||
* into multiple files. | ||
* @param args A list of files given as dynamic parameters. | ||
*/ | ||
constructor(... args: Array<any>) { | ||
@@ -16,2 +22,8 @@ this.fileIndex = 0 | ||
/** | ||
* Reads a single line from the current file. If the end of the current file has been reached,
* the next file is opened and a single line is read from that file. If all files have been read,
* the method returns null.
* @returns Line read from the current file.
*/ | ||
readLine(): string{ | ||
@@ -25,2 +37,6 @@ if (!this.contents.hasNextLine()){ | ||
/** | ||
* Checks whether there is more input to be read, either in the current file or in a later file.
* @returns True if the current file is not the last file in the file list or the current file has more lines to be read, false otherwise.
*/ | ||
hasNextLine(): boolean { | ||
@@ -27,0 +43,0 @@ return this.fileIndex != this.fileNameList.length - 1 || this.contents.hasNextLine() |
@@ -40,2 +40,6 @@ import {NGramNode} from "./NGramNode"; | ||
/** | ||
* Reads the header from the input file. | ||
* @param multipleFile Input file | ||
*/ | ||
readHeader(multipleFile: MultipleFile){ | ||
@@ -60,2 +64,7 @@ let line = multipleFile.readLine(); | ||
/** | ||
* Merges the current NGram with the given NGram. If the N values of the two NGrams are not the same,
* no merge is performed. Otherwise the vocabulary is merged first, then the NGram trees.
* @param toBeMerged NGram to be merged with. | ||
*/ | ||
merge(toBeMerged: NGram<Symbol>){ | ||
@@ -435,2 +444,7 @@ if (this.N != toBeMerged.getN()){ | ||
/** | ||
* Prunes NGram according to the given threshold. All nodes having a probability less than the threshold will be | ||
* pruned. | ||
* @param threshold Probability threshold used for pruning. | ||
*/ | ||
prune(threshold: number){ | ||
@@ -437,0 +451,0 @@ if (threshold > 0.0 && threshold <= 1.0){ |
@@ -14,2 +14,29 @@ import {MultipleFile} from "./MultipleFile"; | ||
/**
 * In-memory initialization path: stores the given symbol in this node and
 * sets its count to 0. Nothing is read from any file.
 * @param symbol Symbol to be stored in this node.
 */
constructor1(symbol: any): void {
    this.symbol = symbol;
    this.count = 0;
}
/**
 * File-based initialization path: reads this node, and recursively all of its
 * child nodes, from the given multipart input file.
 *
 * When `symbol` is falsy, the node's symbol is read (trimmed) from the next line
 * of the file; when it is truthy, `this.symbol` is left untouched.
 * NOTE(review): callers in this file pass `false` for child nodes, so a truthy
 * value looks like a "root node" flag rather than a real symbol — confirm.
 *
 * The following line must contain four space-separated fields, in this order:
 * count, probability, probabilityOfUnseen and number of children. If the line does
 * not split into exactly four fields, an error message is logged and parsing
 * continues with whatever fields are present (missing fields become NaN).
 *
 * @param symbol When falsy, the symbol of the node is read from the input file.
 * @param multipleFile Input file from which the node is read.
 */
constructor2(symbol: any, multipleFile: MultipleFile): void {
    if (!symbol) {
        this.symbol = <Symbol><unknown>multipleFile.readLine().trim();
    }
    const line = multipleFile.readLine().trim();
    const items = line.split(" ");
    if (items.length !== 4) {
        // Use the global `console` object: the capitalized `Console` identifier is not
        // a global value at runtime, so the original call would raise a ReferenceError
        // instead of reporting the malformed line.
        console.log("Error in line -> " + line);
    }
    this.count = Number(items[0]);
    this.probability = Number(items[1]);
    this.probabilityOfUnseen = Number(items[2]);
    const numberOfChildren = Number(items[3]);
    if (numberOfChildren > 0) {
        this.children = new Map<Symbol, NGramNode<Symbol>>();
        for (let i = 0; i < numberOfChildren; i++) {
            // Each child reads its own symbol from the file (symbol argument is false).
            const childNode = new NGramNode<Symbol>(false, multipleFile);
            this.children.set(childNode.symbol, childNode);
        }
    }
}
/** | ||
@@ -23,27 +50,12 @@ * Constructor of {@link NGramNode} | ||
if (multipleFile == undefined){ | ||
this.symbol = symbol | ||
this.count = 0 | ||
this.constructor1(symbol); | ||
} else { | ||
if (!symbol) { | ||
this.symbol = <Symbol><unknown>multipleFile.readLine().trim() | ||
} | ||
let line = multipleFile.readLine().trim(); | ||
let items = line.split(" "); | ||
if (items.length != 4){ | ||
Console.log("Error in line -> " + line); | ||
} | ||
this.count = Number(items[0]) | ||
this.probability = Number(items[1]); | ||
this.probabilityOfUnseen = Number(items[2]); | ||
let numberOfChildren = Number(items[3]); | ||
if (numberOfChildren > 0){ | ||
this.children = new Map<Symbol, NGramNode<Symbol>>() | ||
for (let i = 0; i < numberOfChildren; i++){ | ||
let childNode = new NGramNode<Symbol>(false, multipleFile); | ||
this.children.set(childNode.symbol, childNode); | ||
} | ||
} | ||
this.constructor2(symbol, multipleFile); | ||
} | ||
} | ||
/** | ||
* Merges this NGramNode with the corresponding NGramNode in another NGram. | ||
* @param toBeMerged Parallel NGramNode of the parallel NGram tree. | ||
*/ | ||
merge(toBeMerged: NGramNode<Symbol>){ | ||
@@ -387,2 +399,8 @@ if (this.children != undefined){ | ||
/** | ||
* Prunes the NGramNode according to the given threshold. Removes the child(ren) whose probability is less than the | ||
* threshold. | ||
* @param threshold Threshold for pruning the NGram tree. | ||
* @param N N in N-Gram. | ||
*/ | ||
prune(threshold: number, N: number){ | ||
@@ -389,0 +407,0 @@ if (N == 0){ |
@@ -6,2 +6,7 @@ import {SimpleSmoothing} from "./SimpleSmoothing"; | ||
/** | ||
* Calculates the N-Gram probabilities with no smoothing | ||
* @param nGram N-Gram for which no smoothing is done. | ||
* @param level Height of the NGram node. | ||
*/ | ||
public setProbabilitiesWithLevel(nGram: NGram<Symbol>, level: number): void { | ||
@@ -8,0 +13,0 @@ nGram.setProbabilityWithPseudoCount(0.0, level); |
@@ -6,3 +6,3 @@ import {NoSmoothing} from "./NoSmoothing"; | ||
private dictionary: Set<Symbol> | ||
private readonly dictionary: Set<Symbol> | ||
@@ -9,0 +9,0 @@ /** |
@@ -7,3 +7,3 @@ import {NoSmoothing} from "./NoSmoothing"; | ||
private dictionary: Set<Symbol> | ||
private probability: number | ||
private readonly probability: number | ||
@@ -10,0 +10,0 @@ /** |
@@ -6,2 +6,6 @@ import {NGram} from "./NGram"; | ||
/** | ||
* Calculates the N-Gram probabilities with simple smoothing. | ||
* @param nGram N-Gram for which simple smoothing calculation is done. | ||
*/ | ||
public setProbabilities(nGram: NGram<Symbol>){ | ||
@@ -8,0 +12,0 @@ this.setProbabilitiesWithLevel(nGram, nGram.getN()); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
6848767
3757