jp-verbs - npm Package Compare versions

Comparing version 1.0.0 to 1.0.1

frequencyForWord.json

308

index.js

		@@ -1,26 +0,23 @@
		'use strict'
		/*
		* This module uses the grammar rules listed in derivations.js
		* and recursively applies them to the input string until the
		* verb root is reached. There will most likely be more than
		* one apparently viable deconjugation, so effort is made to
		* sort them according to likelihood.
		*
		* Full project source: https://github.com/mistval/jp-verb-conjugator
		*/

		const derivationTable = require('./derivations.js');
		const WordType = require('./word_type.js');
		const DerivationAttribute = require('./derivation_attribute.js');
		const dictionary = require('./dictionary.json');
		const VerbType = require('./verb_type.js');
		const frequencyForWord = require('./frequencyForWord.json');

		// Create separate sets for suru verbs and non suru verbs.
		/*
		* For performance, map each rule to the conjugated word type that it can follow.
		*/
		const derivationRulesForConjugatedWordType = {};

		let suruVerbSet = {};
		let ichiGoDanVerbSet = {};
		for (let wordInformation of dictionary) {
		if (wordInformation.verbType === VerbType.NON_SURU_VERB) {
		ichiGoDanVerbSet[wordInformation.verb] = true;
		} else {
		suruVerbSet[wordInformation.verb] = true;
		}
		}

		// For performance, map each rule to the conjugated word type that it can follow.

		let derivationRulesForConjugatedWordType = {};

		for (let rule of derivationTable) {
		let conjugatedWordType = rule.conjugatedWordType;
		const conjugatedWordType = rule.conjugatedWordType;
		if (!derivationRulesForConjugatedWordType[conjugatedWordType]) {
		@@ -32,160 +29,189 @@ derivationRulesForConjugatedWordType[conjugatedWordType] = [];

		function isSuru(result) {
		return result.base === 'する';
		function getFrequencyForSuruVerb(word) {
		if (word.endsWith('する')) {
		const suruBase = word.substring(0, word.length - 2);
		return frequencyForWord[suruBase];
		}
		return undefined;
		}

		// This is almost always, but not always, an accurate heuristic.
		function isSuruVerb(result) {
		return result.base.endsWith('する');
		function isNumber(numberCandidate) {
		return typeof numberCandidate === typeof 1;
		}

		function preferTrue(a, b) {
		if (a && !b) {
		function compareFrequency(frequencyA, frequencyB) {
		const frequencyAIsNumber = isNumber(frequencyA);
		const frequencyBIsNumber = isNumber(frequencyB);

		if (frequencyAIsNumber && frequencyBIsNumber) {
		return frequencyA - frequencyB;
		} else if (frequencyAIsNumber) {
		return -1;
		} else if (b && !a) {
		} else if (frequencyBIsNumber) {
		return 1;
		} else {
		return 0;
		}
		return 0;
		}

		function preferSuru(a, b) {
		return preferTrue(isSuru(a), isSuru(b));
		}
		// Sort by the frequency of the base word.
		function sortByLikelihood(results) {
		const resultsCopy = results.slice();
		return resultsCopy.sort((a, b) => {
		const aBase = a.base;
		const bBase = b.base;

		function preferNonSuruVerb(a, b) {
		return preferTrue(!isSuruVerb(a), !isSuruVerb(b));
		// First try comparing the words as-is based on their frequency.
		const strictCompare = compareFrequency(frequencyForWord[aBase], frequencyForWord[bBase]);
		if (strictCompare) {
		return strictCompare;
		}

		// If neither word is preferred as-is, then try comparing the words as suru verbs.
		const suruVerbCompare = compareFrequency(getFrequencyForSuruVerb(aBase), getFrequencyForSuruVerb(bBase));
		if (suruVerbCompare) {
		return suruVerbCompare;
		}

		// If neither word is preferred as-is or when considered as a suru verb, then prefer whichever word is shorter, if either.
		return aBase.length - bBase.length;
		});
		}

		const impossibleSequences = [
		[WordType.POTENTIAL, WordType.POTENTIAL_PASSIVE],
		];
		function getCandidateDerivations(wordType, word) {
		/*
		* SENTENCE is a special word type that allows any
		* derivation whose conjugated word ending matches its
		* ending. So consider the entire derivation table if
		* the word type is SENTENCE.
		*/
		let candidateDerivations;
		if (wordType === WordType.SENTENCE) {
		candidateDerivations = derivationTable;
		} else {
		candidateDerivations = derivationRulesForConjugatedWordType[wordType];
		}

		function hasImpossibleSequence(result) {
		const separator = '\|';
		let pathString = result.derivationPath.join(separator);
		for (let impossibleSequence of impossibleSequences) {
		let impossibleSequenceString = impossibleSequence.join('\|');
		if (pathString.indexOf(impossibleSequenceString) !== -1) {
		return true;
		}
		}
		return false;
		// Return only the derivations whose conjugated endings match the end of the word.
		return candidateDerivations.filter(derivation => word.endsWith(derivation.conjugatedEnding));
		}

		function filterResultsWithImpossibleSequences(results) {
		return results.filter(result => !hasImpossibleSequence(result));
		function derivationIsSilent(derivation) {
		return derivation.attributes && derivation.attributes.indexOf(DerivationAttribute.SILENT) !== -1;
		}

		// This heuristic improves the results,
		// but still makes a lot of mistakes.
		// I think it might be best to sort
		// results by the ranking of the base
		// word in a word frequency list. That's
		// TODO
		function sortByLikelihood(results) {
		results = results.sort((a, b) => {
		let preference = 0;
		preference = preferSuru(a, b);
		if (preference) {
		return preference;
		}
		preference = preferNonSuruVerb(a, b);
		if (preference) {
		return preference;
		}
		return a.derivationPath.length - b.derivationPath.length;
		});
		return results;
		function createNewDerivationSequence() {
		return {
		nonSilentDerivationsTaken: [],
		nonSilentWordFormProgression: [],
		allDerivationsTaken: [],
		};
		}

		function getCandidateDerivationsForWordType(wordType) {
		if (wordType === WordType.SENTENCE) {
		return derivationTable;
		function copyDerivationSequence(derivationSequence) {
		const copy = {};
		for (let key of Object.keys(derivationSequence)) {
		const array = derivationSequence[key];
		copy[key] = array.slice();
		}
		return derivationRulesForConjugatedWordType[wordType];
		return copy;
		}

		function canTakeDerivationPath(word, nextCandidateDerivation) {
		return word.endsWith(nextCandidateDerivation.conjugatedEnding);
		function addDerivationToSequence(derivationSequence, derivation, derivedWord) {
		derivationSequence = copyDerivationSequence(derivationSequence);
		if (!derivationIsSilent(derivation)) {
		derivationSequence.nonSilentDerivationsTaken.push(derivation);
		derivationSequence.nonSilentWordFormProgression.push(derivedWord);
		}

		derivationSequence.allDerivationsTaken.push(derivation);
		return derivationSequence;
		}

		function reduceResultDerivationsToWordTypes(results) {
		for (let result of results) {
		result.derivationPath = result.derivationPath.map(derivation => derivation.conjugatedWordType);
		}
		return results;
		function createDerivationSequenceOutputForm(derivationSequence) {
		/*
		* This module works recursively from the end of the conjugated word, but
		* it makes more sense for the module's output to be in the other direction,
		* hence the reverse() calls.
		*/
		return {
		derivations: derivationSequence.nonSilentDerivationsTaken.slice().reverse().map(derivation => derivation.conjugatedWordType),
		wordFormProgression: derivationSequence.nonSilentWordFormProgression.slice().reverse(),
		};
		}

		function derivationIsSilent(derivation) {
		return derivation.attributes && derivation.attributes.indexOf(DerivationAttribute.SILENT) !== -1;
		function unconjugateWord(word, derivation) {
		// Slice off the conjugated ending and replace it with the unconjugated ending.
		return word.substring(0, word.length - derivation.conjugatedEnding.length) + derivation.unconjugatedEnding;
		}

		class DerivationInformation {
		constructor(derivationPath, derivationSequence) {
		if (derivationPath) {
		this.derivationPath = derivationPath.slice();
		} else {
		this.derivationPath = [];
		}
		function tookInvalidDerivationPath(derivationSequence) {
		const allDerivationsTaken = derivationSequence.allDerivationsTaken;

		if (derivationSequence) {
		this.derivationSequence = derivationSequence.slice();
		} else {
		this.derivationSequence = [];
		/*
		* Check if any derivation in the sequence follows a sequence of derivations
		* that it's not allowed to follow.
		*/
		for (let i = 0; i < allDerivationsTaken.length; ++i) {
		const derivation = allDerivationsTaken[i];
		if (!derivation.cannotFollow) {
		continue;
		}
		}
		for (let forbiddenPredecessorSequence of derivation.cannotFollow) {
		let nextDerivationOffset = 1;

		tryPushDerivation(derivation, word) {
		if (!derivation) {
		this.derivationSequence.push(word);
		return;
		/*
		* The forbidden predecessor sequences are expressed in forward-order in derivations.js,
		* because they are easier to think about that way. But the conjugation code works in
		* reverse order, so we have to consider the forbidden predecessor sequences in reverse
		* order also. So start at the back of the sequence.
		*/
		for (let g = forbiddenPredecessorSequence.length - 1; g >= 0; --g, ++nextDerivationOffset) {
		const nextDerivation = allDerivationsTaken[i + nextDerivationOffset];
		if (!nextDerivation \|\| nextDerivation.conjugatedWordType !== forbiddenPredecessorSequence[g]) {
		break;
		}
		if (g === 0) {
		return true; // A forbidden predecessor sequence was matched. Return true.
		}
		}
		}
		if (!derivationIsSilent(derivation)) {
		this.derivationPath.push(derivation);
		this.derivationSequence.push(word);
		};
		}

		copy() {
		return new DerivationInformation(this.derivationPath, this.derivationSequence);
		}
		return false; // No forbidden predecessor sequence was matched.
		}

		getDerivationPathFinalForm() {
		return this.derivationPath.slice().reverse();
		function unconjugateRecursive(word, wordType, derivationSequence, level, levelLimit) {
		if (tookInvalidDerivationPath(derivationSequence)) {
		return [];
		}

		getDerivationSequenceFinalForm() {
		return this.derivationSequence.slice().reverse();
		}
		}

		function unconjugateRecursive(word, wordType, derivationInformation, level, levelLimit) {
		// Invalid base cases
		if (level > levelLimit) {
		/*
		* Recursion is going too deep, abort.
		*
		* There should not be any potential for infinite recursion,
		* however it is difficult to verify with certainty that
		* there is none. Therefore, a way to break out of the
		* recursion is provided for safety (relying on running out of space
		* on the stack and throwing might take too long)
		*/
		return [];
		}

		// Valid base case
		// Check if we have reached a potentially valid result, and if so, add it to the results.
		let results = [];
		let isIchiGoDanVerb = ichiGoDanVerbSet[word] === true;
		let isSuruVerb = word.endsWith('する') && suruVerbSet[word.replace('する', '')] === true;
		let isDictionaryForm = wordType === WordType.GODAN_VERB \|\| wordType === WordType.ICHIDAN_VERB \|\| wordType === WordType.SENTENCE;
		if ((isIchiGoDanVerb \|\| isSuruVerb) && isDictionaryForm) {
		let nextDerivationInformation = derivationInformation.copy();
		nextDerivationInformation.tryPushDerivation(undefined, word);
		const isDictionaryForm = wordType === WordType.GODAN_VERB \|\| wordType === WordType.ICHIDAN_VERB \|\| wordType === WordType.SENTENCE;
		if (isDictionaryForm) {
		const derivationSequenceOutputForm = createDerivationSequenceOutputForm(derivationSequence);
		results.push({
		base: word,
		derivationPath: nextDerivationInformation.getDerivationPathFinalForm(),
		currentDerivationSequence: nextDerivationInformation.getDerivationSequenceFinalForm()});
		derivationSequence: derivationSequenceOutputForm,
		});
		}

		// Take possible derivation paths
		for (let candidateDerivation of getCandidateDerivationsForWordType(wordType)) {
		if (canTakeDerivationPath(word, candidateDerivation)) {
		let nextDerivationInformation = derivationInformation.copy();
		nextDerivationInformation.tryPushDerivation(candidateDerivation, word);
		let unconjugatedWord = word.substring(0, word.length - candidateDerivation.conjugatedEnding.length) + candidateDerivation.unconjugatedEnding;
		results = results.concat(unconjugateRecursive(unconjugatedWord, candidateDerivation.unconjugatedWordType, nextDerivationInformation, level + 1, levelLimit));
		}
		// Take possible derivation paths and recurse.
		for (let candidateDerivation of getCandidateDerivations(wordType, word)) {
		const nextDerivationSequence = addDerivationToSequence(derivationSequence, candidateDerivation, word);
		const unconjugatedWord = unconjugateWord(word, candidateDerivation);
		results = results.concat(unconjugateRecursive(unconjugatedWord, candidateDerivation.unconjugatedWordType, nextDerivationSequence, level + 1, levelLimit));
		}
		@@ -195,3 +221,8 @@ return results;

		function removeLastCharacter(str) {
		return str.substring(0, str.length - 1);
		}

		module.exports.unconjugate = function(word, fuzzy, recursionDepthLimit) {
		// Handle the 'recursionDepthLimit' argument being passed as the second argument, and the 'fuzzy' argument being omitted.
		if (typeof fuzzy === typeof 1) {
		@@ -201,17 +232,16 @@ recursionDepthLimit = fuzzy;
		}

		fuzzy = !!fuzzy;
		recursionDepthLimit = recursionDepthLimit \|\| 999999;
		let results = unconjugateRecursive(word, WordType.SENTENCE, new DerivationInformation(), 0, recursionDepthLimit);
		recursionDepthLimit = recursionDepthLimit \|\| Math.MAX_SAFE_INTEGER;
		const results = unconjugateRecursive(word, WordType.SENTENCE, createNewDerivationSequence(), 0, recursionDepthLimit);

		// If there are no results but the search should be fuzzy, chop off the last character one by one and see if we can get a substring that has results
		if (fuzzy && results.length === 0) {
		// Chop off the last character one by one and see if we can get a substring that has results
		let truncatedWord = word.substring(0, word.length - 1);
		const truncatedWord = removeLastCharacter(word);
		while (truncatedWord && results.length === 0) {
		results = unconjugateRecursive(truncatedWord, WordType.SENTENCE, new DerivationInformation(), 0, recursionDepthLimit);
		truncatedWord = truncatedWord.substring(0, truncatedWord.length - 1);
		results = unconjugateRecursive(truncatedWord, WordType.SENTENCE, createNewDerivationSequence(), 0, recursionDepthLimit);
		truncatedWord = removeLastCharacter(truncatedWord);
		}
		}

		results = reduceResultDerivationsToWordTypes(results);
		results = filterResultsWithImpossibleSequences(results);
		return sortByLikelihood(results);
		@@ -218,0 +248,0 @@ }

package.json

		{
		"name": "jp-verbs",
		"version": "1.0.0",
		"version": "1.0.1",
		"description": "Unconjugate conjugated Japanese verbs.",
		@@ -11,3 +11,3 @@ "main": "index.js",
		"type": "git",
		"url": "git+https://github.com/mistval/jp-verb-conjugator.git"
		"url": "git+https://github.com/mistval/jp-verb-deconjugator.git"
		},
		@@ -25,9 +25,9 @@ "keywords": [
		],
		"author": "Randall Schmidt",
		"author": "mistval",
		"license": "MIT",
		"bugs": {
		"url": "https://github.com/mistval/jp-verb-conjugator/issues"
		"url": "https://github.com/mistval/jp-verb-deconjugator/issues"
		},
		"homepage": "https://github.com/mistval/jp-verb-conjugator#readme",
		"homepage": "https://github.com/mistval/jp-verb-deconjugator#readme",
		"dependencies": {}
		}

README.md

		@@ -57,4 +57,2 @@ # jp-verb-conjugator

		100% of the code is covered by tests, however not every rule is covered.

		After installing nyc and mocha globally, run tests with ```npm test```
		@@ -61,0 +59,0 @@

test/tests.js

		@@ -26,5 +26,5 @@ const Conjugator = require('./../index.js');
		const desiredDerivationPathString = derivationPath.join(combinationCharacter);
		const actualDerivationPathString = bestResult.derivationPath.join(combinationCharacter);
		const actualDerivationPathString = bestResult.derivationSequence.derivations.join(combinationCharacter);

		for (let derivation of derivationPath.concat(bestResult.derivationPath)) {
		for (let derivation of derivationPath.concat(bestResult.derivationSequence.derivations)) {
		if (!derivation) {
		@@ -49,3 +49,3 @@ throw new Error('undefined derivation name for word ' + wordToDeconjugate);
		assertDerivationPathMatches('誘ってもらわれてくれなかった', '誘う', WordType.TE_FORM, WordType.MORAU, WordType.PASSIVE, WordType.TE_FORM, WordType.KURERU, WordType.NEGATIVE_NAI_VERB, WordType.PLAIN_PAST);
		assertDerivationPathMatches('遊んでるべく', '遊ぶ', WordType.TE_FORM, WordType.IRU, WordType.BEKU);
		assertDerivationPathMatches('遊んでるべく', '遊ぶ', WordType.TE_FORM, WordType.SHORT_IRU, WordType.BEKU);
		assertDerivationPathMatches('敷きやがりなさい', '敷く', WordType.MASU_STEM, WordType.YAGARU, WordType.MASU_STEM, WordType.NASAI);
		@@ -111,3 +111,3 @@ assertDerivationPathMatches('重なり次第だ', '重なる', WordType.MASU_STEM, WordType.SHIDAI, WordType.DA);
		assertDerivationPathMatches('帰ったのだよ', '帰る', WordType.PLAIN_PAST, WordType.EXPLANATORY_NO_PARTICLE, WordType.DA, WordType.YO_PARTICLE);
		assertDerivationPathMatches('殺されるな', '殺す', WordType.PASSIVE, WordType.NA_COMMAND);
		assertDerivationPathMatches('殺されるな', '殺す', WordType.PASSIVE, WordType.NA_PARTICLE);
		assertDerivationPathMatches('はしゃぐことがあることがあるだろうよ', 'はしゃぐ', WordType.OCCASIONAL_OCCURANCE_ARU, WordType.OCCASIONAL_OCCURANCE_ARU, WordType.DAROU, WordType.YO_PARTICLE);
		@@ -147,7 +147,7 @@ assertDerivationPathMatches('止めることができる', '止める', WordType.POTENTIAL);
		assertDerivationPathMatches('語らないです', '語る', WordType.NEGATIVE_NAI_VERB, WordType.POLITE_DESU_VERB);
		assertDerivationPathMatches('弾けない', '弾ける', WordType.NEGATIVE_NAI_VERB);
		assertDerivationPathMatches('弾けない', '弾く', WordType.POTENTIAL, WordType.NEGATIVE_NAI_VERB);
		assertDerivationPathMatches('集まりました', '集まる', WordType.MASU_STEM, WordType.POLITE_MASU, WordType.POLITE_MASHITA);
		assertDerivationPathMatches('信じました', '信じる', WordType.MASU_STEM, WordType.POLITE_MASU, WordType.POLITE_MASHITA);
		assertDerivationPathMatches('笑いませんでした', '笑う', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);
		assertDerivationPathMatches('放った', '放つ', WordType.PLAIN_PAST);
		assertDerivationPathMatches('放った', '放る', WordType.PLAIN_PAST);
		assertDerivationPathMatches('覚えた', '覚える', WordType.PLAIN_PAST);
		@@ -171,3 +171,3 @@ assertDerivationPathMatches('言わなかった', '言う', WordType.NEGATIVE_NAI_VERB, WordType.PLAIN_PAST);
		assertDerivationPathMatches('黙れ', '黙る', WordType.IMPERATIVE);
		assertDerivationPathMatches('いろ', 'いる', WordType.IMPERATIVE);
		//assertDerivationPathMatches('いろ', 'いる', WordType.IMPERATIVE);
		assertDerivationPathMatches('食べよう', '食べる', WordType.VOLITIONAL);
		@@ -222,6 +222,5 @@ assertDerivationPathMatches('殺されましょう', '殺す', WordType.PASSIVE, WordType.MASU_STEM, WordType.POLITE_MASU, WordType.POLITE_MASHOU);
		assertDerivationPathMatches('書かれてあった', '書く', WordType.PASSIVE, WordType.TE_FORM, WordType.ARU, WordType.PLAIN_PAST);
		assertDerivationPathMatches('書かれてなかった', '書く', WordType.PASSIVE, WordType.TE_FORM, WordType.NEGATIVE_ARU_OR_IRU, WordType.PLAIN_PAST);
		assertDerivationPathMatches('書かれてなかった', '書く', WordType.PASSIVE, WordType.TE_FORM, WordType.ARU, WordType.NEGATIVE_NAI_VERB, WordType.PLAIN_PAST);
		assertDerivationPathMatches('撫でさせられていさせなさい', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);
		assertDerivationPathMatches('撫でさせられていさせな', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);
		assertDerivationPathMatches('撫でさせられていさせな', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);
		assertDerivationPathMatches('撫でさせられてはいさせな', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.WA_AFTER_TE, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);
		@@ -240,3 +239,2 @@ assertDerivationPathMatches('書かず', '書く', WordType.ZU);
		assertDerivationPathMatches('しませんでした', 'する', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);
		assertDerivationPathMatches('しませんでした', 'する', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);
		assertDerivationPathMatches('為さいませんでした', '為さる', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);
		@@ -265,6 +263,6 @@ assertDerivationPathMatches('書いてください', '書く', WordType.TE_FORM, WordType.KUDASAI);
		it('Respects maximum recursion depth', function() {
		assertDerivationPathMatches('話してあるでしょう', '話す', WordType.TE_FORM, WordType.ARU, WordType.DAROU, WordType.POLITE_DESHOU);
		let resultsWithMaximumRecursionDepth = Conjugator.unconjugate('話してあるでしょう', 3);
		assert(resultsWithMaximumRecursionDepth.length === 0);
		//assertDerivationPathMatches('撫でさせられぬよね', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.NEGATIVE_NAI_VERB, WordType.NU, WordType.YO_PARTICLE, WordType.NE_PARTICLE);
		//let resultsWithMaximumRecursionDepth = Conjugator.unconjugate('撫でさせられぬよね', 3);
		//assert(resultsWithMaximumRecursionDepth.length === 0);
		});
		});

word_type.js

		@@ -113,7 +113,8 @@ 'use strict'
		BEKU: 'べく In Order To',
		GODAN_VERB: 'godan verb (silent)',
		ICHIDAN_VERB: 'ichidan verb (silent)',
		ADJECTIVE: 'adjective (silent)',
		GODAN_VERB: 'godan verb',
		ICHIDAN_VERB: 'ichidan verb',
		ADJECTIVE: 'adjective',
		SENTENCE_ENDING_PARTICLES: 'sentence ending particles (silent)',
		NEGATIVE_ARU_OR_IRU: 'Negative ある or いる',
		SHORT_IRU: 'てる・でる Continuing State/Result',
		};

.nyc_output/1cd7db40be0dcad39abd733004c52601.json

.nyc_output/30ba1e4e3143fc93f45a84dcac35640b.json

dictionary.json

verb_type.js

derivations.js

Sorry, the diff of this file is too big to display

jp-verbs - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics