Socket
Socket
Sign inDemoInstall

jp-verbs

Package Overview
Dependencies
Maintainers
1
Versions
3
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

jp-verbs - npm Package Compare versions

Comparing version 1.0.0 to 1.0.1

frequencyForWord.json

308

index.js

@@ -1,26 +0,23 @@

'use strict'
/*
* This module uses the grammar rules listed in derivations.js
* and recursively applies them to the input string until the
* verb root is reached. There will most likely be more than
* one apparently viable deconjugation, so effort is made to
* sort them according to likelihood.
*
* Full project source: https://github.com/mistval/jp-verb-conjugator
*/
const derivationTable = require('./derivations.js');
const WordType = require('./word_type.js');
const DerivationAttribute = require('./derivation_attribute.js');
const dictionary = require('./dictionary.json');
const VerbType = require('./verb_type.js');
const frequencyForWord = require('./frequencyForWord.json');
// Create separate sets for suru verbs and non suru verbs.
/*
* For performance, map each rule to the conjugated word type that it can follow.
*/
const derivationRulesForConjugatedWordType = {};
let suruVerbSet = {};
let ichiGoDanVerbSet = {};
for (let wordInformation of dictionary) {
if (wordInformation.verbType === VerbType.NON_SURU_VERB) {
ichiGoDanVerbSet[wordInformation.verb] = true;
} else {
suruVerbSet[wordInformation.verb] = true;
}
}
// For performance, map each rule to the conjugated word type that it can follow.
let derivationRulesForConjugatedWordType = {};
for (let rule of derivationTable) {
let conjugatedWordType = rule.conjugatedWordType;
const conjugatedWordType = rule.conjugatedWordType;
if (!derivationRulesForConjugatedWordType[conjugatedWordType]) {

@@ -32,160 +29,189 @@ derivationRulesForConjugatedWordType[conjugatedWordType] = [];

function isSuru(result) {
return result.base === 'する';
/*
 * Looks up the frequencyForWord entry for a する verb by the part of the
 * word that precedes the trailing 'する'.
 *
 * Returns undefined when the word does not end in 'する', or when the
 * remaining part has no entry in frequencyForWord.
 */
function getFrequencyForSuruVerb(word) {
  const suruSuffix = 'する';

  // Anything that is not of the form "<something>する" has no suru-verb frequency.
  if (!word.endsWith(suruSuffix)) {
    return undefined;
  }

  const baseLength = word.length - suruSuffix.length;
  return frequencyForWord[word.substring(0, baseLength)];
}
// This is almost always, but not always, an accurate heuristic.
function isSuruVerb(result) {
return result.base.endsWith('する');
/*
 * Reports whether the candidate is a primitive number.
 * NaN and Infinity count as numbers here, since only the typeof tag is checked.
 */
function isNumber(numberCandidate) {
  const candidateType = typeof numberCandidate;
  return candidateType === 'number';
}
function preferTrue(a, b) {
if (a && !b) {
function compareFrequency(frequencyA, frequencyB) {
const frequencyAIsNumber = isNumber(frequencyA);
const frequencyBIsNumber = isNumber(frequencyB);
if (frequencyAIsNumber && frequencyBIsNumber) {
return frequencyA - frequencyB;
} else if (frequencyAIsNumber) {
return -1;
} else if (b && !a) {
} else if (frequencyBIsNumber) {
return 1;
} else {
return 0;
}
return 0;
}
function preferSuru(a, b) {
return preferTrue(isSuru(a), isSuru(b));
}
// Sort by the frequency of the base word.
function sortByLikelihood(results) {
const resultsCopy = results.slice();
return resultsCopy.sort((a, b) => {
const aBase = a.base;
const bBase = b.base;
function preferNonSuruVerb(a, b) {
return preferTrue(!isSuruVerb(a), !isSuruVerb(b));
// First try comparing the words as-is based on their frequency.
const strictCompare = compareFrequency(frequencyForWord[aBase], frequencyForWord[bBase]);
if (strictCompare) {
return strictCompare;
}
// If neither word is preferred as-is, then try comparing the words as suru verbs.
const suruVerbCompare = compareFrequency(getFrequencyForSuruVerb(aBase), getFrequencyForSuruVerb(bBase));
if (suruVerbCompare) {
return suruVerbCompare;
}
// If neither word is preferred as-is or when considered as a suru verb, then prefer whichever word is shorter, if either.
return aBase.length - bBase.length;
});
}
const impossibleSequences = [
[WordType.POTENTIAL, WordType.POTENTIAL_PASSIVE],
];
function getCandidateDerivations(wordType, word) {
/*
* SENTENCE is a special word type that allows any
* derivation whose conjugated word ending matches its
* ending. So consider the entire derivation table if
* the word type is SENTENCE.
*/
let candidateDerivations;
if (wordType === WordType.SENTENCE) {
candidateDerivations = derivationTable;
} else {
candidateDerivations = derivationRulesForConjugatedWordType[wordType];
}
function hasImpossibleSequence(result) {
const separator = '|';
let pathString = result.derivationPath.join(separator);
for (let impossibleSequence of impossibleSequences) {
let impossibleSequenceString = impossibleSequence.join('|');
if (pathString.indexOf(impossibleSequenceString) !== -1) {
return true;
}
}
return false;
// Return only the derivations whose conjugated endings match the end of the word.
return candidateDerivations.filter(derivation => word.endsWith(derivation.conjugatedEnding));
}
function filterResultsWithImpossibleSequences(results) {
return results.filter(result => !hasImpossibleSequence(result));
/*
 * Tells whether a derivation rule carries the SILENT attribute.
 *
 * When the rule has no (or a falsy) attributes value, that falsy value is
 * returned as-is; otherwise the result is a boolean.
 */
function derivationIsSilent(derivation) {
  const attributes = derivation.attributes;
  if (!attributes) {
    return attributes;
  }

  const silentIndex = attributes.indexOf(DerivationAttribute.SILENT);
  return silentIndex !== -1;
}
// This heuristic improves the results,
// but still makes a lot of mistakes.
// I think it might be best to sort
// results by the ranking of the base
// word in a word frequency list. That's
// TODO
function sortByLikelihood(results) {
results = results.sort((a, b) => {
let preference = 0;
preference = preferSuru(a, b);
if (preference) {
return preference;
}
preference = preferNonSuruVerb(a, b);
if (preference) {
return preference;
}
return a.derivationPath.length - b.derivationPath.length;
});
return results;
/*
 * Builds an empty derivation sequence: a fresh object holding three
 * fresh, empty arrays (non-silent derivations, non-silent word forms,
 * and all derivations).
 */
function createNewDerivationSequence() {
  const emptySequence = {};
  emptySequence.nonSilentDerivationsTaken = [];
  emptySequence.nonSilentWordFormProgression = [];
  emptySequence.allDerivationsTaken = [];
  return emptySequence;
}
function getCandidateDerivationsForWordType(wordType) {
if (wordType === WordType.SENTENCE) {
return derivationTable;
function copyDerivationSequence(derivationSequence) {
const copy = {};
for (let key of Object.keys(derivationSequence)) {
const array = derivationSequence[key];
copy[key] = array.slice();
}
return derivationRulesForConjugatedWordType[wordType];
return copy;
}
function canTakeDerivationPath(word, nextCandidateDerivation) {
return word.endsWith(nextCandidateDerivation.conjugatedEnding);
/*
 * Produces a derivation sequence extended by one derivation step.
 *
 * The work is done on the result of copyDerivationSequence(), not on the
 * sequence that was passed in. Every derivation is recorded in
 * allDerivationsTaken; only non-silent ones are also recorded (together
 * with the word they were applied to) in the non-silent lists.
 */
function addDerivationToSequence(derivationSequence, derivation, derivedWord) {
  const extendedSequence = copyDerivationSequence(derivationSequence);
  const isSilent = derivationIsSilent(derivation);

  if (!isSilent) {
    extendedSequence.nonSilentDerivationsTaken.push(derivation);
    extendedSequence.nonSilentWordFormProgression.push(derivedWord);
  }

  extendedSequence.allDerivationsTaken.push(derivation);
  return extendedSequence;
}
function reduceResultDerivationsToWordTypes(results) {
for (let result of results) {
result.derivationPath = result.derivationPath.map(derivation => derivation.conjugatedWordType);
}
return results;
/*
 * Converts an internal derivation sequence into the shape handed back to
 * callers: the conjugated word type of each non-silent derivation, and the
 * non-silent word forms.
 *
 * This module works recursively from the end of the conjugated word, but
 * it makes more sense for the module's output to be in the other direction,
 * so both lists are emitted in reverse order. The input arrays are left
 * untouched.
 */
function createDerivationSequenceOutputForm(derivationSequence) {
  const { nonSilentDerivationsTaken, nonSilentWordFormProgression } = derivationSequence;

  // Walk the derivations back-to-front, keeping only their conjugated word type.
  const derivations = [];
  for (let i = nonSilentDerivationsTaken.length - 1; i >= 0; --i) {
    derivations.push(nonSilentDerivationsTaken[i].conjugatedWordType);
  }

  // Reverse a copy so the caller's array keeps its order.
  const wordFormProgression = [...nonSilentWordFormProgression].reverse();

  return { derivations, wordFormProgression };
}
function derivationIsSilent(derivation) {
return derivation.attributes && derivation.attributes.indexOf(DerivationAttribute.SILENT) !== -1;
/*
 * Undoes one derivation on a word: drops as many trailing characters as the
 * derivation's conjugated ending is long, then appends the derivation's
 * unconjugated ending.
 *
 * The conjugated ending is removed by length only; this function does not
 * itself check that the word actually ends with it.
 */
function unconjugateWord(word, derivation) {
  const { conjugatedEnding, unconjugatedEnding } = derivation;
  const stemLength = word.length - conjugatedEnding.length;
  const stem = word.substring(0, stemLength);
  return stem + unconjugatedEnding;
}
class DerivationInformation {
constructor(derivationPath, derivationSequence) {
if (derivationPath) {
this.derivationPath = derivationPath.slice();
} else {
this.derivationPath = [];
}
function tookInvalidDerivationPath(derivationSequence) {
const allDerivationsTaken = derivationSequence.allDerivationsTaken;
if (derivationSequence) {
this.derivationSequence = derivationSequence.slice();
} else {
this.derivationSequence = [];
/*
* Check if any derivation in the sequence follows a sequence of derivations
* that it's not allowed to follow.
*/
for (let i = 0; i < allDerivationsTaken.length; ++i) {
const derivation = allDerivationsTaken[i];
if (!derivation.cannotFollow) {
continue;
}
}
for (let forbiddenPredecessorSequence of derivation.cannotFollow) {
let nextDerivationOffset = 1;
tryPushDerivation(derivation, word) {
if (!derivation) {
this.derivationSequence.push(word);
return;
/*
* The forbidden predecessor sequences are expressed in forward-order in derivations.js,
* because they are easier to think about that way. But the conjugation code works in
* reverse order, so we have to consider the forbidden predecessor sequences in reverse
* order also. So start at the back of the sequence.
*/
for (let g = forbiddenPredecessorSequence.length - 1; g >= 0; --g, ++nextDerivationOffset) {
const nextDerivation = allDerivationsTaken[i + nextDerivationOffset];
if (!nextDerivation || nextDerivation.conjugatedWordType !== forbiddenPredecessorSequence[g]) {
break;
}
if (g === 0) {
return true; // A forbidden predecessor sequence was matched. Return true.
}
}
}
if (!derivationIsSilent(derivation)) {
this.derivationPath.push(derivation);
this.derivationSequence.push(word);
};
}
copy() {
return new DerivationInformation(this.derivationPath, this.derivationSequence);
}
return false; // No forbidden predecessor sequence was matched.
}
getDerivationPathFinalForm() {
return this.derivationPath.slice().reverse();
function unconjugateRecursive(word, wordType, derivationSequence, level, levelLimit) {
if (tookInvalidDerivationPath(derivationSequence)) {
return [];
}
getDerivationSequenceFinalForm() {
return this.derivationSequence.slice().reverse();
}
}
function unconjugateRecursive(word, wordType, derivationInformation, level, levelLimit) {
// Invalid base cases
if (level > levelLimit) {
/*
* Recursion is going too deep, abort.
*
* There should not be any potential for infinite recursion,
* however it is difficult to verify with certainty that
* there is none. Therefore, a way to break out of the
* recursion is provided for safety (relying on running out of space
* on the stack and throwing might take too long).
*/
return [];
}
// Valid base case
// Check if we have reached a potentially valid result, and if so, add it to the results.
let results = [];
let isIchiGoDanVerb = ichiGoDanVerbSet[word] === true;
let isSuruVerb = word.endsWith('する') && suruVerbSet[word.replace('する', '')] === true;
let isDictionaryForm = wordType === WordType.GODAN_VERB || wordType === WordType.ICHIDAN_VERB || wordType === WordType.SENTENCE;
if ((isIchiGoDanVerb || isSuruVerb) && isDictionaryForm) {
let nextDerivationInformation = derivationInformation.copy();
nextDerivationInformation.tryPushDerivation(undefined, word);
const isDictionaryForm = wordType === WordType.GODAN_VERB || wordType === WordType.ICHIDAN_VERB || wordType === WordType.SENTENCE;
if (isDictionaryForm) {
const derivationSequenceOutputForm = createDerivationSequenceOutputForm(derivationSequence);
results.push({
base: word,
derivationPath: nextDerivationInformation.getDerivationPathFinalForm(),
currentDerivationSequence: nextDerivationInformation.getDerivationSequenceFinalForm()});
derivationSequence: derivationSequenceOutputForm,
});
}
// Take possible derivation paths
for (let candidateDerivation of getCandidateDerivationsForWordType(wordType)) {
if (canTakeDerivationPath(word, candidateDerivation)) {
let nextDerivationInformation = derivationInformation.copy();
nextDerivationInformation.tryPushDerivation(candidateDerivation, word);
let unconjugatedWord = word.substring(0, word.length - candidateDerivation.conjugatedEnding.length) + candidateDerivation.unconjugatedEnding;
results = results.concat(unconjugateRecursive(unconjugatedWord, candidateDerivation.unconjugatedWordType, nextDerivationInformation, level + 1, levelLimit));
}
// Take possible derivation paths and recurse.
for (let candidateDerivation of getCandidateDerivations(wordType, word)) {
const nextDerivationSequence = addDerivationToSequence(derivationSequence, candidateDerivation, word);
const unconjugatedWord = unconjugateWord(word, candidateDerivation);
results = results.concat(unconjugateRecursive(unconjugatedWord, candidateDerivation.unconjugatedWordType, nextDerivationSequence, level + 1, levelLimit));
}

@@ -195,3 +221,8 @@ return results;

/*
 * Returns the string without its last character.
 *
 * "Character" here means Unicode code point: when the string ends with a
 * UTF-16 surrogate pair (a character outside the Basic Multilingual Plane,
 * which includes some kanji), both code units are removed together so that
 * the result never ends in a dangling high surrogate. Combining sequences
 * are not treated specially. An empty string yields an empty string.
 */
function removeLastCharacter(str) {
  const lastIndex = str.length - 1;
  if (lastIndex < 1) {
    // Empty or single-code-unit string: nothing can be left over.
    return '';
  }

  const lastUnit = str.charCodeAt(lastIndex);
  const previousUnit = str.charCodeAt(lastIndex - 1);
  const endsWithSurrogatePair =
    lastUnit >= 0xdc00 && lastUnit <= 0xdfff &&
    previousUnit >= 0xd800 && previousUnit <= 0xdbff;

  return str.substring(0, endsWithSurrogatePair ? lastIndex - 1 : lastIndex);
}
module.exports.unconjugate = function(word, fuzzy, recursionDepthLimit) {
// Handle the 'recursionDepthLimit' argument being passed as the second argument, and the 'fuzzy' argument being omitted.
if (typeof fuzzy === typeof 1) {

@@ -201,17 +232,16 @@ recursionDepthLimit = fuzzy;

}
fuzzy = !!fuzzy;
recursionDepthLimit = recursionDepthLimit || 999999;
let results = unconjugateRecursive(word, WordType.SENTENCE, new DerivationInformation(), 0, recursionDepthLimit);
recursionDepthLimit = recursionDepthLimit || Math.MAX_SAFE_INTEGER;
const results = unconjugateRecursive(word, WordType.SENTENCE, createNewDerivationSequence(), 0, recursionDepthLimit);
// If there are no results but the search should be fuzzy, chop off the last character one by one and see if we can get a substring that has results
if (fuzzy && results.length === 0) {
// Chop off the last character one by one and see if we can get a substring that has results
let truncatedWord = word.substring(0, word.length - 1);
const truncatedWord = removeLastCharacter(word);
while (truncatedWord && results.length === 0) {
results = unconjugateRecursive(truncatedWord, WordType.SENTENCE, new DerivationInformation(), 0, recursionDepthLimit);
truncatedWord = truncatedWord.substring(0, truncatedWord.length - 1);
results = unconjugateRecursive(truncatedWord, WordType.SENTENCE, createNewDerivationSequence(), 0, recursionDepthLimit);
truncatedWord = removeLastCharacter(truncatedWord);
}
}
results = reduceResultDerivationsToWordTypes(results);
results = filterResultsWithImpossibleSequences(results);
return sortByLikelihood(results);

@@ -218,0 +248,0 @@ }

{
"name": "jp-verbs",
"version": "1.0.0",
"version": "1.0.1",
"description": "Unconjugate conjugated Japanese verbs.",

@@ -11,3 +11,3 @@ "main": "index.js",

"type": "git",
"url": "git+https://github.com/mistval/jp-verb-conjugator.git"
"url": "git+https://github.com/mistval/jp-verb-deconjugator.git"
},

@@ -25,9 +25,9 @@ "keywords": [

],
"author": "Randall Schmidt",
"author": "mistval",
"license": "MIT",
"bugs": {
"url": "https://github.com/mistval/jp-verb-conjugator/issues"
"url": "https://github.com/mistval/jp-verb-deconjugator/issues"
},
"homepage": "https://github.com/mistval/jp-verb-conjugator#readme",
"homepage": "https://github.com/mistval/jp-verb-deconjugator#readme",
"dependencies": {}
}

@@ -57,4 +57,2 @@ # jp-verb-conjugator

100% of the code is covered by tests; however, not every rule is covered.
After installing nyc and mocha globally, run tests with ```npm test```

@@ -61,0 +59,0 @@

@@ -26,5 +26,5 @@ const Conjugator = require('./../index.js');

const desiredDerivationPathString = derivationPath.join(combinationCharacter);
const actualDerivationPathString = bestResult.derivationPath.join(combinationCharacter);
const actualDerivationPathString = bestResult.derivationSequence.derivations.join(combinationCharacter);
for (let derivation of derivationPath.concat(bestResult.derivationPath)) {
for (let derivation of derivationPath.concat(bestResult.derivationSequence.derivations)) {
if (!derivation) {

@@ -49,3 +49,3 @@ throw new Error('undefined derivation name for word ' + wordToDeconjugate);

assertDerivationPathMatches('誘ってもらわれてくれなかった', '誘う', WordType.TE_FORM, WordType.MORAU, WordType.PASSIVE, WordType.TE_FORM, WordType.KURERU, WordType.NEGATIVE_NAI_VERB, WordType.PLAIN_PAST);
assertDerivationPathMatches('遊んでるべく', '遊ぶ', WordType.TE_FORM, WordType.IRU, WordType.BEKU);
assertDerivationPathMatches('遊んでるべく', '遊ぶ', WordType.TE_FORM, WordType.SHORT_IRU, WordType.BEKU);
assertDerivationPathMatches('敷きやがりなさい', '敷く', WordType.MASU_STEM, WordType.YAGARU, WordType.MASU_STEM, WordType.NASAI);

@@ -111,3 +111,3 @@ assertDerivationPathMatches('重なり次第だ', '重なる', WordType.MASU_STEM, WordType.SHIDAI, WordType.DA);

assertDerivationPathMatches('帰ったのだよ', '帰る', WordType.PLAIN_PAST, WordType.EXPLANATORY_NO_PARTICLE, WordType.DA, WordType.YO_PARTICLE);
assertDerivationPathMatches('殺されるな', '殺す', WordType.PASSIVE, WordType.NA_COMMAND);
assertDerivationPathMatches('殺されるな', '殺す', WordType.PASSIVE, WordType.NA_PARTICLE);
assertDerivationPathMatches('はしゃぐことがあることがあるだろうよ', 'はしゃぐ', WordType.OCCASIONAL_OCCURANCE_ARU, WordType.OCCASIONAL_OCCURANCE_ARU, WordType.DAROU, WordType.YO_PARTICLE);

@@ -147,7 +147,7 @@ assertDerivationPathMatches('止めることができる', '止める', WordType.POTENTIAL);

assertDerivationPathMatches('語らないです', '語る', WordType.NEGATIVE_NAI_VERB, WordType.POLITE_DESU_VERB);
assertDerivationPathMatches('弾けない', '弾ける', WordType.NEGATIVE_NAI_VERB);
assertDerivationPathMatches('弾けない', '弾く', WordType.POTENTIAL, WordType.NEGATIVE_NAI_VERB);
assertDerivationPathMatches('集まりました', '集まる', WordType.MASU_STEM, WordType.POLITE_MASU, WordType.POLITE_MASHITA);
assertDerivationPathMatches('信じました', '信じる', WordType.MASU_STEM, WordType.POLITE_MASU, WordType.POLITE_MASHITA);
assertDerivationPathMatches('笑いませんでした', '笑う', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);
assertDerivationPathMatches('放った', '放つ', WordType.PLAIN_PAST);
assertDerivationPathMatches('放った', '放る', WordType.PLAIN_PAST);
assertDerivationPathMatches('覚えた', '覚える', WordType.PLAIN_PAST);

@@ -171,3 +171,3 @@ assertDerivationPathMatches('言わなかった', '言う', WordType.NEGATIVE_NAI_VERB, WordType.PLAIN_PAST);

assertDerivationPathMatches('黙れ', '黙る', WordType.IMPERATIVE);
assertDerivationPathMatches('いろ', 'いる', WordType.IMPERATIVE);
//assertDerivationPathMatches('いろ', 'いる', WordType.IMPERATIVE);
assertDerivationPathMatches('食べよう', '食べる', WordType.VOLITIONAL);

@@ -222,6 +222,5 @@ assertDerivationPathMatches('殺されましょう', '殺す', WordType.PASSIVE, WordType.MASU_STEM, WordType.POLITE_MASU, WordType.POLITE_MASHOU);

assertDerivationPathMatches('書かれてあった', '書く', WordType.PASSIVE, WordType.TE_FORM, WordType.ARU, WordType.PLAIN_PAST);
assertDerivationPathMatches('書かれてなかった', '書く', WordType.PASSIVE, WordType.TE_FORM, WordType.NEGATIVE_ARU_OR_IRU, WordType.PLAIN_PAST);
assertDerivationPathMatches('書かれてなかった', '書く', WordType.PASSIVE, WordType.TE_FORM, WordType.ARU, WordType.NEGATIVE_NAI_VERB, WordType.PLAIN_PAST);
assertDerivationPathMatches('撫でさせられていさせなさい', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);
assertDerivationPathMatches('撫でさせられていさせな', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);
assertDerivationPathMatches('撫でさせられていさせな', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);
assertDerivationPathMatches('撫でさせられてはいさせな', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.TE_FORM, WordType.WA_AFTER_TE, WordType.IRU, WordType.CAUSATIVE, WordType.MASU_STEM, WordType.NASAI);

@@ -240,3 +239,2 @@ assertDerivationPathMatches('書かず', '書く', WordType.ZU);

assertDerivationPathMatches('しませんでした', 'する', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);
assertDerivationPathMatches('しませんでした', 'する', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);
assertDerivationPathMatches('為さいませんでした', '為さる', WordType.MASU_STEM, WordType.POLITE_MASEN, WordType.POLITE_MASEN_DESHITA);

@@ -265,6 +263,6 @@ assertDerivationPathMatches('書いてください', '書く', WordType.TE_FORM, WordType.KUDASAI);

it('Respects maximum recursion depth', function() {
assertDerivationPathMatches('話してあるでしょう', '話す', WordType.TE_FORM, WordType.ARU, WordType.DAROU, WordType.POLITE_DESHOU);
let resultsWithMaximumRecursionDepth = Conjugator.unconjugate('話してあるでしょう', 3);
assert(resultsWithMaximumRecursionDepth.length === 0);
//assertDerivationPathMatches('撫でさせられぬよね', '撫でる', WordType.CAUSATIVE, WordType.POTENTIAL_PASSIVE, WordType.NEGATIVE_NAI_VERB, WordType.NU, WordType.YO_PARTICLE, WordType.NE_PARTICLE);
//let resultsWithMaximumRecursionDepth = Conjugator.unconjugate('撫でさせられぬよね', 3);
//assert(resultsWithMaximumRecursionDepth.length === 0);
});
});

@@ -113,7 +113,8 @@ 'use strict'

BEKU: 'べく In Order To',
GODAN_VERB: 'godan verb (silent)',
ICHIDAN_VERB: 'ichidan verb (silent)',
ADJECTIVE: 'adjective (silent)',
GODAN_VERB: 'godan verb',
ICHIDAN_VERB: 'ichidan verb',
ADJECTIVE: 'adjective',
SENTENCE_ENDING_PARTICLES: 'sentence ending particles (silent)',
NEGATIVE_ARU_OR_IRU: 'Negative ある or いる',
SHORT_IRU: 'てる・でる Continuing State/Result',
};

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc