Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

nlptoolkit-morphologicalanalysis

Package Overview
Dependencies
Maintainers
1
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

nlptoolkit-morphologicalanalysis - npm Package Compare versions

Comparing version 1.0.4 to 1.0.5

parses/açıkla.txt

7

dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts

@@ -104,3 +104,10 @@ import { FiniteStateMachine } from "./FiniteStateMachine";

* Ex : Allak,
* !isPlural, !isPortmanteau and isCode, if root holds the conditions then it gets the state
* with the name of CodeRoot.
* Ex : 9400f,
* <p>
* !isPlural, !isPortmanteau and isMetric, if root holds the conditions then it gets the state
* with the name of MetricRoot.
* Ex : 11x8x12,
* <p>
* !isPlural, !isPortmanteau and isNumeral, if root holds the conditions then it gets the state

@@ -107,0 +114,0 @@ * with the name of CardinalRoot.

@@ -222,2 +222,8 @@ import { MorphologicalParse } from "./MorphologicalParse";

* <p>
* If it is "CodeRoot", it assigns concatenation of first item of formList and +CODE to the result String.
* Ex : 5000-WX
* <p>
* If it is "MetricRoot", it assigns concatenation of first item of formList and +METRIC to the result String.
* Ex : 6cmx12cm
* <p>
* If it is "QuestionRoot", it assigns concatenation of first item of formList and +QUES to the result String.

@@ -224,0 +230,0 @@ * Ex : Mı

72

dist/MorphologicalAnalysis/FsmParse.js

@@ -433,2 +433,8 @@ (function (factory) {

* <p>
* If it is "CodeRoot", it assigns concatenation of first item of formList and +CODE to the result String.
* Ex : 5000-WX
* <p>
* If it is "MetricRoot", it assigns concatenation of first item of formList and +METRIC to the result String.
* Ex : 6cmx12cm
* <p>
* If it is "QuestionRoot", it assigns concatenation of first item of formList and +QUES to the result String.

@@ -553,28 +559,38 @@ * Ex : Mı

else {
if (this.suffixList[0].getName() == "QuestionRoot") {
result = "mi+QUES";
if (this.suffixList[0].getName() == "CodeRoot") {
result = this.formList[0] + "+CODE";
}
else {
if (this.suffixList[0].getName() == "PostP") {
if (this.formList[0] == "karşı" || this.formList[0] == "ilişkin" || this.formList[0] == "göre" || this.formList[0] == "kadar" || this.formList[0] == "ait" || this.formList[0] == "yönelik" || this.formList[0] == "rağmen" || this.formList[0] == "değin" || this.formList[0] == "dek" || this.formList[0] == "doğru" || this.formList[0] == "karşın" || this.formList[0] == "dair" || this.formList[0] == "atfen" || this.formList[0] == "binaen" || this.formList[0] == "hitaben" || this.formList[0] == "istinaden" || this.formList[0] == "mahsuben" || this.formList[0] == "mukabil" || this.formList[0] == "nazaran") {
result = this.formList[0] + "+POSTP+PCDAT";
if (this.suffixList[0].getName() == "MetricRoot") {
result = this.formList[0] + "+METRIC";
}
else {
if (this.suffixList[0].getName() == "QuestionRoot") {
result = "mi+QUES";
}
else {
if (this.formList[0] == "sonra" || this.formList[0] == "önce" || this.formList[0] == "beri" || this.formList[0] == "fazla" || this.formList[0] == "dolayı" || this.formList[0] == "itibaren" || this.formList[0] == "başka" || this.formList[0] == "çok" || this.formList[0] == "evvel" || this.formList[0] == "ötürü" || this.formList[0] == "yana" || this.formList[0] == "öte" || this.formList[0] == "aşağı" || this.formList[0] == "yukarı" || this.formList[0] == "dışarı" || this.formList[0] == "az" || this.formList[0] == "gayrı") {
result = this.formList[0] + "+POSTP+PCABL";
}
else {
if (this.formList[0] == "yanısıra") {
result = this.formList[0] + "+POSTP+PCGEN";
if (this.suffixList[0].getName() == "PostP") {
if (this.formList[0] == "karşı" || this.formList[0] == "ilişkin" || this.formList[0] == "göre" || this.formList[0] == "kadar" || this.formList[0] == "ait" || this.formList[0] == "yönelik" || this.formList[0] == "rağmen" || this.formList[0] == "değin" || this.formList[0] == "dek" || this.formList[0] == "doğru" || this.formList[0] == "karşın" || this.formList[0] == "dair" || this.formList[0] == "atfen" || this.formList[0] == "binaen" || this.formList[0] == "hitaben" || this.formList[0] == "istinaden" || this.formList[0] == "mahsuben" || this.formList[0] == "mukabil" || this.formList[0] == "nazaran") {
result = this.formList[0] + "+POSTP+PCDAT";
}
else {
if (this.formList[0] == "birlikte" || this.formList[0] == "beraber") {
result = this.formList[0] + "+POSTP+PCINS";
if (this.formList[0] == "sonra" || this.formList[0] == "önce" || this.formList[0] == "beri" || this.formList[0] == "fazla" || this.formList[0] == "dolayı" || this.formList[0] == "itibaren" || this.formList[0] == "başka" || this.formList[0] == "çok" || this.formList[0] == "evvel" || this.formList[0] == "ötürü" || this.formList[0] == "yana" || this.formList[0] == "öte" || this.formList[0] == "aşağı" || this.formList[0] == "yukarı" || this.formList[0] == "dışarı" || this.formList[0] == "az" || this.formList[0] == "gayrı") {
result = this.formList[0] + "+POSTP+PCABL";
}
else {
if (this.formList[0] == "aşkın" || this.formList[0] == "takiben") {
result = this.formList[0] + "+POSTP+PCACC";
if (this.formList[0] == "yanısıra") {
result = this.formList[0] + "+POSTP+PCGEN";
}
else {
result = this.formList[0] + "+POSTP+PCNOM";
if (this.formList[0] == "birlikte" || this.formList[0] == "beraber") {
result = this.formList[0] + "+POSTP+PCINS";
}
else {
if (this.formList[0] == "aşkın" || this.formList[0] == "takiben") {
result = this.formList[0] + "+POSTP+PCACC";
}
else {
result = this.formList[0] + "+POSTP+PCNOM";
}
}
}

@@ -584,16 +600,16 @@ }

}
}
}
else {
if (this.suffixList[0].getName().startsWith("PronounRoot")) {
result = this.pronounTransition();
}
else {
if (this.suffixList[0].getName() == "OrdinalRoot") {
result = this.formList[0] + "+NUM+ORD";
}
else {
if (this.suffixList[0].getName().startsWith("Adjective")) {
result = this.formList[0] + "+ADJ";
if (this.suffixList[0].getName().startsWith("PronounRoot")) {
result = this.pronounTransition();
}
else {
if (this.suffixList[0].getName() == "OrdinalRoot") {
result = this.formList[0] + "+NUM+ORD";
}
else {
if (this.suffixList[0].getName().startsWith("Adjective")) {
result = this.formList[0] + "+ADJ";
}
}
}
}

@@ -600,0 +616,0 @@ }

@@ -5,2 +5,3 @@ import { FsmParse } from "./FsmParse";

private readonly fsmParses;
static longestRootExceptions: string[];
/**

@@ -46,3 +47,4 @@ * A constructor of {@link FsmParseList} class which takes an {@link Array} fsmParses as an input. First it sorts

* The getParseWithLongestRootWord method returns the parse with the longest root word. If more than one parse has the
* longest root word, the first parse with that root is returned.
* longest root word, the first parse with that root is returned. If the longest root word belongs to an
* exceptional case, the parse with the next longest root word that is not an exceptional case is returned.
*

@@ -53,2 +55,9 @@ * @return FsmParse Parse with the longest root word.

/**
* The isLongestRootException method returns true if the longest root word belongs to an exceptional case, false otherwise.
*
* @param fsmParse {@link FsmParse} input.
* @return true if the longest root belongs to an exceptional case, false otherwise.
*/
isLongestRootException(fsmParse: FsmParse): boolean;
/**
* The reduceToParsesWithSameRoot method takes a {@link String} currentWithPos as an input and loops i times till

@@ -55,0 +64,0 @@ * i equals to the size of the fsmParses {@link Array}. If the given currentRoot does not equal to the root of ith

@@ -87,3 +87,4 @@ (function (factory) {

* The getParseWithLongestRootWord method returns the parse with the longest root word. If more than one parse has the
* longest root word, the first parse with that root is returned.
* longest root word, the first parse with that root is returned. If the longest root word belongs to an
* exceptional case, the parse with the next longest root word that is not an exceptional case is returned.
*

@@ -96,3 +97,3 @@ * @return FsmParse Parse with the longest root word.

for (let currentParse of this.fsmParses) {
if (currentParse.getWord().getName().length > maxLength) {
if (currentParse.getWord().getName().length > maxLength && !this.isLongestRootException(currentParse)) {
maxLength = currentParse.getWord().getName().length;

@@ -105,2 +106,29 @@ bestParse = currentParse;

/**
* The isLongestRootException method returns true if the longest root word belongs to an exceptional case, false otherwise.
*
* @param fsmParse {@link FsmParse} input.
* @return true if the longest root belongs to an exceptional case, false otherwise.
*/
isLongestRootException(fsmParse) {
let surfaceForm = fsmParse.getSurfaceForm();
let root = fsmParse.getWord().getName();
for (let longestRootException of FsmParseList.longestRootExceptions) {
let exceptionItems = longestRootException.split(" ");
let surfaceFormEnding = exceptionItems[0];
let longestRootEnding = exceptionItems[1];
let longestRootPos = exceptionItems[2];
let possibleRootPos = exceptionItems[3];
let possibleRoot = surfaceForm.replace(surfaceFormEnding, "");
if (surfaceForm.endsWith(surfaceFormEnding) && root.endsWith(longestRootEnding) &&
fsmParse.getRootPos() == longestRootPos) {
for (let currentParse of this.fsmParses) {
if (currentParse.getWord().getName() == possibleRoot && currentParse.getRootPos() == possibleRootPos) {
return true;
}
}
}
}
return false;
}
/**
* The reduceToParsesWithSameRoot method takes a {@link String} currentWithPos as an input and loops i times till

@@ -244,3 +272,83 @@ * i equals to the size of the fsmParses {@link Array}. If the given currentRoot does not equal to the root of ith

exports.FsmParseList = FsmParseList;
/**
 * Table of exceptional cases consulted by isLongestRootException.
 *
 * Every entry is one space-separated string with four fields, read in this order:
 *   1. ending of the surface form,
 *   2. ending of the longest root,
 *   3. part of speech of the longest root,
 *   4. part of speech of the possible (alternative) root.
 *
 * NOTE(review): "rken r NOUN VERB" is listed twice; the duplicate is redundant.
 */
FsmParseList.longestRootExceptions = [
"acağı acak NOUN VERB", "acağım acak NOUN VERB", "acağımı acak NOUN VERB", "acağımız acak NOUN VERB",
"acağın acak NOUN VERB",
"acağına acak NOUN VERB", "acağını acak NOUN VERB", "acağının acak NOUN VERB", "acağınız acak NOUN VERB",
"acağınıza acak NOUN VERB",
"acağınızdır acak NOUN VERB", "acağınızı acak NOUN VERB", "acağınızın acak NOUN VERB", "acağız acak NOUN VERB",
"acakları acak NOUN VERB",
"acaklarını acak NOUN VERB", "acaksa acak NOUN VERB", "acaktır acak NOUN VERB", "ardım ar NOUN VERB",
"arız ar NOUN VERB",
"arken ar NOUN VERB", "arsa ar NOUN VERB", "arsak ar NOUN VERB", "arsanız ar NOUN VERB", "arsınız ar NOUN VERB",
"eceği ecek NOUN VERB", "eceğim ecek NOUN VERB", "eceğimi ecek NOUN VERB", "eceğimiz ecek NOUN VERB",
"eceğin ecek NOUN VERB",
"eceğine ecek NOUN VERB", "eceğini ecek NOUN VERB", "eceğinin ecek NOUN VERB", "eceğiniz ecek NOUN VERB",
"eceğinizdir ecek NOUN VERB",
"eceğinize ecek NOUN VERB", "eceğinizi ecek NOUN VERB", "eceğinizin ecek NOUN VERB", "eceğiz ecek NOUN VERB",
"ecekleri ecek NOUN VERB",
"eceklerini ecek NOUN VERB", "ecekse ecek NOUN VERB", "ecektir ecek NOUN VERB", "erdim er NOUN VERB",
"eriz er NOUN VERB",
"erken er NOUN VERB", "erse er NOUN VERB", "ersek er NOUN VERB", "erseniz er NOUN VERB", "ersiniz er NOUN VERB",
"ilen i VERB VERB", "ilene i VERB VERB", "ilin i VERB VERB", "ilince i VERB VERB", "imiz i ADJ NOUN",
"in i ADJ NOUN", "inde i ADJ NOUN", "ine i ADJ NOUN", "ini i ADJ NOUN", "inin i ADJ NOUN",
"ılan ı NOUN VERB", "ılana ı NOUN VERB", "ılın ı NOUN VERB", "ılınca ı NOUN VERB", "la la VERB NOUN",
"lar la VERB NOUN", "lardan la VERB NOUN", "lardandır la VERB NOUN", "lardır la VERB NOUN", "ları la VERB NOUN",
"larıdır la VERB NOUN", "larım la VERB NOUN", "larımdan la VERB NOUN", "larımız la VERB NOUN",
"larımıza la VERB NOUN",
"larımızda la VERB NOUN", "larımızdan la VERB NOUN", "larımızdaydı la VERB NOUN", "larımızı la VERB NOUN",
"larımızın la VERB NOUN",
"larımızla la VERB NOUN", "ların la VERB NOUN", "larına la VERB NOUN", "larında la VERB NOUN",
"larındaki la VERB NOUN",
"larındakiler la VERB NOUN", "larındakilere la VERB NOUN", "larındakileri la VERB NOUN",
"larındakilerin la VERB NOUN", "larından la VERB NOUN",
"larındandır la VERB NOUN", "larındaysa la VERB NOUN", "larını la VERB NOUN", "larının la VERB NOUN",
"larınız la VERB NOUN",
"larınıza la VERB NOUN", "larınızda la VERB NOUN", "larınızdaki la VERB NOUN", "larınızdan la VERB NOUN",
"larınızı la VERB NOUN",
"larınızın la VERB NOUN", "larınızla la VERB NOUN", "larıyla la VERB NOUN", "le le VERB NOUN",
"ler le VERB NOUN",
"lerden le VERB NOUN", "lerdendir le VERB NOUN", "lerdir le VERB NOUN", "leri le VERB NOUN",
"leridir le VERB NOUN",
"lerim le VERB NOUN", "lerimden le VERB NOUN", "lerimiz le VERB NOUN", "lerimizde le VERB NOUN",
"lerimizden le VERB NOUN",
"lerimizdeydi le VERB NOUN", "lerimize le VERB NOUN", "lerimizi le VERB NOUN", "lerimizin le VERB NOUN",
"lerimizle le VERB NOUN",
"lerin le VERB NOUN", "lerinde le VERB NOUN", "lerindeki le VERB NOUN", "lerindekiler le VERB NOUN",
"lerindekilere le VERB NOUN",
"lerindekileri le VERB NOUN", "lerindekilerin le VERB NOUN", "lerinden le VERB NOUN",
"lerindendir le VERB NOUN", "lerindeyse le VERB NOUN",
"lerine le VERB NOUN", "lerini le VERB NOUN", "lerinin le VERB NOUN", "leriniz le VERB NOUN",
"lerinizde le VERB NOUN",
"lerinizdeki le VERB NOUN", "lerinizden le VERB NOUN", "lerinize le VERB NOUN", "lerinizi le VERB NOUN",
"lerinizin le VERB NOUN",
"lerinizle le VERB NOUN", "leriyle le VERB NOUN", "m m NOUN NOUN", "madan ma NOUN VERB", "malı ma NOUN VERB",
"malıdır ma NOUN VERB", "malıdırlar ma NOUN VERB", "malılar ma NOUN VERB", "malısınız ma NOUN VERB",
"malıyım ma NOUN VERB",
"malıyız ma NOUN VERB", "mam ma NOUN VERB", "mama ma NOUN VERB", "mamız ma NOUN VERB", "mamıza ma NOUN VERB",
"mamızı ma NOUN VERB", "manız ma NOUN VERB", "manızda ma NOUN VERB", "manızdır ma NOUN VERB",
"manızı ma NOUN VERB",
"manızla ma NOUN VERB", "ması ma NOUN VERB", "masıdır ma NOUN VERB", "masın ma NOUN VERB",
"masına ma NOUN VERB",
"masında ma NOUN VERB", "masındaki ma NOUN VERB", "masını ma NOUN VERB", "masıyla ma NOUN VERB",
"mdan m NOUN NOUN",
"meden me NOUN VERB", "meli me NOUN VERB", "melidir me NOUN VERB", "melidirler me NOUN VERB",
"meliler me NOUN VERB",
"melisiniz me NOUN VERB", "meliyim me NOUN VERB", "meliyiz me NOUN VERB", "mem me NOUN VERB",
"meme me NOUN VERB",
"memiz me NOUN VERB", "memize me NOUN VERB", "memizi me NOUN VERB", "meniz me NOUN VERB",
"menizde me NOUN VERB",
"menizdir me NOUN VERB", "menizi me NOUN VERB", "menizle me NOUN VERB", "mesi me NOUN VERB",
"mesidir me NOUN VERB",
"mesin me NOUN VERB", "mesinde me NOUN VERB", "mesindeki me NOUN VERB", "mesine me NOUN VERB",
"mesini me NOUN VERB",
"mesiyle me NOUN VERB", "mişse miş NOUN VERB", "mını m NOUN NOUN", "mışsa mış NOUN VERB", "mız m NOUN NOUN",
"n n NOUN NOUN", "na n NOUN NOUN", "ne n NOUN NOUN", "nin n NOUN NOUN", "niz n NOUN NOUN",
"nın n NOUN NOUN", "nız n NOUN NOUN", "rdim r NOUN VERB", "rdım r NOUN VERB", "riz r NOUN VERB",
"rız r NOUN VERB", "rken r NOUN VERB", "rken r NOUN VERB", "rsa r NOUN VERB", "rsak r NOUN VERB",
"rsanız r NOUN VERB", "rse r NOUN VERB", "rsek r NOUN VERB", "rseniz r NOUN VERB", "rsiniz r NOUN VERB",
"rsınız r NOUN VERB", "sa sa VERB ADJ", "se se VERB ADJ", "ulan u NOUN VERB", "un un VERB NOUN",
"üne ün VERB NOUN", "unun un VERB NOUN", "ince i NOUN VERB", "unca u NOUN VERB", "ınca ı NOUN VERB",
"unca un NOUN VERB", "ilen ile VERB VERB"
];
});
//# sourceMappingURL=FsmParseList.js.map

@@ -185,3 +185,3 @@ (function (factory) {

"DIST", "ADAMANTLY", "PERCENT", "WITHOUTBEINGABLETOHAVEDONESO", "DIM",
"PERS", "FRACTION", "HASHTAG", "EMAIL", "DATE"];
"PERS", "FRACTION", "HASHTAG", "EMAIL", "DATE", "CODE", "METRIC"];
InflectionalGroup.morphoTags = [MorphologicalTag_1.MorphologicalTag.NOUN, MorphologicalTag_1.MorphologicalTag.ADVERB, MorphologicalTag_1.MorphologicalTag.ADJECTIVE,

@@ -212,4 +212,5 @@ MorphologicalTag_1.MorphologicalTag.VERB, MorphologicalTag_1.MorphologicalTag.A1SG, MorphologicalTag_1.MorphologicalTag.A2SG, MorphologicalTag_1.MorphologicalTag.A3SG, MorphologicalTag_1.MorphologicalTag.A1PL,

MorphologicalTag_1.MorphologicalTag.DISTRIBUTIVE, MorphologicalTag_1.MorphologicalTag.ADAMANTLY, MorphologicalTag_1.MorphologicalTag.PERCENT, MorphologicalTag_1.MorphologicalTag.WITHOUTBEINGABLETOHAVEDONESO, MorphologicalTag_1.MorphologicalTag.DIMENSION,
MorphologicalTag_1.MorphologicalTag.PERSONALPRONOUN, MorphologicalTag_1.MorphologicalTag.FRACTION, MorphologicalTag_1.MorphologicalTag.HASHTAG, MorphologicalTag_1.MorphologicalTag.EMAIL, MorphologicalTag_1.MorphologicalTag.DATE];
MorphologicalTag_1.MorphologicalTag.PERSONALPRONOUN, MorphologicalTag_1.MorphologicalTag.FRACTION, MorphologicalTag_1.MorphologicalTag.HASHTAG, MorphologicalTag_1.MorphologicalTag.EMAIL, MorphologicalTag_1.MorphologicalTag.DATE,
MorphologicalTag_1.MorphologicalTag.CODE, MorphologicalTag_1.MorphologicalTag.METRIC];
});
//# sourceMappingURL=InflectionalGroup.js.map

@@ -509,3 +509,11 @@ export declare enum MorphologicalTag {

*/
DATE = 126
DATE = 126,
/**
* Code : i7-9700K
*/
CODE = 127,
/**
* Metric : 6cmx7cmx8cm
*/
METRIC = 128
}

@@ -523,4 +523,12 @@ (function (factory) {

MorphologicalTag[MorphologicalTag["DATE"] = 126] = "DATE";
/**
* Code : i7-9700K
*/
MorphologicalTag[MorphologicalTag["CODE"] = 127] = "CODE";
/**
* Metric : 6cmx7cmx8cm
*/
MorphologicalTag[MorphologicalTag["METRIC"] = 128] = "METRIC";
})(MorphologicalTag = exports.MorphologicalTag || (exports.MorphologicalTag = {}));
});
//# sourceMappingURL=MorphologicalTag.js.map

@@ -144,3 +144,3 @@ (function (factory) {

}
if (this._with == "dHr") {
if (this._with == "DHr") {
if (this._toState.getName() == "Adverb") {

@@ -147,0 +147,0 @@ return true;

{
"name": "nlptoolkit-morphologicalanalysis",
"version": "1.0.4",
"version": "1.0.5",
"description": "Turkish Morphological Analysis Library",

@@ -23,6 +23,6 @@ "main": "index.js",

"mocha": "^9.1.3",
"nlptoolkit-corpus": "^1.0.2",
"nlptoolkit-datastructure": "^1.0.0",
"nlptoolkit-dictionary": "^1.0.4",
"nlptoolkit-ngram": "^1.0.0",
"nlptoolkit-corpus": "^1.0.7",
"nlptoolkit-datastructure": "^1.0.1",
"nlptoolkit-dictionary": "^1.0.8",
"nlptoolkit-ngram": "^1.0.1",
"nlptoolkit-xmlparser": "^1.0.4",

@@ -29,0 +29,0 @@ "ts-node": "^10.4.0",

@@ -1,2 +0,2 @@

Morphological Analysis [<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video1.jpg" width="5%">](https://youtu.be/KxguxpbgDQc)[<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video2.jpg" width="5%">](https://youtu.be/UMmA2LMkAkw)[<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video3.jpg" width="5%">](https://youtu.be/dP97ovMSSfE)[<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video4.jpg" width="5%">](https://youtu.be/Tgmy5tts_pY)
Morphological Analysis
============

@@ -26,2 +26,7 @@

Video Lectures
============
[<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video1.jpg" width="50%">](https://youtu.be/KxguxpbgDQc)[<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video2.jpg" width="50%">](https://youtu.be/UMmA2LMkAkw)[<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video3.jpg" width="50%">](https://youtu.be/dP97ovMSSfE)[<img src="https://github.com/StarlangSoftware/TurkishMorphologicalAnalysis/blob/master/video4.jpg" width="50%">](https://youtu.be/Tgmy5tts_pY)
For Developers

@@ -35,2 +40,44 @@ ============

## Requirements
* [Node.js 14 or higher](#Node.js)
* [Git](#git)
### Node.js
To check if you have a compatible version of Node.js installed, use the following command:
node -v
You can find the latest version of Node.js [here](https://nodejs.org/en/download/).
### Git
Install the [latest version of Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).
## Npm Install
npm install nlptoolkit-morphologicalanalysis
## Download Code
To work on the code, create a fork from the GitHub page.
Use Git to clone the code to your local machine, or run the line below on Ubuntu:
git clone <your-fork-git-link>
A directory named after the repository will be created. Alternatively, you can use the link below to explore the code:
git clone https://github.com/starlangsoftware/morphologicalanalysis-js.git
## Open project with Webstorm IDE
Steps for opening the cloned project:
* Start IDE
* Select **File | Open** from main menu
* Choose the cloned project directory (e.g. `morphologicalanalysis-js`)
* Select open as project option
* Within a couple of seconds, the dependencies will be downloaded.
*
Detailed Description

@@ -37,0 +84,0 @@ ============

@@ -441,2 +441,8 @@ import {MorphologicalParse} from "./MorphologicalParse";

* <p>
* If it is "CodeRoot", it assigns concatenation of first item of formList and +CODE to the result String.
* Ex : 5000-WX
* <p>
* If it is "MetricRoot", it assigns concatenation of first item of formList and +METRIC to the result String.
* Ex : 6cmx12cm
* <p>
* If it is "QuestionRoot", it assigns concatenation of first item of formList and +QUES to the result String.

@@ -542,36 +548,44 @@ * Ex : Mı

} else {
if (this.suffixList[0].getName() == "QuestionRoot") {
result = "mi+QUES";
if (this.suffixList[0].getName() == "CodeRoot"){
result = this.formList[0] + "+CODE";
} else {
if (this.suffixList[0].getName() == "PostP") {
if (this.formList[0] == "karşı" || this.formList[0] == "ilişkin" || this.formList[0] == "göre" || this.formList[0] == "kadar" || this.formList[0] == "ait" || this.formList[0] == "yönelik" || this.formList[0] == "rağmen" || this.formList[0] == "değin" || this.formList[0] == "dek" || this.formList[0] == "doğru" || this.formList[0] == "karşın" || this.formList[0] == "dair" || this.formList[0] == "atfen" || this.formList[0] == "binaen" || this.formList[0] == "hitaben" || this.formList[0] == "istinaden" || this.formList[0] == "mahsuben" || this.formList[0] == "mukabil" || this.formList[0] == "nazaran") {
result = this.formList[0] + "+POSTP+PCDAT";
if (this.suffixList[0].getName() == "MetricRoot"){
result = this.formList[0] + "+METRIC";
} else {
if (this.suffixList[0].getName() == "QuestionRoot") {
result = "mi+QUES";
} else {
if (this.formList[0] == "sonra" || this.formList[0] == "önce" || this.formList[0] == "beri" || this.formList[0] == "fazla" || this.formList[0] == "dolayı" || this.formList[0] == "itibaren" || this.formList[0] == "başka" || this.formList[0] == "çok" || this.formList[0] == "evvel" || this.formList[0] == "ötürü" || this.formList[0] == "yana" || this.formList[0] == "öte" || this.formList[0] == "aşağı" || this.formList[0] == "yukarı" || this.formList[0] == "dışarı" || this.formList[0] == "az" || this.formList[0] == "gayrı") {
result = this.formList[0] + "+POSTP+PCABL";
} else {
if (this.formList[0] == "yanısıra") {
result = this.formList[0] + "+POSTP+PCGEN";
if (this.suffixList[0].getName() == "PostP") {
if (this.formList[0] == "karşı" || this.formList[0] == "ilişkin" || this.formList[0] == "göre" || this.formList[0] == "kadar" || this.formList[0] == "ait" || this.formList[0] == "yönelik" || this.formList[0] == "rağmen" || this.formList[0] == "değin" || this.formList[0] == "dek" || this.formList[0] == "doğru" || this.formList[0] == "karşın" || this.formList[0] == "dair" || this.formList[0] == "atfen" || this.formList[0] == "binaen" || this.formList[0] == "hitaben" || this.formList[0] == "istinaden" || this.formList[0] == "mahsuben" || this.formList[0] == "mukabil" || this.formList[0] == "nazaran") {
result = this.formList[0] + "+POSTP+PCDAT";
} else {
if (this.formList[0] == "birlikte" || this.formList[0] == "beraber") {
result = this.formList[0] + "+POSTP+PCINS";
if (this.formList[0] == "sonra" || this.formList[0] == "önce" || this.formList[0] == "beri" || this.formList[0] == "fazla" || this.formList[0] == "dolayı" || this.formList[0] == "itibaren" || this.formList[0] == "başka" || this.formList[0] == "çok" || this.formList[0] == "evvel" || this.formList[0] == "ötürü" || this.formList[0] == "yana" || this.formList[0] == "öte" || this.formList[0] == "aşağı" || this.formList[0] == "yukarı" || this.formList[0] == "dışarı" || this.formList[0] == "az" || this.formList[0] == "gayrı") {
result = this.formList[0] + "+POSTP+PCABL";
} else {
if (this.formList[0] == "aşkın" || this.formList[0] == "takiben") {
result = this.formList[0] + "+POSTP+PCACC";
if (this.formList[0] == "yanısıra") {
result = this.formList[0] + "+POSTP+PCGEN";
} else {
result = this.formList[0] + "+POSTP+PCNOM";
if (this.formList[0] == "birlikte" || this.formList[0] == "beraber") {
result = this.formList[0] + "+POSTP+PCINS";
} else {
if (this.formList[0] == "aşkın" || this.formList[0] == "takiben") {
result = this.formList[0] + "+POSTP+PCACC";
} else {
result = this.formList[0] + "+POSTP+PCNOM";
}
}
}
}
}
}
}
} else {
if (this.suffixList[0].getName().startsWith("PronounRoot")) {
result = this.pronounTransition();
} else {
if (this.suffixList[0].getName() == "OrdinalRoot") {
result = this.formList[0] + "+NUM+ORD";
} else {
if (this.suffixList[0].getName().startsWith("Adjective")) {
result = this.formList[0] + "+ADJ";
if (this.suffixList[0].getName().startsWith("PronounRoot")) {
result = this.pronounTransition();
} else {
if (this.suffixList[0].getName() == "OrdinalRoot") {
result = this.formList[0] + "+NUM+ORD";
} else {
if (this.suffixList[0].getName().startsWith("Adjective")) {
result = this.formList[0] + "+ADJ";
}
}
}

@@ -578,0 +592,0 @@ }

@@ -8,2 +8,82 @@ import {FsmParse} from "./FsmParse";

/**
 * Table of exceptional cases consulted by isLongestRootException.
 *
 * Every entry is one space-separated string with four fields, read in this order:
 *   1. ending of the surface form,
 *   2. ending of the longest root,
 *   3. part of speech of the longest root,
 *   4. part of speech of the possible (alternative) root.
 *
 * NOTE(review): "rken r NOUN VERB" is listed twice; the duplicate is redundant.
 */
static longestRootExceptions = [
"acağı acak NOUN VERB", "acağım acak NOUN VERB", "acağımı acak NOUN VERB", "acağımız acak NOUN VERB",
"acağın acak NOUN VERB",
"acağına acak NOUN VERB", "acağını acak NOUN VERB", "acağının acak NOUN VERB", "acağınız acak NOUN VERB",
"acağınıza acak NOUN VERB",
"acağınızdır acak NOUN VERB", "acağınızı acak NOUN VERB", "acağınızın acak NOUN VERB", "acağız acak NOUN VERB",
"acakları acak NOUN VERB",
"acaklarını acak NOUN VERB", "acaksa acak NOUN VERB", "acaktır acak NOUN VERB", "ardım ar NOUN VERB",
"arız ar NOUN VERB",
"arken ar NOUN VERB", "arsa ar NOUN VERB", "arsak ar NOUN VERB", "arsanız ar NOUN VERB", "arsınız ar NOUN VERB",
"eceği ecek NOUN VERB", "eceğim ecek NOUN VERB", "eceğimi ecek NOUN VERB", "eceğimiz ecek NOUN VERB",
"eceğin ecek NOUN VERB",
"eceğine ecek NOUN VERB", "eceğini ecek NOUN VERB", "eceğinin ecek NOUN VERB", "eceğiniz ecek NOUN VERB",
"eceğinizdir ecek NOUN VERB",
"eceğinize ecek NOUN VERB", "eceğinizi ecek NOUN VERB", "eceğinizin ecek NOUN VERB", "eceğiz ecek NOUN VERB",
"ecekleri ecek NOUN VERB",
"eceklerini ecek NOUN VERB", "ecekse ecek NOUN VERB", "ecektir ecek NOUN VERB", "erdim er NOUN VERB",
"eriz er NOUN VERB",
"erken er NOUN VERB", "erse er NOUN VERB", "ersek er NOUN VERB", "erseniz er NOUN VERB", "ersiniz er NOUN VERB",
"ilen i VERB VERB", "ilene i VERB VERB", "ilin i VERB VERB", "ilince i VERB VERB", "imiz i ADJ NOUN",
"in i ADJ NOUN", "inde i ADJ NOUN", "ine i ADJ NOUN", "ini i ADJ NOUN", "inin i ADJ NOUN",
"ılan ı NOUN VERB", "ılana ı NOUN VERB", "ılın ı NOUN VERB", "ılınca ı NOUN VERB", "la la VERB NOUN",
"lar la VERB NOUN", "lardan la VERB NOUN", "lardandır la VERB NOUN", "lardır la VERB NOUN", "ları la VERB NOUN",
"larıdır la VERB NOUN", "larım la VERB NOUN", "larımdan la VERB NOUN", "larımız la VERB NOUN",
"larımıza la VERB NOUN",
"larımızda la VERB NOUN", "larımızdan la VERB NOUN", "larımızdaydı la VERB NOUN", "larımızı la VERB NOUN",
"larımızın la VERB NOUN",
"larımızla la VERB NOUN", "ların la VERB NOUN", "larına la VERB NOUN", "larında la VERB NOUN",
"larındaki la VERB NOUN",
"larındakiler la VERB NOUN", "larındakilere la VERB NOUN", "larındakileri la VERB NOUN",
"larındakilerin la VERB NOUN", "larından la VERB NOUN",
"larındandır la VERB NOUN", "larındaysa la VERB NOUN", "larını la VERB NOUN", "larının la VERB NOUN",
"larınız la VERB NOUN",
"larınıza la VERB NOUN", "larınızda la VERB NOUN", "larınızdaki la VERB NOUN", "larınızdan la VERB NOUN",
"larınızı la VERB NOUN",
"larınızın la VERB NOUN", "larınızla la VERB NOUN", "larıyla la VERB NOUN", "le le VERB NOUN",
"ler le VERB NOUN",
"lerden le VERB NOUN", "lerdendir le VERB NOUN", "lerdir le VERB NOUN", "leri le VERB NOUN",
"leridir le VERB NOUN",
"lerim le VERB NOUN", "lerimden le VERB NOUN", "lerimiz le VERB NOUN", "lerimizde le VERB NOUN",
"lerimizden le VERB NOUN",
"lerimizdeydi le VERB NOUN", "lerimize le VERB NOUN", "lerimizi le VERB NOUN", "lerimizin le VERB NOUN",
"lerimizle le VERB NOUN",
"lerin le VERB NOUN", "lerinde le VERB NOUN", "lerindeki le VERB NOUN", "lerindekiler le VERB NOUN",
"lerindekilere le VERB NOUN",
"lerindekileri le VERB NOUN", "lerindekilerin le VERB NOUN", "lerinden le VERB NOUN",
"lerindendir le VERB NOUN", "lerindeyse le VERB NOUN",
"lerine le VERB NOUN", "lerini le VERB NOUN", "lerinin le VERB NOUN", "leriniz le VERB NOUN",
"lerinizde le VERB NOUN",
"lerinizdeki le VERB NOUN", "lerinizden le VERB NOUN", "lerinize le VERB NOUN", "lerinizi le VERB NOUN",
"lerinizin le VERB NOUN",
"lerinizle le VERB NOUN", "leriyle le VERB NOUN", "m m NOUN NOUN", "madan ma NOUN VERB", "malı ma NOUN VERB",
"malıdır ma NOUN VERB", "malıdırlar ma NOUN VERB", "malılar ma NOUN VERB", "malısınız ma NOUN VERB",
"malıyım ma NOUN VERB",
"malıyız ma NOUN VERB", "mam ma NOUN VERB", "mama ma NOUN VERB", "mamız ma NOUN VERB", "mamıza ma NOUN VERB",
"mamızı ma NOUN VERB", "manız ma NOUN VERB", "manızda ma NOUN VERB", "manızdır ma NOUN VERB",
"manızı ma NOUN VERB",
"manızla ma NOUN VERB", "ması ma NOUN VERB", "masıdır ma NOUN VERB", "masın ma NOUN VERB",
"masına ma NOUN VERB",
"masında ma NOUN VERB", "masındaki ma NOUN VERB", "masını ma NOUN VERB", "masıyla ma NOUN VERB",
"mdan m NOUN NOUN",
"meden me NOUN VERB", "meli me NOUN VERB", "melidir me NOUN VERB", "melidirler me NOUN VERB",
"meliler me NOUN VERB",
"melisiniz me NOUN VERB", "meliyim me NOUN VERB", "meliyiz me NOUN VERB", "mem me NOUN VERB",
"meme me NOUN VERB",
"memiz me NOUN VERB", "memize me NOUN VERB", "memizi me NOUN VERB", "meniz me NOUN VERB",
"menizde me NOUN VERB",
"menizdir me NOUN VERB", "menizi me NOUN VERB", "menizle me NOUN VERB", "mesi me NOUN VERB",
"mesidir me NOUN VERB",
"mesin me NOUN VERB", "mesinde me NOUN VERB", "mesindeki me NOUN VERB", "mesine me NOUN VERB",
"mesini me NOUN VERB",
"mesiyle me NOUN VERB", "mişse miş NOUN VERB", "mını m NOUN NOUN", "mışsa mış NOUN VERB", "mız m NOUN NOUN",
"n n NOUN NOUN", "na n NOUN NOUN", "ne n NOUN NOUN", "nin n NOUN NOUN", "niz n NOUN NOUN",
"nın n NOUN NOUN", "nız n NOUN NOUN", "rdim r NOUN VERB", "rdım r NOUN VERB", "riz r NOUN VERB",
"rız r NOUN VERB", "rken r NOUN VERB", "rken r NOUN VERB", "rsa r NOUN VERB", "rsak r NOUN VERB",
"rsanız r NOUN VERB", "rse r NOUN VERB", "rsek r NOUN VERB", "rseniz r NOUN VERB", "rsiniz r NOUN VERB",
"rsınız r NOUN VERB", "sa sa VERB ADJ", "se se VERB ADJ", "ulan u NOUN VERB", "un un VERB NOUN",
"üne ün VERB NOUN", "unun un VERB NOUN", "ince i NOUN VERB", "unca u NOUN VERB", "ınca ı NOUN VERB",
"unca un NOUN VERB", "ilen ile VERB VERB"]
/**

@@ -86,3 +166,4 @@ * A constructor of {@link FsmParseList} class which takes an {@link Array} fsmParses as an input. First it sorts

* The getParseWithLongestRootWord method returns the parse with the longest root word. If more than one parse has the
* longest root word, the first parse with that root is returned.
* longest root word, the first parse with that root is returned. If the longest root word belongs to an
* exceptional case, the parse with the next longest root word that is not an exceptional case is returned.
*

@@ -95,3 +176,3 @@ * @return FsmParse Parse with the longest root word.

for (let currentParse of this.fsmParses) {
if (currentParse.getWord().getName().length > maxLength) {
if (currentParse.getWord().getName().length > maxLength && !this.isLongestRootException(currentParse)) {
maxLength = currentParse.getWord().getName().length;

@@ -105,2 +186,30 @@ bestParse = currentParse;

/**
 * The isLongestRootException method returns true if the longest root word belongs to an exceptional case, false otherwise.
 *
 * Each entry of FsmParseList.longestRootExceptions is a space-separated string with four fields:
 * the surface-form ending, the ending of the longest root, the part of speech of the longest root,
 * and the part of speech of the possible (alternative) root. The given parse is an exception when its
 * surface form and root end with the listed endings, its root part of speech matches, and some parse in
 * this list has the surface form minus that ending as its root, with the listed alternative part of speech.
 *
 * @param fsmParse {@link FsmParse} input.
 * @return true if the longest root belongs to an exceptional case, false otherwise.
 */
isLongestRootException(fsmParse: FsmParse): boolean{
    const surfaceForm = fsmParse.getSurfaceForm()
    const root = fsmParse.getWord().getName()
    for (const longestRootException of FsmParseList.longestRootExceptions){
        const [surfaceFormEnding, longestRootEnding, longestRootPos, possibleRootPos] = longestRootException.split(" ")
        if (!(surfaceForm.endsWith(surfaceFormEnding) && root.endsWith(longestRootEnding) &&
            fsmParse.getRootPos() == longestRootPos)){
            continue
        }
        // Strip the ending from the END of the surface form. String.prototype.replace with a string
        // pattern removes the first occurrence, which is wrong when the ending also appears earlier
        // in the word (e.g. "malama" with ending "ma" must yield "mala", not "lama").
        const possibleRoot = surfaceForm.slice(0, surfaceForm.length - surfaceFormEnding.length)
        for (const currentParse of this.fsmParses){
            if (currentParse.getWord().getName() == possibleRoot && currentParse.getRootPos() == possibleRootPos){
                return true
            }
        }
    }
    return false
}
/**
* The reduceToParsesWithSameRoot method takes a {@link String} currentWithPos as an input and loops i times till

@@ -107,0 +216,0 @@ * i equals to the size of the fsmParses {@link Array}. If the given currentRoot does not equal to the root of ith

@@ -31,3 +31,3 @@ import {MorphologicalTag} from "./MorphologicalTag";

"DIST", "ADAMANTLY", "PERCENT", "WITHOUTBEINGABLETOHAVEDONESO", "DIM",
"PERS", "FRACTION", "HASHTAG", "EMAIL", "DATE"];
"PERS", "FRACTION", "HASHTAG", "EMAIL", "DATE", "CODE", "METRIC"];
static morphoTags = [MorphologicalTag.NOUN, MorphologicalTag.ADVERB, MorphologicalTag.ADJECTIVE,

@@ -58,3 +58,4 @@ MorphologicalTag.VERB, MorphologicalTag.A1SG, MorphologicalTag.A2SG, MorphologicalTag.A3SG, MorphologicalTag.A1PL,

MorphologicalTag.DISTRIBUTIVE, MorphologicalTag.ADAMANTLY, MorphologicalTag.PERCENT, MorphologicalTag.WITHOUTBEINGABLETOHAVEDONESO, MorphologicalTag.DIMENSION,
MorphologicalTag.PERSONALPRONOUN, MorphologicalTag.FRACTION, MorphologicalTag.HASHTAG, MorphologicalTag.EMAIL, MorphologicalTag.DATE];
MorphologicalTag.PERSONALPRONOUN, MorphologicalTag.FRACTION, MorphologicalTag.HASHTAG, MorphologicalTag.EMAIL, MorphologicalTag.DATE,
MorphologicalTag.CODE, MorphologicalTag.METRIC];

@@ -61,0 +62,0 @@ /**

@@ -509,3 +509,11 @@ export enum MorphologicalTag {

*/
DATE
DATE,
/**
* Code : i7-9700K
*/
CODE,
/**
* Metric : 6cmx7cmx8cm
*/
METRIC
}

@@ -142,3 +142,3 @@ import {State} from "./State";

}
if (this._with == "dHr") {
if (this._with == "DHr") {
if (this._toState.getName() == "Adverb") {

@@ -145,0 +145,0 @@ return true;

@@ -10,3 +10,3 @@ import * as assert from "assert";

it('testStateCount', function() {
assert.strictEqual(139, stateList.length);
assert.strictEqual(141, stateList.length);
});

@@ -20,3 +20,3 @@ it('testStartEndStates', function() {

}
assert.strictEqual(35, endStateCount);
assert.strictEqual(37, endStateCount);
let posCounts = new CounterHashMap<string>();

@@ -23,0 +23,0 @@ for (let state of stateList){

@@ -7,2 +7,3 @@ import * as assert from "assert";

import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
import * as fs from "fs";

@@ -12,2 +13,29 @@ describe('FsmMorphologicalAnalyzerTest', function() {

let fsm = new FsmMorphologicalAnalyzer();
// Checks that generateAllParses produces, for each listed root word, exactly the parses stored
// in the matching "parses/<word>.txt" fixture file.
it('morphologicalAnalysisGenerateAllParses', function() {
    const testWords = ["göç", "açıkla", "yıldönümü",
        "resim", "hal", "emlak", "git",
        "kavur", "ye", "yemek", "ak",
        "sıska", "yıka", "bul", "cevapla",
        "coş", "böl", "del", "giy",
        "kaydol", "anla", "çök", "çık",
        "doldur", "azal", "göster", "aksa", "cenk", "kalp"]
    for (const testWord of testWords){
        const word = <TxtWord> fsm.getDictionary().getWord(testWord)
        const parsesExpected : Array<string> = []
        const data = fs.readFileSync("parses/" + word.getName() + ".txt", 'utf8')
        for (const line of data.split("\n")) {
            // Only lines made of exactly two space-separated items are used; the second item is the parse.
            const items = line.split(" ")
            if (items.length == 2){
                parsesExpected.push(items[1].trim())
            }
        }
        const parsesGenerated = fsm.generateAllParses(word, word.getName().length + 5)
        // strictEqual with a message names the failing word, unlike a bare assert.ok.
        assert.strictEqual(parsesExpected.length, parsesGenerated.length,
            `parse count mismatch for "${testWord}": expected ${parsesExpected.length}, generated ${parsesGenerated.length}`)
        // Set lookup avoids rescanning the whole expected list for every generated parse.
        const expectedSet = new Set(parsesExpected)
        for (const parseGenerated of parsesGenerated){
            const generated = parseGenerated.toString()
            assert.ok(expectedSet.has(generated), `unexpected parse for "${testWord}": ${generated}`)
        }
    }
});
it('morphologicalAnalysisDataTimeNumber', function() {

@@ -14,0 +42,0 @@ assert.ok(fsm.morphologicalAnalysis("3/4").size() != 0);

@@ -22,2 +22,6 @@ import * as assert from "assert";

let parse14 = fsm.morphologicalAnalysis("sana");
let parse15 = fsm.morphologicalAnalysis("açacağını");
let parse16 = fsm.morphologicalAnalysis("kollarımız");
let parse17 = fsm.morphologicalAnalysis("yapmamızı");
let parse18 = fsm.morphologicalAnalysis("koşmalıyız");
it('testSize', function() {

@@ -52,2 +56,6 @@ assert.strictEqual(2, parse1.size());

assert.strictEqual("karşılaştırmalı", parse6.getParseWithLongestRootWord().getWord().getName());
assert.strictEqual("aç", parse15.getParseWithLongestRootWord().getWord().getName());
assert.strictEqual("kol", parse16.getParseWithLongestRootWord().getWord().getName());
assert.strictEqual("yap", parse17.getParseWithLongestRootWord().getWord().getName());
assert.strictEqual("koş", parse18.getParseWithLongestRootWord().getWord().getName());
});

@@ -54,0 +62,0 @@ it('testReduceToParsesWithSameRootAndPos', function() {

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc