Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

@nlpjs/lang-id

Package Overview
Dependencies
Maintainers
1
Versions
38
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@nlpjs/lang-id - npm Package Compare versions

Comparing version 4.0.1 to 4.1.0

6

package.json
{
"name": "@nlpjs/lang-id",
"version": "4.0.1",
"version": "4.1.0",
"description": "Core",

@@ -28,5 +28,5 @@ "author": {

"dependencies": {
"@nlpjs/core": "^4.0.1"
"@nlpjs/core": "^4.1.0"
},
"gitHead": "4e9126a5c16404496eb4d9cc392f31315be5e6f2"
"gitHead": "35a9f046ff64fa04e952ef9c14b3e1dd11bdba08"
}

@@ -24,7 +24,5 @@ /*

const { BaseStemmer } = require('@nlpjs/core');
const kataDasar = require('./kata-dasar.json');
const preffixRules = require('./preffix-rules').rules;
const suffixRules = require('./suffix-rules').rules;
const { Among, BaseStemmer } = require('@nlpjs/core');
/* eslint-disable */
class StemmerId extends BaseStemmer {

@@ -34,198 +32,485 @@ constructor(container) {

this.name = 'stemmer-id';
this.I_prefix = 0;
this.I_measure = 0;
}
findWord(token) {
const result = kataDasar[token] === 1;
return result;
r_remove_particle() {
this.ket = this.cursor;
if (this.find_among_b(StemmerId.a_0) == 0) {
return false;
}
this.bra = this.cursor;
if (!this.slice_del()) {
return false;
}
this.I_measure -= 1;
return true;
}
stemPlural(token) {
let matches = token.match(/^(.*)-(.*)$/);
if (!matches) {
return token;
r_remove_possessive_pronoun() {
this.ket = this.cursor;
if (this.find_among_b(StemmerId.a_1) == 0) {
return false;
}
const words = [matches[1], matches[2]];
matches = words[0].match(/^(.*)-(.*)$/);
if (StemmerId.suffixes[words[1]] && matches) {
[, words[0]] = matches;
words[1] = `${matches[2]}-${words[1]}`;
this.bra = this.cursor;
if (!this.slice_del()) {
return false;
}
const rootWord1 = this.stemSingular(words[0]);
let rootWord2 = this.stemSingular(words[1]);
if (rootWord2 === words[1] && !this.findWord(words[1])) {
rootWord2 = this.stemSingular(`me${words[1]}`);
this.I_measure -= 1;
return true;
}
r_SUFFIX_KAN_OK() {
if (!(I_prefix != 3)) {
return false;
}
if (rootWord1 === rootWord2) {
return rootWord1;
if (!(I_prefix != 2)) {
return false;
}
return token;
return true;
}
requiresAdjustment(word) {
const rules = [
/^be(.*)lah$/,
/^be(.*)an$/,
/^me(.*)i$/,
/^di(.*)i$/,
/^pe(.*)i$/,
/^ter(.*)i$/,
];
for (let i = 0; i < rules.length; i += 1) {
if (word.match(rules[i])) {
return true;
}
r_SUFFIX_AN_OK() {
if (!(I_prefix != 1)) {
return false;
}
return false;
return true;
}
checkPrefixRules() {
const removalCount = this.removals.length;
for (let i = 0; i < preffixRules.length; i += 1) {
const resultObj = preffixRules[i](this.currentWord);
if (resultObj.removal) {
this.removals.push(resultObj.removal);
r_SUFFIX_I_OK() {
if (!(I_prefix <= 2)) {
return false;
}
{
const v_1 = this.limit - this.cursor;
lab0: {
if (!this.eq_s_b('s')) {
break lab0;
}
return false;
}
this.currentWord = resultObj.currentWord;
if (this.findWord(this.currentWord)) {
return this.currentWord;
}
if (this.removals.length > removalCount) {
return this.currentWord;
}
this.cursor = this.limit - v_1;
}
return this.currentWord;
return true;
}
removePrefixes() {
for (let i = 0; i < 3; i += 1) {
this.checkPrefixRules();
if (this.findWord(this.currentWord)) {
return this.currentWord;
}
r_remove_suffix() {
this.ket = this.cursor;
if (this.find_among_b(StemmerId.a_2) == 0) {
return false;
}
return this.currentWord;
this.bra = this.cursor;
if (!this.slice_del()) {
return false;
}
this.I_measure -= 1;
return true;
}
removeSuffixes() {
for (let i = 0; i < suffixRules.length; i += 1) {
const resultObj = suffixRules[i](this.currentWord);
if (resultObj.removal) {
this.removals.push(resultObj.removal);
}
this.currentWord = resultObj.currentWord;
if (this.findWord(this.currentWord)) {
return this.currentWord;
}
r_VOWEL() {
if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
return false;
}
return this.currentWord;
return true;
}
restorePrefix() {
for (let i = 0; i < this.removals.length; i += 1) {
this.currentWord = this.removals[i].originalWord;
break;
r_KER() {
if (!this.out_grouping(StemmerId.g_vowel, 97, 117)) {
return false;
}
let i = 0;
while (i < this.removals.length) {
if (this.removals[i].affixType === 'DP') {
this.removals.splice(i, 1);
} else {
i += 1;
}
if (!this.eq_s('er')) {
return false;
}
return true;
}
loopRestorePrefixes() {
this.restorePrefix();
const tempCurrentWord = this.currentWord;
for (let i = this.removals.length - 1; i >= 0; i -= 1) {
const currentRemoval = this.removals[i];
if (['DS', 'PP', 'P'].includes(currentRemoval.affixType)) {
if (currentRemoval.removedPart === 'kan') {
this.currentWord = `${currentRemoval.result}k`;
this.removePrefixes();
if (this.findWord(this.currentWord)) {
return this.currentWord;
r_remove_first_order_prefix() {
let /** number */ among_var;
this.bra = this.cursor;
among_var = this.find_among(StemmerId.a_3);
if (among_var == 0) {
return false;
}
this.ket = this.cursor;
switch (among_var) {
case 1:
if (!this.slice_del()) {
return false;
}
this.I_prefix = 1;
this.I_measure -= 1;
break;
case 2:
if (!this.slice_del()) {
return false;
}
this.I_prefix = 3;
this.I_measure -= 1;
break;
case 3:
this.I_prefix = 1;
if (!this.slice_from('s')) {
return false;
}
this.I_measure -= 1;
break;
case 4:
this.I_prefix = 3;
if (!this.slice_from('s')) {
return false;
}
this.I_measure -= 1;
break;
case 5:
this.I_prefix = 1;
this.I_measure -= 1;
lab0: {
const v_1 = this.cursor;
lab1: {
const v_2 = this.cursor;
if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
break lab1;
}
this.cursor = v_2;
if (!this.slice_from('p')) {
return false;
}
break lab0;
}
this.currentWord = `${currentRemoval.result}kan`;
} else {
this.currentWord = currentRemoval.originalWord;
this.cursor = v_1;
if (!this.slice_del()) {
return false;
}
}
this.removePrefixes();
if (this.findWord(this.currentWord)) {
return this.currentWord;
break;
case 6:
this.I_prefix = 3;
this.I_measure -= 1;
lab2: {
const v_3 = this.cursor;
lab3: {
const v_4 = this.cursor;
if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
break lab3;
}
this.cursor = v_4;
if (!this.slice_from('p')) {
return false;
}
break lab2;
}
this.cursor = v_3;
if (!this.slice_del()) {
return false;
}
}
this.currentWord = tempCurrentWord;
return this.currentWord;
}
break;
}
return this.currentWord;
return true;
}
stemmingProcess() {
if (this.findWord(this.currentWord)) {
return this.currentWord;
r_remove_second_order_prefix() {
let among_var;
this.bra = this.cursor;
among_var = this.find_among(StemmerId.a_4);
if (among_var == 0) {
return false;
}
if (this.requiresAdjustment(this.originalWord)) {
this.removePrefixes();
if (this.findWord(this.currentWord)) {
return this.currentWord;
this.ket = this.cursor;
switch (among_var) {
case 1:
if (!this.slice_del()) {
return false;
}
this.I_prefix = 2;
this.I_measure -= 1;
break;
case 2:
if (!this.slice_from('ajar')) {
return false;
}
this.I_measure -= 1;
break;
case 3:
if (!this.slice_del()) {
return false;
}
this.I_prefix = 4;
this.I_measure -= 1;
break;
case 4:
if (!this.slice_from('ajar')) {
return false;
}
this.I_prefix = 4;
this.measure -= 1;
break;
}
return true;
}
innerbStem() {
this.I_measure = 0;
const /** number */ v_1 = this.cursor;
{
while (true) {
const /** number */ v_2 = this.cursor;
lab1: {
while (true) {
lab3: {
if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
break lab3;
}
break;
}
if (this.cursor >= this.limit) {
break lab1;
}
this.cursor++;
}
this.I_measure += 1;
continue;
}
this.cursor = v_2;
break;
}
this.removeSuffixes();
if (this.findWord(this.currentWord)) {
return this.currentWord;
}
this.currentWord = this.originalWord;
this.removals = [];
}
this.removeSuffixes();
if (this.findWord(this.currentWord)) {
return this.currentWord;
this.cursor = v_1;
if (!(this.I_measure > 2)) {
return false;
}
this.removePrefixes();
if (this.findWord(this.currentWord)) {
return this.currentWord;
this.I_prefix = 0;
this.limit_backward = this.cursor;
this.cursor = this.limit;
const /** number */ v_4 = this.limit - this.cursor;
this.r_remove_particle();
this.cursor = this.limit - v_4;
if (!(this.I_measure > 2)) {
return false;
}
this.loopRestorePrefixes();
return this.currentWord;
}
stemSingular(word) {
this.originalWord = word;
this.currentWord = word;
if (this.currentWord.length > 3) {
this.stemmingProcess();
const /** number */ v_5 = this.limit - this.cursor;
this.r_remove_possessive_pronoun();
this.cursor = this.limit - v_5;
this.cursor = this.limit_backward;
if (!(this.I_measure > 2)) {
return false;
}
return this.findWord(this.currentWord)
? this.currentWord
: this.originalWord;
}
isPlural(word) {
const matches = word.match(/^(.*)-(ku|mu|nya|lah|kah|tah|pun)$/);
if (matches) {
return matches[1].search('-') !== -1;
lab4: {
const /** number */ v_6 = this.cursor;
lab5: {
const /** number */ v_7 = this.cursor;
if (!this.r_remove_first_order_prefix()) {
break lab5;
}
const /** number */ v_8 = this.cursor;
lab6: {
const /** number */ v_9 = this.cursor;
if (!(this.I_measure > 2)) {
break lab6;
}
this.limit_backward = this.cursor;
this.cursor = this.limit;
if (!this.r_remove_suffix()) {
break lab6;
}
this.cursor = this.limit_backward;
this.cursor = v_9;
if (!(this.I_measure > 2)) {
break lab6;
}
if (!this.r_remove_second_order_prefix()) {
break lab6;
}
}
this.cursor = v_8;
this.cursor = v_7;
break lab4;
}
this.cursor = v_6;
const /** number */ v_10 = this.cursor;
// call remove_second_order_prefix, line 189
this.r_remove_second_order_prefix();
this.cursor = v_10;
// do, line 190
const /** number */ v_11 = this.cursor;
lab7: {
if (!(this.I_measure > 2)) {
break lab7;
}
this.limit_backward = this.cursor;
this.cursor = this.limit;
if (!this.r_remove_suffix()) {
break lab7;
}
this.cursor = this.limit_backward;
}
this.cursor = v_11;
}
return word.search('-') !== -1;
return true;
}
innerStem() {
const token = this.getCurrent();
this.removals = [];
this.setCurrent(
this.isPlural(token) ? this.stemPlural(token) : this.stemSingular(token)
);
const current = this.getCurrent();
this.innerbStem();
for (let i = 5; i > 0; i -= 1) {
if (current.length - i > 2) {
if (StemmerId[`suffixes${i}`][current.slice(-i)]) {
this.setCurrent(current.slice(0, -i));
i = 0;
}
}
}
}
}
StemmerId.suffixes = {
ku: 1,
mu: 1,
nya: 1,
lah: 1,
kah: 1,
tah: 1,
pun: 1,
// Lookup tables handed to find_among / find_among_b. Each row is
// [text, link, code] and becomes `new Among(text, link, code)`.
// `code` is the value the r_remove_* methods switch on; `link` is -1 or the
// index of another row in the same table (e.g. 'kan' -> 'an', 'mem' -> 'me').
// NOTE(review): exact meaning of `link` is defined by Among in @nlpjs/core -
// confirm there.

// Particles removed by r_remove_particle.
StemmerId.a_0 = [
  ['kah', -1, 1],
  ['lah', -1, 1],
  ['pun', -1, 1],
].map(([text, link, code]) => new Among(text, link, code));

// Possessive pronouns removed by r_remove_possessive_pronoun.
StemmerId.a_1 = [
  ['nya', -1, 1],
  ['ku', -1, 1],
  ['mu', -1, 1],
].map(([text, link, code]) => new Among(text, link, code));

// Suffixes removed by r_remove_suffix.
StemmerId.a_2 = [
  ['i', -1, 1],
  ['an', -1, 1],
  ['kan', 1, 1],
].map(([text, link, code]) => new Among(text, link, code));

// First-order prefixes handled by r_remove_first_order_prefix.
StemmerId.a_3 = [
  ['di', -1, 1],
  ['ke', -1, 2],
  ['me', -1, 1],
  ['mem', 2, 5],
  ['men', 2, 1],
  ['meng', 4, 1],
  ['meny', 4, 3],
  ['pem', -1, 6],
  ['pen', -1, 2],
  ['peng', 8, 2],
  ['peny', 8, 4],
  ['ter', -1, 1],
].map(([text, link, code]) => new Among(text, link, code));

// Second-order prefixes handled by r_remove_second_order_prefix.
StemmerId.a_4 = [
  ['be', -1, 3],
  ['belajar', 0, 4],
  ['ber', 0, 3],
  ['pe', -1, 1],
  ['pelajar', 3, 2],
  ['per', 3, 1],
].map(([text, link, code]) => new Among(text, link, code));
// Ending tables read by innerStem through StemmerId[`suffixes${i}`]; each
// table maps an ending of the given length to 1 so a plain property lookup
// acts as a membership test.
// NOTE(review): these endings do not look Indonesian and may have been
// carried over from another language's stemmer - confirm they belong here.
for (const [size, endings] of [
  [5, [
    'iskos', 'iskas', 'anciu', 'ingas', 'jamas', 'intas',
    'antis', 'uotas', 'iskai', 'damas', 'iuose',
  ]],
  [4, [
    'iant', 'isku', 'iaus', 'ingu', 'iems', 'jami', 'asis', 'dama', 'ytas',
    'iska', 'inta', 'dami', 'uoja', 'inga', 'jama', 'iame', 'amos', 'uota',
    'iams', 'inti', 'uoti', 'amas', 'emis', 'uose', 'davo', 'omis', 'iais',
  ]],
  [3, [
    'aja', 'oti', 'amu', 'ias', 'ies', 'osi', 'iam', 'eja',
    'ems', 'eti', 'ziu', 'yta', 'aus', 'ojo', 'iui', 'oms',
    'usi', 'ese', 'ami', 'yje', 'ejo', 'yti', 'ant', 'ose',
    'ios', 'ama', 'ams', 'eje', 'oje', 'ais', 'ius', 'iai',
  ]],
  [2, [
    'ki', 'ei', 'ys', 'ia', 'ui', 'ti', 'io',
    'is', 'us', 'os', 'ai', 'es', 'iu', 'as',
  ]],
  [1, ['s', 'i', 'o', 'e', 'u', 'a']],
]) {
  const table = {};
  for (const ending of endings) {
    table[ending] = 1;
  }
  StemmerId[`suffixes${size}`] = table;
}

// Grouping passed to in_grouping/out_grouping together with the code range
// 97..117 ('a'..'u'). Presumably a bitmap selecting a, e, i, o, u - confirm
// against the grouping encoding used by BaseStemmer.
StemmerId.g_vowel = [17, 65, 16];
module.exports = StemmerId;
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc