@nlpjs/lang-id - npm Package Compare versions

Comparing version 4.0.1 to 4.1.0

package.json

		{
		"name": "@nlpjs/lang-id",
		"version": "4.0.1",
		"version": "4.1.0",
		"description": "Core",
		@@ -28,5 +28,5 @@ "author": {
		"dependencies": {
		"@nlpjs/core": "^4.0.1"
		"@nlpjs/core": "^4.1.0"
		},
		"gitHead": "4e9126a5c16404496eb4d9cc392f31315be5e6f2"
		"gitHead": "35a9f046ff64fa04e952ef9c14b3e1dd11bdba08"
		}

589

src/stemmer-id.js

		@@ -24,7 +24,5 @@ /*

		const { BaseStemmer } = require('@nlpjs/core');
		const kataDasar = require('./kata-dasar.json');
		const preffixRules = require('./preffix-rules').rules;
		const suffixRules = require('./suffix-rules').rules;
		const { Among, BaseStemmer } = require('@nlpjs/core');

		/* eslint-disable */
		class StemmerId extends BaseStemmer {
		@@ -34,198 +32,485 @@ constructor(container) {
		this.name = 'stemmer-id';
		this.I_prefix = 0;
		this.I_measure = 0;
		}

		findWord(token) {
		const result = kataDasar[token] === 1;
		return result;
		r_remove_particle() {
		this.ket = this.cursor;
		if (this.find_among_b(StemmerId.a_0) == 0) {
		return false;
		}
		this.bra = this.cursor;
		if (!this.slice_del()) {
		return false;
		}
		this.I_measure -= 1;
		return true;
		}

		stemPlural(token) {
		let matches = token.match(/^(.)-(.)$/);
		if (!matches) {
		return token;
		r_remove_possessive_pronoun() {
		this.ket = this.cursor;
		if (this.find_among_b(StemmerId.a_1) == 0) {
		return false;
		}
		const words = [matches[1], matches[2]];
		matches = words[0].match(/^(.)-(.)$/);
		if (StemmerId.suffixes[words[1]] && matches) {
		[, words[0]] = matches;
		words[1] = `${matches[2]}-${words[1]}`;
		this.bra = this.cursor;
		if (!this.slice_del()) {
		return false;
		}
		const rootWord1 = this.stemSingular(words[0]);
		let rootWord2 = this.stemSingular(words[1]);
		if (rootWord2 === words[1] && !this.findWord(words[1])) {
		rootWord2 = this.stemSingular(`me${words[1]}`);
		this.I_measure -= 1;
		return true;
		}

		r_SUFFIX_KAN_OK() {
		if (!(I_prefix != 3)) {
		return false;
		}
		if (rootWord1 === rootWord2) {
		return rootWord1;
		if (!(I_prefix != 2)) {
		return false;
		}
		return token;
		return true;
		}

		requiresAdjustment(word) {
		const rules = [
		/^be(.*)lah$/,
		/^be(.*)an$/,
		/^me(.*)i$/,
		/^di(.*)i$/,
		/^pe(.*)i$/,
		/^ter(.*)i$/,
		];
		for (let i = 0; i < rules.length; i += 1) {
		if (word.match(rules[i])) {
		return true;
		}
		r_SUFFIX_AN_OK() {
		if (!(I_prefix != 1)) {
		return false;
		}
		return false;
		return true;
		}

		checkPrefixRules() {
		const removalCount = this.removals.length;
		for (let i = 0; i < preffixRules.length; i += 1) {
		const resultObj = preffixRules[i](this.currentWord);
		if (resultObj.removal) {
		this.removals.push(resultObj.removal);
		r_SUFFIX_I_OK() {
		if (!(I_prefix <= 2)) {
		return false;
		}
		{
		const v_1 = this.limit - this.cursor;
		lab0: {
		if (!this.eq_s_b('s')) {
		break lab0;
		}
		return false;
		}
		this.currentWord = resultObj.currentWord;
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		}
		if (this.removals.length > removalCount) {
		return this.currentWord;
		}
		this.cursor = this.limit - v_1;
		}
		return this.currentWord;
		return true;
		}

		removePrefixes() {
		for (let i = 0; i < 3; i += 1) {
		this.checkPrefixRules();
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		}
		r_remove_suffix() {
		this.ket = this.cursor;
		if (this.find_among_b(StemmerId.a_2) == 0) {
		return false;
		}
		return this.currentWord;
		this.bra = this.cursor;
		if (!this.slice_del()) {
		return false;
		}
		this.I_measure -= 1;
		return true;
		}

		removeSuffixes() {
		for (let i = 0; i < suffixRules.length; i += 1) {
		const resultObj = suffixRules[i](this.currentWord);
		if (resultObj.removal) {
		this.removals.push(resultObj.removal);
		}
		this.currentWord = resultObj.currentWord;
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		}
		r_VOWEL() {
		if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
		return false;
		}
		return this.currentWord;
		return true;
		}

		restorePrefix() {
		for (let i = 0; i < this.removals.length; i += 1) {
		this.currentWord = this.removals[i].originalWord;
		break;
		r_KER() {
		if (!this.out_grouping(StemmerId.g_vowel, 97, 117)) {
		return false;
		}
		let i = 0;
		while (i < this.removals.length) {
		if (this.removals[i].affixType === 'DP') {
		this.removals.splice(i, 1);
		} else {
		i += 1;
		}
		if (!this.eq_s('er')) {
		return false;
		}
		return true;
		}

		loopRestorePrefixes() {
		this.restorePrefix();
		const tempCurrentWord = this.currentWord;
		for (let i = this.removals.length - 1; i >= 0; i -= 1) {
		const currentRemoval = this.removals[i];
		if (['DS', 'PP', 'P'].includes(currentRemoval.affixType)) {
		if (currentRemoval.removedPart === 'kan') {
		this.currentWord = `${currentRemoval.result}k`;
		this.removePrefixes();
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		r_remove_first_order_prefix() {
		let /** number */ among_var;
		this.bra = this.cursor;
		among_var = this.find_among(StemmerId.a_3);
		if (among_var == 0) {
		return false;
		}
		this.ket = this.cursor;
		switch (among_var) {
		case 1:
		if (!this.slice_del()) {
		return false;
		}
		this.I_prefix = 1;
		this.I_measure -= 1;
		break;
		case 2:
		if (!this.slice_del()) {
		return false;
		}
		this.I_prefix = 3;
		this.I_measure -= 1;
		break;
		case 3:
		this.I_prefix = 1;
		if (!this.slice_from('s')) {
		return false;
		}
		this.I_measure -= 1;
		break;
		case 4:
		this.I_prefix = 3;
		if (!this.slice_from('s')) {
		return false;
		}
		this.I_measure -= 1;
		break;
		case 5:
		this.I_prefix = 1;
		this.I_measure -= 1;
		lab0: {
		const v_1 = this.cursor;
		lab1: {
		const v_2 = this.cursor;
		if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
		break lab1;
		}
		this.cursor = v_2;
		if (!this.slice_from('p')) {
		return false;
		}
		break lab0;
		}
		this.currentWord = `${currentRemoval.result}kan`;
		} else {
		this.currentWord = currentRemoval.originalWord;
		this.cursor = v_1;
		if (!this.slice_del()) {
		return false;
		}
		}
		this.removePrefixes();
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		break;
		case 6:
		this.I_prefix = 3;
		this.I_measure -= 1;
		lab2: {
		const v_3 = this.cursor;
		lab3: {
		const v_4 = this.cursor;
		if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
		break lab3;
		}
		this.cursor = v_4;
		if (!this.slice_from('p')) {
		return false;
		}
		break lab2;
		}
		this.cursor = v_3;
		if (!this.slice_del()) {
		return false;
		}
		}
		this.currentWord = tempCurrentWord;
		return this.currentWord;
		}
		break;
		}
		return this.currentWord;
		return true;
		}

		stemmingProcess() {
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		r_remove_second_order_prefix() {
		let among_var;
		this.bra = this.cursor;
		among_var = this.find_among(StemmerId.a_4);
		if (among_var == 0) {
		return false;
		}
		if (this.requiresAdjustment(this.originalWord)) {
		this.removePrefixes();
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		this.ket = this.cursor;
		switch (among_var) {
		case 1:
		if (!this.slice_del()) {
		return false;
		}
		this.I_prefix = 2;
		this.I_measure -= 1;
		break;
		case 2:
		if (!this.slice_from('ajar')) {
		return false;
		}
		this.I_measure -= 1;
		break;
		case 3:
		if (!this.slice_del()) {
		return false;
		}
		this.I_prefix = 4;
		this.I_measure -= 1;
		break;
		case 4:
		if (!this.slice_from('ajar')) {
		return false;
		}
		this.I_prefix = 4;
		this.measure -= 1;
		break;
		}
		return true;
		}

		innerbStem() {
		this.I_measure = 0;
		const /** number */ v_1 = this.cursor;
		{
		while (true) {
		const /** number */ v_2 = this.cursor;
		lab1: {
		while (true) {
		lab3: {
		if (!this.in_grouping(StemmerId.g_vowel, 97, 117)) {
		break lab3;
		}
		break;
		}
		if (this.cursor >= this.limit) {
		break lab1;
		}
		this.cursor++;
		}
		this.I_measure += 1;
		continue;
		}
		this.cursor = v_2;
		break;
		}
		this.removeSuffixes();
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		}
		this.currentWord = this.originalWord;
		this.removals = [];
		}
		this.removeSuffixes();
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		this.cursor = v_1;
		if (!(this.I_measure > 2)) {
		return false;
		}
		this.removePrefixes();
		if (this.findWord(this.currentWord)) {
		return this.currentWord;
		this.I_prefix = 0;
		this.limit_backward = this.cursor;
		this.cursor = this.limit;
		const /** number */ v_4 = this.limit - this.cursor;
		this.r_remove_particle();
		this.cursor = this.limit - v_4;
		if (!(this.I_measure > 2)) {
		return false;
		}
		this.loopRestorePrefixes();
		return this.currentWord;
		}

		stemSingular(word) {
		this.originalWord = word;
		this.currentWord = word;
		if (this.currentWord.length > 3) {
		this.stemmingProcess();
		const /** number */ v_5 = this.limit - this.cursor;
		this.r_remove_possessive_pronoun();
		this.cursor = this.limit - v_5;
		this.cursor = this.limit_backward;
		if (!(this.I_measure > 2)) {
		return false;
		}
		return this.findWord(this.currentWord)
		? this.currentWord
		: this.originalWord;
		}

		isPlural(word) {
		const matches = word.match(/^(.*)-(ku\|mu\|nya\|lah\|kah\|tah\|pun)$/);
		if (matches) {
		return matches[1].search('-') !== -1;
		lab4: {
		const /** number */ v_6 = this.cursor;
		lab5: {
		const /** number */ v_7 = this.cursor;
		if (!this.r_remove_first_order_prefix()) {
		break lab5;
		}
		const /** number */ v_8 = this.cursor;
		lab6: {
		const /** number */ v_9 = this.cursor;
		if (!(this.I_measure > 2)) {
		break lab6;
		}
		this.limit_backward = this.cursor;
		this.cursor = this.limit;
		if (!this.r_remove_suffix()) {
		break lab6;
		}
		this.cursor = this.limit_backward;
		this.cursor = v_9;
		if (!(this.I_measure > 2)) {
		break lab6;
		}
		if (!this.r_remove_second_order_prefix()) {
		break lab6;
		}
		}
		this.cursor = v_8;
		this.cursor = v_7;
		break lab4;
		}
		this.cursor = v_6;
		const /** number */ v_10 = this.cursor;
		// call remove_second_order_prefix, line 189
		this.r_remove_second_order_prefix();
		this.cursor = v_10;
		// do, line 190
		const /** number */ v_11 = this.cursor;
		lab7: {
		if (!(this.I_measure > 2)) {
		break lab7;
		}
		this.limit_backward = this.cursor;
		this.cursor = this.limit;
		if (!this.r_remove_suffix()) {
		break lab7;
		}
		this.cursor = this.limit_backward;
		}
		this.cursor = v_11;
		}
		return word.search('-') !== -1;
		return true;
		}


		innerStem() {
		const token = this.getCurrent();
		this.removals = [];
		this.setCurrent(
		this.isPlural(token) ? this.stemPlural(token) : this.stemSingular(token)
		);
		const current = this.getCurrent();
		this.innerbStem();
		for (let i = 5; i > 0; i -= 1) {
		if (current.length - i > 2) {
		if (StemmerId[`suffixes${i}`][current.slice(-i)]) {
		this.setCurrent(current.slice(0, -i));
		i = 0;
		}
		}
		}
		}
		}

		StemmerId.suffixes = {
		ku: 1,
		mu: 1,
		nya: 1,
		lah: 1,
		kah: 1,
		tah: 1,
		pun: 1,
		// Lookup tables handed to find_among / find_among_b. Each row is
		// [text, link, result]: `link` is the index of another row in the same
		// table that this row extends (-1 for none), and `result` is the code the
		// calling r_* method tests or switches on.

		// Particles, used by r_remove_particle.
		StemmerId.a_0 = [
		  ['kah', -1, 1],
		  ['lah', -1, 1],
		  ['pun', -1, 1],
		].map(([text, link, result]) => new Among(text, link, result));

		// Possessive pronouns, used by r_remove_possessive_pronoun.
		StemmerId.a_1 = [
		  ['nya', -1, 1],
		  ['ku', -1, 1],
		  ['mu', -1, 1],
		].map(([text, link, result]) => new Among(text, link, result));

		// Derivational suffixes, used by r_remove_suffix.
		StemmerId.a_2 = [
		  ['i', -1, 1],
		  ['an', -1, 1],
		  ['kan', 1, 1],
		].map(([text, link, result]) => new Among(text, link, result));

		// First-order prefixes, used by r_remove_first_order_prefix.
		StemmerId.a_3 = [
		  ['di', -1, 1],
		  ['ke', -1, 2],
		  ['me', -1, 1],
		  ['mem', 2, 5],
		  ['men', 2, 1],
		  ['meng', 4, 1],
		  ['meny', 4, 3],
		  ['pem', -1, 6],
		  ['pen', -1, 2],
		  ['peng', 8, 2],
		  ['peny', 8, 4],
		  ['ter', -1, 1],
		].map(([text, link, result]) => new Among(text, link, result));

		// Second-order prefixes, used by r_remove_second_order_prefix.
		StemmerId.a_4 = [
		  ['be', -1, 3],
		  ['belajar', 0, 4],
		  ['ber', 0, 3],
		  ['pe', -1, 1],
		  ['pelajar', 3, 2],
		  ['per', 3, 1],
		].map(([text, link, result]) => new Among(text, link, result));

		// Ending tables consulted by innerStem(), one per ending length. Each is a
		// plain object mapping the ending to 1 so membership is a property lookup.
		// NOTE(review): these endings do not look Indonesian - confirm they are
		// meant to ship with this stemmer.

		StemmerId.suffixes5 = [
		  'iskos', 'iskas', 'anciu', 'ingas', 'jamas', 'intas',
		  'antis', 'uotas', 'iskai', 'damas', 'iuose',
		].reduce((table, ending) => Object.assign(table, { [ending]: 1 }), {});

		StemmerId.suffixes4 = [
		  'iant', 'isku', 'iaus', 'ingu', 'iems', 'jami', 'asis', 'dama', 'ytas',
		  'iska', 'inta', 'dami', 'uoja', 'inga', 'jama', 'iame', 'amos', 'uota',
		  'iams', 'inti', 'uoti', 'amas', 'emis', 'uose', 'davo', 'omis', 'iais',
		].reduce((table, ending) => Object.assign(table, { [ending]: 1 }), {});

		StemmerId.suffixes3 = [
		  'aja', 'oti', 'amu', 'ias', 'ies', 'osi', 'iam', 'eja',
		  'ems', 'eti', 'ziu', 'yta', 'aus', 'ojo', 'iui', 'oms',
		  'usi', 'ese', 'ami', 'yje', 'ejo', 'yti', 'ant', 'ose',
		  'ios', 'ama', 'ams', 'eje', 'oje', 'ais', 'ius', 'iai',
		].reduce((table, ending) => Object.assign(table, { [ending]: 1 }), {});

		StemmerId.suffixes2 = [
		  'ki', 'ei', 'ys', 'ia', 'ui', 'ti', 'io',
		  'is', 'us', 'os', 'ai', 'es', 'iu', 'as',
		].reduce((table, ending) => Object.assign(table, { [ending]: 1 }), {});

		StemmerId.suffixes1 = [
		  's', 'i', 'o', 'e', 'u', 'a',
		].reduce((table, ending) => Object.assign(table, { [ending]: 1 }), {});

		// Grouping table passed to in_grouping / out_grouping together with the
		// code-point bounds 97 ('a') and 117 ('u').
		// NOTE(review): presumably a bitmap over that range selecting a, e, i, o
		// and u - confirm against the BaseStemmer grouping implementation.
		StemmerId.g_vowel = [17, 65, 16];

		module.exports = StemmerId;

src/kata-dasar.json

src/preffix-rules.js

src/removal.js

src/suffix-rules.js

@nlpjs/lang-id - npm Package Compare versions

New alerts

Fixed alerts

Worsened metrics

Dependency changes