@@ -5,3 +5,3 @@ const languageSelect = document.getElementById('languages')

		function updateSentence() {
		function updateSentence () {
		const language = languageSelect.value
		@@ -8,0 +8,0 @@ const oldString = sentenceInput.value.split(' ')

package.json

		{
		"name": "stopword",
		"version": "1.0.11",
		"version": "2.0.0-rc.1",
		"description": "A module for node.js and the browser that takes in text and returns text that is stripped of stopwords. Has pre-defined stopword lists for 57 languages and also takes lists with custom stopwords as input.",
		"main": "lib/stopword.js",
		"main": "./dist/stopword.cjs.js",
		"module": "./dist/stopword.esm.js",
		"browser": "./dist/stopword.umd.js",
		"scripts": {
		"compile-for-browser": "webpack",
		"empty-sandbox": "rm -rf test/sandbox && mkdir test/sandbox",
		"test": "standard './.js' './test/.js' && npm run empty-sandbox && npm run compile-for-browser && tape ./test/test.js && cat test/sandbox/bundle.js \| tape-run && rm -rf test/sandbox"
		"build": "rollup --config",
		"test": "standard './*.js' './test/*.js' && npm run build && npx ava ./test/test.cjs.js && npx ava ./test/test.esm.mjs"
		},
		@@ -18,6 +19,4 @@ "repository": {
		"stopwords",
		"document",
		"processing",
		"document-processing",
		"search",
		"norch",
		"search-index",
		@@ -27,11 +26,5 @@ "nlp"
		"devDependencies": {
		"buffer": "6.0.3",
		"path-browserify": "1.0.1",
		"process": "0.11.10",
		"standard": "^16.0.3",
		"stream-browserify": "3.0.0",
		"tape": "^5.3.0",
		"tape-run": "^9.0.0",
		"webpack": "^5.47.0",
		"webpack-cli": "^4.7.2"
		"batr": "^2.0.2",
		"rollup-plugin-terser": "7.0.2",
		"rollup-plugin-license": "2.6.1"
		},
		@@ -38,0 +31,0 @@ "author": "Fergus McDowall",

202

README.md

		@@ -7,6 +7,8 @@ # stopword

		## Breaking change!
		Language codes have changed from ISO-639-1 (two characters) to ISO-639-3. This is to make room for more small languages that weren't specified in ISO-639-1.

		[![NPM version][npm-version-image]][npm-url]
		[![NPM downloads][npm-downloads-image]][npm-url]
		[![Build Status][travis-image]][travis-url]
		[![Known Vulnerabilities][snyk-image]][snyk-url]
		[![Build Status][CI-image]][CI-url]
		[![JavaScript Style Guide][standardjs-image]][standardjs-url]
		@@ -19,16 +21,36 @@ [![MIT License][license-image]][license-url]

		## Usage
		## Getting the script in your environment

		### Node.js
		### CJS - CommonJS
		Destructuring require:
		```javascript
		sw = require('stopword')
		// sw.removeStopwords and sw.[language code] now available
		const { removeStopwords, eng, fra } = require('stopword')
		// 'removeStopwords', 'eng' and 'fra' available
		```

		### Script tag method
		Old style require:
		```javascript
		const sw = require('stopword')
		// sw.removeStopwords and sw.<language codes> now available
		```

		### ESM - ECMAScript Modules
		Destructuring import:
		```javascript
		import { removeStopwords, eng, fra } from './dist/stopword.esm.mjs'
		// 'removeStopwords', 'eng' and 'fra' available
		```

		Old style import:
		```javascript
		import * as sw from './dist/stopword.esm.mjs'
		// 'sw.removeStopwords' + 'sw.<language codes>' available
		```

		### UMD - Script tag method
		```html
		<script src="stopword.js"></script>
		<script src="stopword.umd.js"></script>

		<script>
		// sw.removeStopwords and sw.[language code] now available
		// sw.removeStopwords and sw.<language codes> now available
		</script>
		@@ -38,2 +60,4 @@ ```

		## Usage

		### Default (English)
		@@ -43,5 +67,5 @@ By default, `stopword` will strip an array of "meaningless" English words
		```javascript
		sw = require('stopword')
		const { removeStopwords } = require('stopword')
		const oldString = 'a really Interesting string with some words'.split(' ')
		const newString = sw.removeStopwords(oldString)
		const newString = removeStopwords(oldString)
		// newString is now [ 'really', 'Interesting', 'string', 'words' ]
		@@ -54,6 +78,6 @@
		```javascript
		sw = require('stopword')
		const { removeStopwords, swe } = require('stopword')
		const oldString = 'Trädgårdsägare är beredda att pröva vad som helst för att bli av med de hatade mördarsniglarna åäö'.split(' ')
		// sw.sv contains swedish stopwords
		const newString = sw.removeStopwords(oldString, sw.sv)
		// swe contains swedish stopwords
		const newString = removeStopwords(oldString, swe)
		// newString is now [ 'Trädgårdsägare', 'beredda', 'pröva', 'helst', 'hatade', 'mördarsniglarna', 'åäö' ]
		@@ -65,6 +89,6 @@ ```
		```javascript
		sw = require('stopword')
		const { removeStopwords } = require('stopword')
		const oldString = 'you can even roll your own custom stopword list'.split(' ')
		// Just add your own list/array of stopwords
		const newString = sw.removeStopwords(oldString, [ 'even', 'a', 'custom', 'stopword', 'list', 'is', 'possible']
		const newString = removeStopwords(oldString, [ 'even', 'a', 'custom', 'stopword', 'list', 'is', 'possible'])
		// newString is now [ 'you', 'can', 'roll', 'your', 'own']
		@@ -76,6 +100,6 @@ ```
		```javascript
		sw = require('stopword')
		const { removeStopwords, eng, swe } = require('stopword')
		const oldString = 'a really interesting string with some words trädgårdsägare är beredda att pröva vad som helst för att bli av med de hatade mördarsniglarna'.split(' ')
		const customStopwords = ['interesting', 'really']
		const newString = sw.removeStopwords(oldString, [...sw.en, ...sw.sv, ...customStopwords]
		const newString = removeStopwords(oldString, [...eng, ...swe, ...customStopwords])
		// newString is now ['string', 'words', 'trädgårdsägare', 'beredda', 'pröva', 'helst', 'hatade', 'mördarsniglarna']
		@@ -96,4 +120,4 @@ ```
		```javascript
		sw = require('stopword')
		var text = sw.removeStopwords(text[, stopwords])
		const { removeStopwords } = require('stopword')
		var text = removeStopwords(text[, stopwords])
		// text is now an array of given words minus specified stopwords
		@@ -104,71 +128,71 @@ ```

		Arrays of stopwords for the following 55 languages are supplied:
		Language codes follow [ISO 639-3 Language Code list](https://iso639-3.sil.org/code_tables/639/data/all). Arrays of stopwords for the following 57 languages are supplied:

		* `af` - Afrikaans
		* `ar` - Arabic, Modern Standard
		* `hy` - Armenian
		* `eu` - Basque
		* `bn` - Bengali
		* `br` - Breton
		* `bg` - Bulgarian
		* `ca` - Catalan
		* `zh` - Chinese Simplified
		* `hr` - Croatian
		* `cs` - Czech
		* `da` - Danish
		* `nl` - Dutch
		* `en` - English
		* `eo` - Esperanto
		* `et` - Estonian
		* `fa` - Farsi
		* `fi` - Finnish
		* `fr` - French
		* `gl` - Galician
		* `de` - German
		* `el` - Greek
		* `ha` - Hausa
		* `he` - Hebrew
		* `hi` - Hindi
		* `hu` - Hungarian
		* `id` - Indonesian
		* `ga` - Irish
		* `it` - Italian
		* `ja` - Japanese
		* `ko` - Korean
		* `la` - Latin
		* `lv` - Latvian
		* `lgg` - Lugbara (without diacritics)
		* `lggo` - Lugbara official (with diacritics)
		* `mr` - Marathi
		* `my` - Myanmar
		* `no` - Norwegian
		* `pl` - Polish
		* `pt` - Portuguese
		* `ptbr` - Portuguese (Brazilian)
		* `pa` - Punjabi Gurmukhi
		* `ro` - Romanian
		* `ru` - Russian
		* `sk` - Slovak
		* `sl` - Slovenian
		* `so` - Somali
		* `st` - Sotho
		* `es` - Spanish
		* `sw` - Swahili
		* `sv` - Swedish
		* `th` - Thai
		* `tl` - Tagalog (Filipino)
		* `tr` - Turkish
		* `ur` - Urdu
		* `vi` - Vietnamese
		* `yo` - Yoruba
		* `zu` - Zulu
		* `afr` - Afrikaans
		* `ara` - Arabic, Macrolanguage
		* `hye` - Armenian
		* `eus` - Basque
		* `ben` - Bengali
		* `bre` - Breton
		* `bul` - Bulgarian
		* `cat` - Catalan, Valencian
		* `zho` - Chinese, Macrolanguage
		* `hrv` - Croatian
		* `ces` - Czech
		* `dan` - Danish
		* `nld` - Dutch
		* `eng` - English
		* `epo` - Esperanto
		* `est` - Estonian, Macrolanguage
		* `fin` - Finnish
		* `fra` - French
		* `glg` - Galician
		* `deu` - German
		* `ell` - Greek, Modern
		* `hau` - Hausa
		* `heb` - Hebrew
		* `hin` - Hindi
		* `hun` - Hungarian
		* `ind` - Indonesian
		* `gle` - Irish
		* `ita` - Italian
		* `jpn` - Japanese
		* `kor` - Korean
		* `lat` - Latin
		* `lav` - Latvian, Macrolanguage
		* `lgg` - Lugbara
		* `lggNd` - Lugbara, No diacritics
		* `mar` - Marathi
		* `mya` - Myanmar (Burmese)
		* `nob` - Norwegian bokmål
		* `fas` - Persian (Farsi)
		* `pol` - Polish
		* `por` - Portuguese
		* `porBr` - Portuguese-Brazilian
		* `panGu` - Punjabi (Panjabi), Gurmukhi script
		* `ron` - Romanian (Moldavian, Moldovan)
		* `rus` - Russian
		* `slk` - Slovak
		* `slv` - Slovenian
		* `som` - Somali
		* `sot` - Sotho, Southern
		* `spa` - Spanish
		* `swa` - Swahili, Macrolanguage
		* `swe` - Swedish
		* `tha` - Thai
		* `tgl` - Tagalog (Filipino)
		* `tur` - Turkish
		* `urd` - Urdu
		* `vie` - Vietnamese
		* `yor` - Yoruba
		* `zul` - Zulu

		```javascript
		sw = require('stopword')
		norwegianStopwords = sw.no
		// norwegianStopwords now contains an Array of norwgian stopwords
		const { nob } = require('stopword')
		norwegianBokmaalStopwords = nob
		// norwegianBokmaalStopwords now contains an Array of Norwegian bokmål stopwords
		```

		#### Languages with no space between words
		`ja` Japanese, `th` Thai and `zh` Chinese Simplified and some of the other languages supported have no space between words. For these languages you need to split the text into an array of words in another way than just `textString.split(' ')`. You can check out [TinySegmenter](http://chasen.org/%7Etaku/software/TinySegmenter/) for Japanese and [chinese-tokenizer](https://github.com/yishn/chinese-tokenizer) for Chinese.
		`jpn` Japanese, `tha` Thai, `zho` Chinese and some of the other supported languages have no spaces between words. For these languages you need to split the text into an array of words in some other way than just `textString.split(' ')`. You can check out [TinySegmenter](http://chasen.org/%7Etaku/software/TinySegmenter/) for Japanese and [chinese-tokenizer](https://github.com/yishn/chinese-tokenizer) for Chinese.

		@@ -178,5 +202,7 @@ ## Your language missing?

		## Contributions
		Most of this work is from other projects and people, and wouldn't be possible without them. Thanks to among others the [stopwords-iso](https://github.com/stopwords-iso) project and the [more-stoplist](https://github.com/dohliam/more-stoplists) project. And thanks for all your code input: @arthurdenner, @micalevisk, @fabric-io-rodrigues, @behzadmoradi, @guysaar223, @ConnorKrammer, @GreXLin85, @nanopx and @virtual!
		## Contributions and licenses
		Most of this work is from other projects and people, and wouldn't be possible without them. Thanks to, among others, the [stopwords-iso](https://github.com/stopwords-iso) project and the [more-stoplists](https://github.com/dohliam/more-stoplists) project. And thanks for all your code input: @arthurdenner, @micalevisk, @fabric-io-rodrigues, @behzadmoradi, @guysaar223, @ConnorKrammer, @GreXLin85, @nanopx, @virtual and @JustroX!

		[Licenses](./dist/LICENSES.txt) for both this and all third party code.

		[license-image]: http://img.shields.io/badge/license-MIT-blue.svg?style=flat
		@@ -187,7 +213,5 @@ [license-url]: LICENSE
		[npm-downloads-image]: http://img.shields.io/npm/dm/stopword.svg?style=flat
		[travis-url]: http://travis-ci.org/fergiemcdowall/stopword
		[travis-image]: http://img.shields.io/travis/fergiemcdowall/stopword.svg?style=flat
		[snyk-url]: https://snyk.io/test/github/fergiemcdowall/stopword?targetFile=package.json
		[snyk-image]: https://snyk.io/test/github/fergiemcdowall/stopword/badge.svg?targetFile=package.json
		[CI-url]: https://github.com/fergiemcdowall/stopword/actions/workflows/tests.yml
		[CI-image]: https://github.com/fergiemcdowall/stopword/actions/workflows/tests.yml/badge.svg
		[standardjs-url]: https://standardjs.com
		[standardjs-image]: https://img.shields.io/badge/code_style-standard-brightgreen.svg?style=flat-square

dist/stopword.js

dist/stopword.js.map

lib/stopword.js

lib/stopwords_af.js

lib/stopwords_ar.js

lib/stopwords_bg.js

lib/stopwords_bn.js

lib/stopwords_br.js

lib/stopwords_ca.js

lib/stopwords_cs.js

lib/stopwords_da.js

lib/stopwords_de.js

lib/stopwords_el.js

lib/stopwords_en.js

lib/stopwords_eo.js

lib/stopwords_es.js

lib/stopwords_et.js

lib/stopwords_eu.js

lib/stopwords_fa.js

lib/stopwords_fi.js

lib/stopwords_fr.js

lib/stopwords_ga.js

lib/stopwords_gl.js

lib/stopwords_ha.js

lib/stopwords_he.js

lib/stopwords_hi.js

lib/stopwords_hr.js

lib/stopwords_hu.js

lib/stopwords_hy.js

lib/stopwords_id.js

lib/stopwords_it.js

lib/stopwords_ja.js

lib/stopwords_ko.js

lib/stopwords_la.js

lib/stopwords_lgg.js

lib/stopwords_lggo.js

lib/stopwords_lv.js

lib/stopwords_mr.js

lib/stopwords_my.js

lib/stopwords_nl.js

lib/stopwords_no.js

lib/stopwords_pa.js

lib/stopwords_pl.js

lib/stopwords_pt.js

lib/stopwords_ptbr.js

lib/stopwords_ro.js

lib/stopwords_ru.js

lib/stopwords_sk.js

lib/stopwords_sl.js

lib/stopwords_so.js

lib/stopwords_st.js

lib/stopwords_sv.js

lib/stopwords_sw.js

lib/stopwords_th.js

lib/stopwords_tl.js

lib/stopwords_tr.js

lib/stopwords_ur.js

lib/stopwords_vi.js

lib/stopwords_yo.js

lib/stopwords_zh.js

lib/stopwords_zu.js

webpack.config.js

demo/index.html

Sorry, the diff of this file is not supported yet

LICENSE

Sorry, the diff of this file is not supported yet

stopword - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics