Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@chattylabs/language-detection

Package Overview
Dependencies
Maintainers
1
Versions
2
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@chattylabs/language-detection - npm Package Compare versions

Comparing version 0.1.0 to 0.1.1

6

package.json
{
"name": "@chattylabs/language-detection",
"version": "0.1.0",
"description": "Library that helps detecting the language of a given piece of text.",
"version": "0.1.1",
"description": "Package to detect the language of a given text (focusing on short sms type text used on tweets, facebook, WhatsApp, etc)",
"main": "src/index.js",
"repository": {
"type": "git",
"url": "git+https://github.com/danielantelo/language-detector.git"
"url": "git+https://github.com/chattylabs/language-detector"
},

@@ -10,0 +10,0 @@ "author": {

@@ -24,3 +24,3 @@ # Language Detector

```
const detect = require('customisable-language-detection')
const detect = require('@chattylabs/language-detection')
const result = detect('some text to detect')

@@ -33,3 +33,3 @@ const language = result.language

```
const detect = require('customisable-language-detection')
const detect = require('@chattylabs/language-detection')
const customLanguageProfiles = require('../path/to/data/languageProfiles.json')

@@ -48,3 +48,3 @@

const combinedProfiles = {
...require('customisable-language-detection').languageProfiles,
...require('@chattylabs/language-detection').languageProfiles,
...customLanguageProfiles

@@ -62,3 +62,3 @@ }

// bin/train.js
const train = require('customisable-language-detection').train
const train = require('@chattylabs/language-detection').train
train('./path/to/custom/samples/*.txt', './path/to/custom/export/languageProfiles.json')

@@ -75,3 +75,3 @@ ```

```
const detect = require('customisable-language-detection')
const detect = require('@chattylabs/language-detection')
const customLanguageProfiles = require('../path/to/data/languageProfiles.json')

@@ -115,3 +115,3 @@ const customReducers = require('../path/to/your/reducers')

const combinedProfiles = {
...require('customisable-language-detection').reducers,
...require('@chattylabs/language-detection').reducers,
...customReducers

@@ -118,0 +118,0 @@ }

@@ -5,3 +5,3 @@ const reducer = require('./reducer')

module.exports = (text, opts) => {
module.exports = (text, opts= {}) => {
const { languageProfiles = defaultLanguageProfiles, reducers } = opts

@@ -18,3 +18,3 @@ const allLanguages = Object.keys(languageProfiles)

const languagesIntersection = () => allLanguages.filter(lang => -1 !== reducedLanguages.indexOf(lang))
const languages = reducedLanguages.length > 1 ? languagesIntersection : allLanguages;
const languages = reducedLanguages.length > 1 ? languagesIntersection() : allLanguages;
const inputProfile = profiler(text)

@@ -24,3 +24,4 @@ const scores = {}

languages.forEach(language => {
const found = languageProfiles[language].findIndex(entry => entry.token === ngram.token)
const foundPos = languageProfiles[language].findIndex(entry => entry.token === ngram.token)
const found = foundPos >= 0
const penalty = found ? Math.abs(found - index) : 1000

@@ -33,3 +34,3 @@ language in scores ? (scores[language] -= penalty) : (scores[language] = 0 - penalty)

.map(language => ({ language: language, score: scores[language] }))
.sort((first, second) => first.score - second.score)
.sort((first, second) => second.score - first.score)

@@ -36,0 +37,0 @@ const bestMatchParts = sorted[0].language.split('_');

const defaultReducers = require('./utils/reducers')
module.exports = (text, reducers = defaultReducers) => {
let reduced;
let reduced = [];
reducers.forEach((current) => {

@@ -9,8 +9,3 @@ if (!current.regex.test(text)) {

}
const currentLanguages = new Set(current.languages)
if (!reduced) {
reduced = currentLanguages
} else {
reduced = new Set([...reduced].filter(lang => currentLanguages.has(lang)));
}
reduced = new Set([...reduced, ...current.languages])
})

@@ -17,0 +12,0 @@

// @TODO
module.exports = [
{
regex: /[ñ]/i,
languages: ['es', 'gn', 'gl']
},
{
regex: /\bik\b/i,
languages: ['nl']
},
{
regex: /\bich\b/i,
languages: ['de']
},
{
regex: /ß/i,
languages: ['de']
},
{
regex: /\bczy\b/i,
languages: ['pl']
},
{
regex: /[Ł|ń|ś|ź]/i,
languages: ['pl']
},
{
regex: /å/i,
languages: ['nb', 'nn', 'fo', 'is', 'da', 'sv']
},
{
regex: /\baf\b/i,
languages: ['da']
},
{
regex: /\bnei\b/i,
languages: ['nb', 'nn']
},
{
regex: /\och\b/i,
languages: ['sv']
},
{
regex: /[ı|ğ|ș]/i,
languages: ['tr']
},
{
regex: /[ă|ș|ț]/i,
languages: ['ro']
},
{
regex: /[ă]/i,
languages: ['vi']
},
{
regex: /[á|é|í|ó|ú]/i,
languages: ['fr', 'es', 'it', 'cn', 'nl', 'fo', 'is', 'pt', 'vi', 'cy', 'el']
},
// {
// regex: /[ñ]+/i,
// languages: ['es', 'gn', 'gl']
// },
// {
// regex: /[ü]+/i,

@@ -16,6 +68,2 @@ // languages: ['es', 'tr', 'fr', 'hu', 'et', 'de', 'sv']

// {
// regex: /[á|é|í|ó|ú]+/i,
// languages: ['fr', 'es', 'it', 'cn', 'nl', 'fo', 'is', 'pt', 'vi', 'cy', 'el']
// },
// {
// regex: /[â|ê|î]+/i,

@@ -22,0 +70,0 @@ // languages: ['fr', 'it', 'pt', 'ro', 'ru', 'hr', 'tr', 'vi']

const detect = require('../src')
const languageProfilesMock = require('./__mocks__/languageProfiles.json')
test('detects english', () => {
test('allows to pass custom profiled data', () => {
expect(detect('hello worldy world', {

@@ -14,7 +14,11 @@ languageProfiles: languageProfilesMock,

test('detects english', () => {
expect(detect("what's up dude")).toEqual({
language: 'en',
country: ''
})
})
test('detects spanish', () => {
expect(detect('q tal tío', {
languageProfiles: languageProfilesMock,
reducers: []
})).toEqual({
expect(detect("que pasa tío")).toEqual({
language: 'es',

@@ -21,0 +25,0 @@ country: ''

test('it exports detector as default', () => {
const detect = require('customisable-language-detection')
const detect = require('../src')
expect(detect).toBeDefined

@@ -8,3 +8,3 @@ expect(typeof detect).toBe('function')

test('it exports the base language profiles', () => {
const profiles = require('customisable-language-detection').languageProfiles
const profiles = require('../src').languageProfiles
expect(profiles).toBeDefined

@@ -15,3 +15,3 @@ expect(typeof profiles).toBe('object')

test('it exports the base reducers', () => {
const reducers = require('customisable-language-detection').reducers
const reducers = require('../src').reducers
expect(reducers).toBeDefined

@@ -22,3 +22,3 @@ expect(Array.isArray(reducers)).toBe(true)

test('it exports the reducer', () => {
const reducer = require('customisable-language-detection').reducer
const reducer = require('../src').reducer
expect(reducer).toBeDefined

@@ -29,3 +29,3 @@ expect(typeof reducer).toBe('function')

test('it exports the profiler', () => {
const profiler = require('customisable-language-detection').profiler
const profiler = require('../src').profiler
expect(profiler).toBeDefined

@@ -36,3 +36,3 @@ expect(typeof profiler).toBe('function')

test('it exports the trainer', () => {
const trainer = require('customisable-language-detection').trainer
const trainer = require('../src').trainer
expect(trainer).toBeDefined

@@ -39,0 +39,0 @@ expect(typeof trainer).toBe('function')

const reduce = require('../src/reducer')
test('reduces texts with vowel accents', () => {
expect(reduce('some accént')).toEqual(['fr', 'es', 'it', 'cn', 'nl', 'fo', 'is', 'pt', 'vi', 'cy', 'el'])
})
/**
 * Asserts that the reducer narrows `text` down to exactly `expectedLanguages`,
 * ignoring the order of the language codes.
 *
 * Both lists are copied before sorting (`Array.prototype.sort` sorts in place),
 * so neither the caller's array nor the array handed back by `reduce` is
 * reordered as a side effect of the assertion.
 *
 * @param {string} text - Sample text passed to `reduce`.
 * @param {string[]} expectedLanguages - Language codes expected back, in any order.
 * @returns {*} Whatever the `toEqual` matcher returns.
 */
const assertTextReducesToLanguages = (text, expectedLanguages) => {
  const actual = [...reduce(text)].sort()
  const expected = [...expectedLanguages].sort()
  return expect(actual).toEqual(expected)
}
test('reduces texts with ñ ignoring case', () => {
expect(reduce('bua niÑo')).toEqual(['es', 'gn', 'gl'])
assertTextReducesToLanguages('bua niÑo', ['es', 'gn', 'gl'])
})
test('returns the smallest intersection set of matches', () => {
expect(reduce('buá niño', [
{
regex: /[ñ]+/i,
languages: ['es', 'gn', 'gl']
},
{
regex: /[á]+/i,
languages: ['es']
}
])).toEqual(['es'])
// Each test feeds sample text to the reducer (through
// assertTextReducesToLanguages) and checks which language codes come back.
// An empty expected list means no reducer rule is supposed to match.

test('Dutch if there is ik', () => {
  assertTextReducesToLanguages('Ik kan er nooit tegen als mensen me negeren.', ['nl'])
  assertTextReducesToLanguages('kan ik er nooit tegen als mensen me negeren.', ['nl'])
  // note that it should not catch ik within a word
  assertTextReducesToLanguages('fik er nooit tegen als mensen me negeren.', [])
})

// Only the word `ich` is exercised here; the letter ß has its own test below.
test('German if there is ich', () => {
  assertTextReducesToLanguages('Aha ich seh angeblich', ['de'])
  assertTextReducesToLanguages('Ich seh angeblich', ['de'])
  // `ich` embedded in a longer word must not count
  assertTextReducesToLanguages('bIch bich', [])
})

test('German if there is the letter ß', () => {
  assertTextReducesToLanguages('seh angeblich süß aus', ['de'])
})

test('Polish if there is czy', () => {
  assertTextReducesToLanguages('Czy mogłbym zasnąć w przedmieściach Twoich myśli?', ['pl'])
})

test('Polish if there is letters Ł, ń, ś or ź', () => {
  assertTextReducesToLanguages('mogłbym Ła', ['pl'])
  assertTextReducesToLanguages('ńa', ['pl'])
  assertTextReducesToLanguages('śa', ['pl'])
  assertTextReducesToLanguages('źa', ['pl'])
})

test('Scandinavian if there is a letter å', () => {
  assertTextReducesToLanguages('Så skal jeg bare finde ud', ['nb', 'nn', 'fo', 'is', 'da', 'sv'])
})

test('Danish if there is af', () => {
  assertTextReducesToLanguages('skal jeg bare finde ud af', ['da'])
})

test('Norwegian if there is nei', () => {
  assertTextReducesToLanguages('nei vi som har finale ', ['nb', 'nn'])
})

test('Swedish if there is och', () => {
  assertTextReducesToLanguages('fb och du tog', ['sv'])
})

// NOTE(review): the title also mentions ş, but only ı and ğ are asserted on here.
test('Turkish if there is a letter ı (i without point) or ğ or ş', () => {
  assertTextReducesToLanguages('En büyük hatayı yaptım', ['tr'])
  assertTextReducesToLanguages('Çok doğru', ['tr'])
})

// The sample is expected to come back with 'tr' and 'vi' as well as 'ro',
// because (as the title says) its letters are shared with those languages.
test('Romanian if there is a letter ă or ș or ț (although ă is also used in Vietnamese as ş is in Turkish)', () => {
  assertTextReducesToLanguages('Încântat de șa cunoștință', ['ro', 'tr', 'vi'])
})

Sorry, the diff of this file is too big to display

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc