Comparing version 3.1.1 to 4.0.0
@@ -23,2 +23,2 @@ // This file is generated by `build.js`. | ||
ell: /[\u0370-\u0373\u0375-\u0377\u037A-\u037D\u037F\u0384\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03E1\u03F0-\u03FF\u1D26-\u1D2A\u1D5D-\u1D61\u1D66-\u1D6A\u1DBF\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEF\u1FF2-\u1FF4\u1FF6-\u1FFE\u2126\uAB65]|\uD800[\uDD40-\uDD8C\uDDA0]|\uD834[\uDE00-\uDE45]/g | ||
}; | ||
} |
165
index.js
// This file is generated by `build.js` | ||
'use strict'; | ||
'use strict' | ||
/* Load `trigram-utils`. */ | ||
var utilities = require('trigram-utils'); | ||
var utilities = require('trigram-utils') | ||
/* Load `expressions` (regular expressions matching | ||
* scripts). */ | ||
var expressions = require('./expressions.js'); | ||
var expressions = require('./expressions.js') | ||
/* Load `data` (trigram information per language, | ||
* per script). */ | ||
var data = require('./data.json'); | ||
var data = require('./data.json') | ||
/* Expose `detectAll` on `detect`. */ | ||
detect.all = detectAll; | ||
detect.all = detectAll | ||
/* Expose `detect`. */ | ||
module.exports = detect; | ||
module.exports = detect | ||
/* Maximum sample length. */ | ||
var MAX_LENGTH = 2048; | ||
var MAX_LENGTH = 2048 | ||
/* Minimum sample length. */ | ||
var MIN_LENGTH = 10; | ||
var MIN_LENGTH = 10 | ||
/* The maximum distance to add when a given trigram does | ||
* not exist in a trigram dictionary. */ | ||
var MAX_DIFFERENCE = 300; | ||
var MAX_DIFFERENCE = 300 | ||
/* Construct trigram dictionaries. */ | ||
(function () { | ||
var languages; | ||
var name; | ||
var trigrams; | ||
var model; | ||
var script; | ||
var weight; | ||
;(function() { | ||
var languages | ||
var name | ||
var trigrams | ||
var model | ||
var script | ||
var weight | ||
for (script in data) { | ||
languages = data[script]; | ||
languages = data[script] | ||
for (name in languages) { | ||
model = languages[name].split('|'); | ||
model = languages[name].split('|') | ||
weight = model.length; | ||
weight = model.length | ||
trigrams = {}; | ||
trigrams = {} | ||
while (weight--) { | ||
trigrams[model[weight]] = weight; | ||
trigrams[model[weight]] = weight | ||
} | ||
languages[name] = trigrams; | ||
languages[name] = trigrams | ||
} | ||
} | ||
})(); | ||
})() | ||
@@ -67,3 +67,3 @@ /** | ||
function detect(value, options) { | ||
return detectAll(value, options)[0][0]; | ||
return detectAll(value, options)[0][0] | ||
} | ||
@@ -81,19 +81,19 @@ | ||
function detectAll(value, options) { | ||
var settings = options || {}; | ||
var minLength = MIN_LENGTH; | ||
var script; | ||
var settings = options || {} | ||
var minLength = MIN_LENGTH | ||
var script | ||
if (settings.minLength !== null && settings.minLength !== undefined) { | ||
minLength = settings.minLength; | ||
minLength = settings.minLength | ||
} | ||
if (!value || value.length < minLength) { | ||
return und(); | ||
return und() | ||
} | ||
value = value.substr(0, MAX_LENGTH); | ||
value = value.substr(0, MAX_LENGTH) | ||
/* Get the script which characters occur the most | ||
* in `value`. */ | ||
script = getTopScript(value, expressions); | ||
script = getTopScript(value, expressions) | ||
@@ -105,3 +105,3 @@ /* One languages exists for the most-used script. | ||
if (!(script[0] in data)) { | ||
return script[1] === 0 ? und() : singleLanguageTuples(script[0]); | ||
return script[1] === 0 ? und() : singleLanguageTuples(script[0]) | ||
} | ||
@@ -111,5 +111,6 @@ | ||
* normalize the distance values. */ | ||
return normalize(value, getDistances( | ||
utilities.asTuples(value), data[script[0]], settings | ||
)); | ||
return normalize( | ||
value, | ||
getDistances(utilities.asTuples(value), data[script[0]], settings) | ||
) | ||
} | ||
@@ -128,12 +129,12 @@ | ||
function normalize(value, distances) { | ||
var min = distances[0][1]; | ||
var max = (value.length * MAX_DIFFERENCE) - min; | ||
var index = -1; | ||
var length = distances.length; | ||
var min = distances[0][1] | ||
var max = value.length * MAX_DIFFERENCE - min | ||
var index = -1 | ||
var length = distances.length | ||
while (++index < length) { | ||
distances[index][1] = 1 - ((distances[index][1] - min) / max) || 0; | ||
distances[index][1] = 1 - (distances[index][1] - min) / max || 0 | ||
} | ||
return distances; | ||
return distances | ||
} | ||
@@ -151,17 +152,17 @@ | ||
function getTopScript(value, scripts) { | ||
var topCount = -1; | ||
var topScript; | ||
var script; | ||
var count; | ||
var topCount = -1 | ||
var topScript | ||
var script | ||
var count | ||
for (script in scripts) { | ||
count = getOccurrence(value, scripts[script]); | ||
count = getOccurrence(value, scripts[script]) | ||
if (count > topCount) { | ||
topCount = count; | ||
topScript = script; | ||
topCount = count | ||
topScript = script | ||
} | ||
} | ||
return [topScript, topCount]; | ||
return [topScript, topCount] | ||
} | ||
@@ -177,5 +178,5 @@ | ||
function getOccurrence(value, expression) { | ||
var count = value.match(expression); | ||
var count = value.match(expression) | ||
return (count ? count.length : 0) / value.length || 0; | ||
return (count ? count.length : 0) / value.length || 0 | ||
} | ||
@@ -196,17 +197,14 @@ | ||
function getDistances(trigrams, languages, options) { | ||
var distances = []; | ||
var whitelist = options.whitelist || []; | ||
var blacklist = options.blacklist || []; | ||
var language; | ||
var distances = [] | ||
var whitelist = options.whitelist || [] | ||
var blacklist = options.blacklist || [] | ||
var language | ||
languages = filterLanguages(languages, whitelist, blacklist); | ||
languages = filterLanguages(languages, whitelist, blacklist) | ||
for (language in languages) { | ||
distances.push([ | ||
language, | ||
getDistance(trigrams, languages[language]) | ||
]); | ||
distances.push([language, getDistance(trigrams, languages[language])]) | ||
} | ||
return distances.length ? distances.sort(sort) : und(); | ||
return distances.length ? distances.sort(sort) : und() | ||
} | ||
@@ -225,25 +223,25 @@ | ||
function getDistance(trigrams, model) { | ||
var distance = 0; | ||
var index = -1; | ||
var length = trigrams.length; | ||
var trigram; | ||
var difference; | ||
var distance = 0 | ||
var index = -1 | ||
var length = trigrams.length | ||
var trigram | ||
var difference | ||
while (++index < length) { | ||
trigram = trigrams[index]; | ||
trigram = trigrams[index] | ||
if (trigram[0] in model) { | ||
difference = trigram[1] - model[trigram[0]] - 1; | ||
difference = trigram[1] - model[trigram[0]] - 1 | ||
if (difference < 0) { | ||
difference = -difference; | ||
difference = -difference | ||
} | ||
} else { | ||
difference = MAX_DIFFERENCE; | ||
difference = MAX_DIFFERENCE | ||
} | ||
distance += difference; | ||
distance += difference | ||
} | ||
return distance; | ||
return distance | ||
} | ||
@@ -266,24 +264,21 @@ | ||
function filterLanguages(languages, whitelist, blacklist) { | ||
var filteredLanguages; | ||
var language; | ||
var filteredLanguages | ||
var language | ||
if (whitelist.length === 0 && blacklist.length === 0) { | ||
return languages; | ||
return languages | ||
} | ||
filteredLanguages = {}; | ||
filteredLanguages = {} | ||
for (language in languages) { | ||
if ( | ||
( | ||
whitelist.length === 0 || | ||
whitelist.indexOf(language) !== -1 | ||
) && | ||
(whitelist.length === 0 || whitelist.indexOf(language) !== -1) && | ||
blacklist.indexOf(language) === -1 | ||
) { | ||
filteredLanguages[language] = languages[language]; | ||
filteredLanguages[language] = languages[language] | ||
} | ||
} | ||
return filteredLanguages; | ||
return filteredLanguages | ||
} | ||
@@ -293,3 +288,3 @@ | ||
function und() { | ||
return singleLanguageTuples('und'); | ||
return singleLanguageTuples('und') | ||
} | ||
@@ -300,3 +295,3 @@ | ||
function singleLanguageTuples(language) { | ||
return [[language, 1]]; | ||
return [[language, 1]] | ||
} | ||
@@ -306,3 +301,3 @@ | ||
function sort(a, b) { | ||
return a[1] - b[1]; | ||
return a[1] - b[1] | ||
} |
{ | ||
"name": "franc-min", | ||
"threshold": 8000000, | ||
"version": "3.1.1", | ||
"version": "4.0.0", | ||
"description": "Detect the language of text", | ||
@@ -6,0 +6,0 @@ "license": "MIT", |
@@ -7,3 +7,3 @@ <!--This file is generated by `build.js`--> | ||
Built with support for 81 languages (8M or more speakers). | ||
Built with support for 82 languages (8M or more speakers). | ||
@@ -66,15 +66,15 @@ View the [monorepo](https://github.com/wooorm/franc) for more packages and | ||
| [`fuv`](http://www-01.sil.org/iso639-3/documentation.asp?id=fuv) | Nigerian Fulfulde | 22M | | ||
| [`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Cyrillic) | 21M | | ||
| [`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Latin) | 21M | | ||
| [`bos`](http://www-01.sil.org/iso639-3/documentation.asp?id=bos) | Bosnian (Cyrillic) | 21M | | ||
| [`hrv`](http://www-01.sil.org/iso639-3/documentation.asp?id=hrv) | Croatian | 21M | | ||
| [`nld`](http://www-01.sil.org/iso639-3/documentation.asp?id=nld) | Dutch | 21M | | ||
| [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Cyrillic) | 21M | | ||
| [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Latin) | 21M | | ||
| [`srp`](http://www-01.sil.org/iso639-3/documentation.asp?id=srp) | Serbian (Cyrillic) | 21M | | ||
| [`tha`](http://www-01.sil.org/iso639-3/documentation.asp?id=tha) | Thai | 21M | | ||
| [`ckb`](http://www-01.sil.org/iso639-3/documentation.asp?id=ckb) | Central Kurdish | 20M | | ||
| [`yor`](http://www-01.sil.org/iso639-3/documentation.asp?id=yor) | Yoruba | 20M | | ||
| [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Cyrillic) | 18M | | ||
| [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Latin) | 18M | | ||
| [`uzn`](http://www-01.sil.org/iso639-3/documentation.asp?id=uzn) | Northern Uzbek (Cyrillic) | 18M | | ||
| [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Arabic) | 18M | | ||
| [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Latin) | 18M | | ||
| [`zlm`](http://www-01.sil.org/iso639-3/documentation.asp?id=zlm) | Malay (individual language) (Arabic) | 18M | | ||
| [`ibo`](http://www-01.sil.org/iso639-3/documentation.asp?id=ibo) | Igbo | 17M | | ||
@@ -99,2 +99,3 @@ | [`nep`](http://www-01.sil.org/iso639-3/documentation.asp?id=nep) | Nepali (macrolanguage) | 16M | | ||
| [`zyb`](http://www-01.sil.org/iso639-3/documentation.asp?id=zyb) | Yongbei Zhuang | 10M | | ||
| [`pbu`](http://www-01.sil.org/iso639-3/documentation.asp?id=pbu) | Northern Pashto | 10M | | ||
| [`kin`](http://www-01.sil.org/iso639-3/documentation.asp?id=kin) | Kinyarwanda | 9M | | ||
@@ -101,0 +102,0 @@ | [`zul`](http://www-01.sil.org/iso639-3/documentation.asp?id=zul) | Zulu | 9M | |
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
121203
679
112