littlefork-plugin-language-detection
Advanced tools
Comparing version 0.1.0 to 0.1.1
@@ -8,15 +8,48 @@ var _ = require('lodash'), | ||
Object.defineProperty(exports, "__esModule", { | ||
value: true | ||
}); | ||
exports.plugins = undefined; | ||
var textLangGuess = function(unit) { | ||
var text = _.get(unit, unit._lf_title); | ||
var language = _.first(_.first(detector.detect(text, 1))); | ||
return _.extend(unit, {_lf_language: language}); | ||
// turn the unit into a string | ||
var text = JSON.stringify(unit); | ||
// match for anything between :" and " | ||
var re = /:\"(.*?)(?=\")/g; | ||
// run the regex | ||
var matches = text.match(re); | ||
// filter for strings that contain spaces, as these are most likely real text | ||
// join the array into a string | ||
var langtext = _.filter(matches, function (s) { return s.indexOf(' ') >= 0 }).join(' '); | ||
// detect the language of the string | ||
var language = _.first(_.first(detector.detect(langtext))); | ||
// merge and return the unit | ||
return _.merge(unit, {_lf_language: language}); | ||
}; | ||
module.exports = function(val) { | ||
var plugin = function(val, other) { | ||
return Promise.map(val.data, textLangGuess) | ||
.then( function(data) { | ||
debug("Analyzed language in %d data elements", _.size(data)); | ||
return _.extend(val, {data: data}); | ||
var str = "Analyzed language in data elements"; | ||
other.log.info(str); | ||
var d = _.set(val, 'data', data); | ||
return d; | ||
}); | ||
}; | ||
plugin.description = 'Detect languages in unit'; | ||
var plugins = { | ||
'detect_languages': plugin | ||
} | ||
exports.plugins = plugins; | ||
exports.default = { plugins: plugins }; |
{ | ||
"name": "littlefork-plugin-language-detection", | ||
"version": "0.1.0", | ||
"description": "Detect the language of the content.", | ||
"version": "0.1.1", | ||
"description": "A littlefork plugin, that detects the language of the content.", | ||
"main": "lib/index.js", | ||
@@ -9,2 +9,3 @@ "config": { | ||
"testDir": "test", | ||
"distDir": "lib", | ||
"reporter": "spec" | ||
@@ -11,0 +12,0 @@ }, |
@@ -1,24 +0,16 @@ | ||
# littlefork-plugin-language-detection | ||
This is a plugin for [littlefork](https://github.com/tacticaltech/littlefork). | ||
# Littlefork $NAME plugin | ||
Detect the language of content. | ||
## Plugin internals | ||
## Installation | ||
## Command line argument | ||
``` | ||
npm install --save littlefork-plugin-language-detection | ||
``` | ||
## Input/Output example | ||
## Usage | ||
# the Littlefork framework | ||
This plugin exports a single transformation plugin: | ||
TODO small generic explain | ||
### `language-detection` transformation | ||
Learn more here: TODO public link | |
``` | ||
$(npm bin)/littlefork -c cfg.json -p ddg,language-detection | ||
``` | ||
It tries to detect the language of the content of `_lf_content`. It adds a new | ||
field called `_lf_language` to each unit. | |
Made with <3 by TacticalTech |
Sorry, the diff of this file is not supported yet
2814
6
46
17