node-icu-charset-detector
Advanced tools
Comparing version 0.0.6 to 0.0.7
@@ -6,2 +6,6 @@ var CharsetMatch = require("./build/Release/node-icu-charset-detector").CharsetMatch; | ||
if (charsetMatch.getName() === null) { | ||
return null; | ||
} | ||
var charset = new String(charsetMatch.getName()); | ||
@@ -14,3 +18,28 @@ charset.language = charsetMatch.getLanguage(); | ||
/**
 * Detects the charset of a stream incrementally.
 *
 * Every incoming chunk is appended to the data received so far and the
 * accumulated buffer is passed to detectCharset(). As soon as detectCharset()
 * returns a non-null result, listening stops and the result is reported.
 *
 * @param {EventEmitter} stream - Source emitting "data" and "end" events.
 *     Chunks are assumed to be Buffers (i.e. no encoding set on the stream)
 *     -- TODO confirm against callers.
 * @param {function} onDetectionFinish - Called exactly once: with the value
 *     returned by detectCharset() on success, or with null when the stream
 *     ends before any charset was detected.
 */
function detectCharsetStream(stream, onDetectionFinish) {
    var buffer = null;
    var finished = false;

    function onChunkArrives(chunk) {
        // Buffer.concat() expects an array of buffers as its first argument;
        // passing the buffers as separate arguments throws a TypeError.
        buffer = buffer ? Buffer.concat([buffer, chunk]) : chunk;
        var charset = detectCharset(buffer);
        if (charset !== null) {
            // Detection succeeded: stop listening to the stream.
            stream.removeListener("data", onChunkArrives);
            stream.removeListener("end", onStreamEnd);
            finished = true;
            onDetectionFinish(charset);
        }
    }

    function onStreamEnd() {
        // Report failure only if no chunk ever produced a detection.
        if (!finished) {
            onDetectionFinish(null);
        }
    }

    stream.on("data", onChunkArrives);
    stream.on("end", onStreamEnd);
}
// Public API: buffer-based detection, stream-based detection, and the
// CharsetMatch class loaded from the compiled addon.
exports.detectCharset = detectCharset; | ||
exports.detectCharsetStream = detectCharsetStream; | ||
exports.CharsetMatch = CharsetMatch; |
{ | ||
"name" : "node-icu-charset-detector", | ||
"version" : "0.0.6", | ||
"main" : "./node-icu-charset-detector.js", | ||
"description" : "Simple binding for ICU charset detector", | ||
"keywords" : ["charset-detection", "icu"], | ||
"name": "node-icu-charset-detector", | ||
"version": "0.0.7", | ||
"main": "./node-icu-charset-detector.js", | ||
"description": "Simple binding for ICU charset detector", | ||
"keywords": [ | ||
"charset-detection", | ||
"icu" | ||
], | ||
"repository": { | ||
@@ -19,3 +22,5 @@ "type": "git", | ||
"licenses": [ | ||
{ "type": "MIT" } | ||
{ | ||
"type": "MIT" | ||
} | ||
], | ||
@@ -22,0 +27,0 @@ "engines": { |
@@ -9,10 +9,45 @@ # ICU Character Set Detection for Node.js | ||
At first, install `libicu` into your system. Debian users can install `libicu` by `apt-get` easily. | ||
First, install `libicu` on your system (see [these instructions](#installing-icu) for details). | ||
sudo apt-get install libicu-dev | ||
After that, install `node-icu-charset-detector` from npm. | ||
npm install node-icu-charset-detector | ||
```npm install node-icu-charset-detector``` | ||
### Installing ICU | ||
#### Linux | ||
* Debian (Ubuntu) | ||
```apt-get install libicu-dev``` | ||
* Gentoo | ||
```emerge icu``` | ||
#### OSX | ||
* MacPorts | ||
```port install icu +devel``` | ||
* Homebrew | ||
```shell | ||
brew install icu4c | ||
ln -s /usr/local/Cellar/icu4c/<VERSION>/bin/icu-config /usr/local/bin/icu-config | ||
ln -s /usr/local/Cellar/icu4c/<VERSION>/include/unicode /usr/local/include | ||
``` | ||
If you experience issues with Homebrew installing version 50.1 of icu4c, try the following: | ||
```shell | ||
brew search icu4c | ||
brew tap homebrew/versions | ||
brew versions icu4c | ||
cd $(brew --prefix) && git pull --rebase | ||
git checkout c25fd2f $(brew --prefix)/Library/Formula/icu4c.rb | ||
brew install icu4c | ||
``` | ||
## Usage | ||
@@ -24,11 +59,13 @@ | ||
var charsetDetector = require("node-icu-charset-detector"); | ||
```javascript | ||
var charsetDetector = require("node-icu-charset-detector"); | ||
var buffer = fs.readFileSync("/path/to/the/file"); | ||
var charset = charsetDetector.detectCharset(buffer); | ||
console.log("charset name: " + charset.toString()); | ||
console.log("language: " + charset.language); | ||
console.log("detection confidence: " + charset.confidence); | ||
var buffer = fs.readFileSync("/path/to/the/file"); | ||
var charset = charsetDetector.detectCharset(buffer); | ||
console.log("charset name: " + charset.toString()); | ||
console.log("language: " + charset.language); | ||
console.log("detection confidence: " + charset.confidence); | ||
``` | ||
`detectCharset(buffer)` returns the detected charset name for `buffer`, and the returned charset name has two extra properties `language` and `confidence`: | ||
@@ -47,16 +84,18 @@ | ||
function bufferToString(buffer) { | ||
var charsetDetector = require("node-icu-charset-detector"); | ||
var charset = charsetDetector.detectCharset(buffer).toString(); | ||
```javascript | ||
function bufferToString(buffer) { | ||
var charsetDetector = require("node-icu-charset-detector"); | ||
var charset = charsetDetector.detectCharset(buffer).toString(); | ||
try { | ||
return buffer.toString(charset); | ||
} catch (x) { | ||
var Iconv = require("iconv").Iconv; | ||
var charsetConverter = new Iconv(charset, "utf8"); | ||
return charsetConverter.convert(buffer).toString(); | ||
} | ||
} | ||
try { | ||
return buffer.toString(charset); | ||
} catch (x) { | ||
var Iconv = require("iconv").Iconv; | ||
var charsetConverter = new Iconv(charset, "utf8"); | ||
return charsetConverter.convert(buffer).toString(); | ||
} | ||
} | ||
var buffer = fs.readFileSync("/path/to/the/file"); | ||
var bufferString = bufferToString(buffer); | ||
var buffer = fs.readFileSync("/path/to/the/file"); | ||
var bufferString = bufferToString(buffer); | ||
``` |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Trivial Package
Supply chain risk: Packages with fewer than 10 lines of code are easily copied into your own project and may not warrant the additional supply chain risk of an external dependency.
Found 1 instance in 1 package
10866
34
99
1