baidu-ocr-api
Advanced tools
Comparing version 1.0.2 to 2.0.0
### start | ||
```js | ||
node orc.js | ||
node cdnurl.ocr.js | ||
``` |
165
lib/ocr.js
var crypto = require('crypto'); | ||
var Promise = require('bluebird'); | ||
var fs = require('fs'); | ||
var request = require('request-promise'); | ||
var urllib = require('urllib'); | ||
var fs = require('fs'); | ||
Promise.promisifyAll(fs); | ||
var headersToSign = []; | ||
@@ -20,5 +24,11 @@ function OCR(ak,sk) { | ||
} | ||
var merge = opt.merge||true; | ||
var merge = opt.merge; | ||
if(merge === 'false'||merge ===false) | ||
{ | ||
merge = false; | ||
}else{ | ||
merge = true; | ||
} | ||
// init data | ||
var type = opt.type||'line'; | ||
var type = opt.type||'text'; | ||
var path = pathOpt[type]; | ||
@@ -38,49 +48,62 @@ var url = opt.url; | ||
}; | ||
getImgBase64(url,function (err,result) { | ||
if(err){ | ||
return cb(err); | ||
} | ||
var data = { | ||
base64:result, | ||
language:language | ||
}; | ||
// get Authorization | ||
var databuffer = new Buffer(JSON.stringify(data)); | ||
headers['Content-Type'] = 'application/json'; | ||
headers['Content-Length'] = databuffer.length; | ||
String.prototype.startWith = function(compareStr){ | ||
return this.indexOf(compareStr) == 0; | ||
} | ||
return new Promise(function (resolve,reject) { | ||
getImgBase64(url).then(function(result) { | ||
var data = { | ||
base64:result, | ||
language:language | ||
}; | ||
// get Authorization | ||
var databuffer = new Buffer(JSON.stringify(data)); | ||
headers['Content-Type'] = 'clarapplication/json'; | ||
headers['Content-Length'] = databuffer.length; | ||
var content = 'bce-auth-v1/'+ accessKeyId +'/'+ requestDate +'/' + expire; | ||
// get SigningKey | ||
var SigningKey = crypto.createHmac('sha256', secretAccessKey).update(content).digest('hex'); | ||
var CanonicalURI = path; | ||
var CanonicalQueryString = getCanonicalQueryString(params); | ||
var CanonicalHeaders = getCanonicalHeaders(headers); | ||
var CanonicalRequest = [httpMethod.toUpperCase(), CanonicalURI, CanonicalQueryString, CanonicalHeaders].join('\n'); | ||
// get Signature | ||
var Signature = crypto.createHmac('sha256', SigningKey).update(CanonicalRequest).digest('hex'); | ||
// Mosaic Authorization | ||
headers.Authorization = [content, headersToSign.join(';'), Signature].join('/'); | ||
var url = 'http://'+headers.host+path; | ||
var options = { | ||
host: headers.host, | ||
path: path+'?'+getCanonicalQueryString(params), | ||
method: httpMethod, | ||
headers: headers, | ||
timeout:15000, | ||
data:data | ||
}; | ||
// send request | ||
urllib.request(url,options,function (err,data,resp) { | ||
if(err){ | ||
return cb(err) | ||
} | ||
var resultJson = JSON.parse(data.toString()); | ||
return cb(null,resultJson); | ||
var content = 'bce-auth-v1/'+ accessKeyId +'/'+ requestDate +'/' + expire; | ||
// get SigningKey | ||
var SigningKey = crypto.createHmac('sha256', secretAccessKey).update(content).digest('hex'); | ||
var CanonicalURI = path; | ||
var CanonicalQueryString = getCanonicalQueryString(params); | ||
var CanonicalHeaders = getCanonicalHeaders(headers); | ||
var CanonicalRequest = [httpMethod.toUpperCase(), CanonicalURI, CanonicalQueryString, CanonicalHeaders].join('\n'); | ||
// get Signature | ||
var Signature = crypto.createHmac('sha256', SigningKey).update(CanonicalRequest).digest('hex'); | ||
// Mosaic Authorization | ||
headers.Authorization = [content, headersToSign.join(';'), Signature].join('/'); | ||
var url = 'http://'+headers.host+path; | ||
var options = { | ||
json:data, | ||
host: headers.host, | ||
path: path+'?'+getCanonicalQueryString(params), | ||
headers: headers, | ||
method:httpMethod, | ||
encoding:'UTF-8' | ||
}; | ||
request(url,options).then(function (result) { | ||
if(!result.results){ | ||
return reject(result) | ||
} | ||
if(merge){ | ||
var words = ''; | ||
var rectangles = []; | ||
result.results.forEach(function (result) { | ||
words+= result.word; | ||
rectangles.push(result.rectangle) | ||
}) | ||
return resolve({results:{ | ||
words:words, | ||
rectangles:rectangles | ||
}}) | ||
} | ||
return resolve(result); | ||
}).catch(function (err) { | ||
reject(err); | ||
}) | ||
}).catch(function (err) { | ||
reject(err); | ||
}) | ||
}) | ||
} | ||
String.prototype.startWith = function(compareStr){ | ||
return this.indexOf(compareStr) == 0; | ||
} | ||
function getCanonicalQueryString(params) { | ||
@@ -134,26 +157,30 @@ var result = []; | ||
// 获取base64 | ||
function getImgBase64(url,cb) { | ||
function getImgBase64(url) { | ||
return new Promise(function (resolve,reject) { | ||
// 外部地址 | ||
if(url.startWith('http')||url.startWith('https')){ | ||
urllib.request(url,{ | ||
headers:{ | ||
'User-Agent': 'Paw/2.1 (Macintosh; OS X/10.10.5) GCDHTTPRequest', | ||
'Referer':'http://baidu.com' | ||
} | ||
},function(err,data,resp) { | ||
if(err){ | ||
return cb(err) | ||
} | ||
return cb(null,data.toString('base64')); | ||
}) | ||
}else{ // 本地地址 | ||
fs.readFile(url,function (err,data) { | ||
if(err){ | ||
return cb(err) | ||
}else{ | ||
return cb(null,data.toString('base64')) | ||
} | ||
}) | ||
} | ||
if(url.startWith('http')||url.startWith('https')){ | ||
request({ | ||
method:'GET', | ||
url:url, | ||
headers:{ | ||
'User-Agent': 'Paw/2.1 (Macintosh; OS X/10.10.5) GCDHTTPRequest', | ||
'Referer':'http://baidu.com' | ||
}, | ||
encoding:null | ||
}).then(function (result) { | ||
resolve(result.toString('base64')); | ||
}).catch(function (err) { | ||
reject(err); | ||
}) | ||
}else{ // 本地地址 | ||
fs.readFileAsync(url) | ||
.then(function (data) { | ||
resolve(data.toString('base64')) | ||
}) | ||
.catch(function (err) { | ||
reject(err); | ||
}) | ||
} | ||
}) | ||
} | ||
}; |
{ | ||
"name": "baidu-ocr-api", | ||
"version": "1.0.2", | ||
"version": "2.0.0", | ||
"description": "OCR for baidu api", | ||
@@ -12,2 +12,5 @@ "main": "index.js", | ||
}, | ||
"bin": { | ||
"ocr": "./bin/ocr" | ||
}, | ||
"keywords": [ | ||
@@ -21,2 +24,7 @@ "ocr", | ||
"dependencies": { | ||
"bluebird": "^3.4.1", | ||
"colors": "^1.1.2", | ||
"minimist": "^1.2.0", | ||
"request": "^2.73.0", | ||
"request-promise": "^4.0.2", | ||
"urllib": "^2.11.0" | ||
@@ -23,0 +31,0 @@ }, |
## [Baidu-OCR-API](https://bce.baidu.com/doc/OCR/ProductDescription.html#.E4.BB.8B.E7.BB.8D) for nodejs | ||
[![NPM version][npm-image]][npm-url] [![Build Status][travis-image]][travis-url] [![Coveralls Status][coveralls-image]][coveralls-url] | ||
@@ -8,44 +7,58 @@ | ||
### Install | ||
### Install & Usage | ||
#### 1. Global | ||
```sh | ||
npm install baidu-ocr-api -g | ||
ocr --help | ||
# 远程图片 | ||
ocr http://7pun4e.com1.z0.glb.clouddn.com/test.jpg | ||
# 本地图片 | ||
ocr ./test.jpg | ||
``` | ||
##### 效果图 | ||
![](https://raw.githubusercontent.com/netpi/baidu-ocr-api/master/examples/test01.jpg) | ||
的早期世界观是建立在《魔兽争霸3:冰封王座》的基础上的,因此与现在暴雪公司的《魔兽世界》的背景设定有一定的联系,但由于版本更迭又略有不同。整个地图中地形名费伍德森林,费伍德森林是网络游戏《魔兽世界》中的游戏地图,位于卡利姆多境内的一片森林。这片由森林和草场构成的繁荣动荡的土地曾经由卡尔多雷掌管,并曾经处于半神塞纳留斯的保护下。燃烧军团的铁蹄践踏了这片土地,没有被毁灭的树木和生物则被恶魔的暴行永远的诅咒着 | ||
#### 2. nodejs | ||
```sh | ||
npm install baidu-ocr-api --save | ||
``` | ||
### Usage | ||
FYI [examples](https://github.com/netpi/baidu-ocr-api/tree/master/examples) | ||
> 登录 [百度bcs控制台中心](https://console.bce.baidu.com/iam/#/iam/accesslist) 申请access key | ||
> | ||
参看 [examples](https://github.com/netpi/baidu-ocr-api/tree/master/examples) | ||
```js | ||
/** | ||
```js | ||
登陆 百度bcs控制台中心 申请access key | ||
https://console.bce.baidu.com/iam/#/iam/accesslist | ||
**/ | ||
var ak = 'your ak'; | ||
var sk = 'your sk'; | ||
var ocr = require('baidu-ocr-api').create(ak,sk); | ||
var opt= { | ||
// url can be a cdn url, or a local url like : __dirname+'/test.jpg' | ||
url:'http://7xod3k.com1.z0.glb.clouddn.com/fbuguhlemsgeilpkxykeluenbjkozzne', | ||
// type: line,text,character default:line | ||
type:'line', | ||
language:'CHN_ENG' | ||
} | ||
ocr.scan(opt,function (err,result) { | ||
if(err){ | ||
return console.log(err); | ||
} | ||
console.log(result); // 参看 examples | ||
var ocr = require('../').create(ak,sk); | ||
// 外部图片 | ||
ocr.scan({ | ||
url:'http://7pun4e.com1.z0.glb.clouddn.com/test.jpg', // 支持本地路径 | ||
type:'text', | ||
}).then(function (result) { | ||
return console.log(result) | ||
}).catch(function (err) { | ||
console.log('err', err); | ||
}) | ||
``` | ||
### opt | ||
| 字段名 | 值 | 选项 | 描述 | | ||
| -------- | ---------------------------------------- | ----------- | ------ | | ||
| url | `cdn` 地址 <br/> 本地地址: __dirname+'/test.jpg' | 必选 | 目标地址 | ||
| type | `text`:识别某张图中的所有文字<br>`line`: 将结果作为单行文字去解析<br>`character`:识别某张图中的单个文字 | 可选(默认:text) | 返回结果结构 | ||
| language | 可选 : `CHN_ENG`/`CHN`/`ENG` | 可选<br/> 默认:`CHN_ENG` | 返回语言类型 | ||
### test | ||
@@ -52,0 +65,0 @@ ```sh |
var should = require('should'); | ||
var ak = 'b7d11214c8fc452db3de12028cf46daa'; | ||
var sk = '64631fe987f4423bb0a117101bf90a45' | ||
var sk = '64631fe987f4423bb0a117101bf90a45'; | ||
var wrong_sk = 'wrong_sk'; | ||
var ocr = require('../').create(ak,sk); | ||
var ocr2 = require('../').create(ak,wrong_sk); | ||
@@ -10,6 +12,29 @@ describe('test/ocr.test.js',function () { | ||
ocr.scan({ | ||
url:'https://raw.githubusercontent.com/netpi/baidu-ocr-api/master/test/test.jpg', | ||
url:'http://7xod3k.com1.z0.glb.clouddn.com/mjdalykzuyefpzlgmlnkjizcfcuelxnu', | ||
type:'text', | ||
}).then(function (result) { | ||
result.should.be.an.instanceOf(Object); | ||
done(); | ||
}) | ||
}) | ||
}) | ||
describe('wrong_sk ',function () { | ||
it('should be catch error',function (done) { | ||
ocr2.scan({ | ||
url:'http://7xod3k.com1.z0.glb.clouddn.com/mjdalykzuyefpzlgmlnkjizcfcuelxnu', | ||
}).then(function (result) { | ||
}).catch(function (err) { | ||
err.should.be.an.instanceOf(Error); | ||
done(); | ||
}) | ||
}) | ||
}) | ||
describe('scan for cdn_url: merge==false ',function () { | ||
it('should have result',function (done) { | ||
ocr.scan({ | ||
url:'http://7xod3k.com1.z0.glb.clouddn.com/mjdalykzuyefpzlgmlnkjizcfcuelxnu', | ||
type:'text', | ||
merge:false | ||
},function (err,result) { | ||
}).then(function (result) { | ||
result.should.be.an.instanceOf(Object); | ||
@@ -20,9 +45,21 @@ done() | ||
}) | ||
describe('scan for wrong local_url ',function () { | ||
it('should have result',function (done) { | ||
ocr.scan({ | ||
url:'http://wrong_url', | ||
type:'text' | ||
}).then(function (result) { | ||
}).catch(function (err) { | ||
err.should.be.an.instanceOf(Error); | ||
done() | ||
}) | ||
}) | ||
}) | ||
describe('scan for local url ',function () { | ||
it('should return object',function (done) { | ||
ocr.scan({ | ||
url:__dirname+'/test.jpg', | ||
type:'line' | ||
},function (err,result) { | ||
url:__dirname+'/test01.jpg', | ||
type:'text' | ||
}).then(function(result) { | ||
result.should.be.an.instanceOf(Object); | ||
@@ -34,3 +71,3 @@ done() | ||
describe('scan for wrong url ',function () { | ||
describe('scan for wrong local_url ',function () { | ||
it('should have result',function (done) { | ||
@@ -40,3 +77,5 @@ ocr.scan({ | ||
type:'line' | ||
},function (err,result) { | ||
}).then(function (result) { | ||
}).catch(function (err) { | ||
err.should.be.an.instanceOf(Error); | ||
@@ -43,0 +82,0 @@ done() |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
689936
15
285
81
6
+ Added bluebird@^3.4.1
+ Added colors@^1.1.2
+ Added minimist@^1.2.0
+ Added request@^2.73.0
+ Added request-promise@^4.0.2
+ Added ajv@6.12.6 (transitive)
+ Added asn1@0.2.6 (transitive)
+ Added assert-plus@1.0.0 (transitive)
+ Added asynckit@0.4.0 (transitive)
+ Added aws-sign2@0.7.0 (transitive)
+ Added aws4@1.13.2 (transitive)
+ Added bcrypt-pbkdf@1.0.2 (transitive)
+ Added bluebird@3.7.2 (transitive)
+ Added caseless@0.12.0 (transitive)
+ Added colors@1.4.0 (transitive)
+ Added combined-stream@1.0.8 (transitive)
+ Added core-util-is@1.0.2 (transitive)
+ Added dashdash@1.14.1 (transitive)
+ Added delayed-stream@1.0.0 (transitive)
+ Added ecc-jsbn@0.1.2 (transitive)
+ Added extend@3.0.2 (transitive)
+ Added extsprintf@1.3.0 (transitive)
+ Added fast-deep-equal@3.1.3 (transitive)
+ Added fast-json-stable-stringify@2.1.0 (transitive)
+ Added forever-agent@0.6.1 (transitive)
+ Added form-data@2.3.3 (transitive)
+ Added getpass@0.1.7 (transitive)
+ Added har-schema@2.0.0 (transitive)
+ Added har-validator@5.1.5 (transitive)
+ Added http-signature@1.2.0 (transitive)
+ Added is-typedarray@1.0.0 (transitive)
+ Added isstream@0.1.2 (transitive)
+ Added jsbn@0.1.1 (transitive)
+ Added json-schema@0.4.0 (transitive)
+ Added json-schema-traverse@0.4.1 (transitive)
+ Added json-stringify-safe@5.0.1 (transitive)
+ Added jsprim@1.4.2 (transitive)
+ Added lodash@4.17.21 (transitive)
+ Added mime-db@1.52.0 (transitive)
+ Added mime-types@2.1.35 (transitive)
+ Added oauth-sign@0.9.0 (transitive)
+ Added performance-now@2.1.0 (transitive)
+ Added psl@1.15.0 (transitive)
+ Added punycode@2.3.1 (transitive)
+ Added qs@6.5.3 (transitive)
+ Added request@2.88.2 (transitive)
+ Added request-promise@4.2.6 (transitive)
+ Added request-promise-core@1.1.4 (transitive)
+ Added safe-buffer@5.2.1 (transitive)
+ Added sshpk@1.18.0 (transitive)
+ Added stealthy-require@1.1.1 (transitive)
+ Added tough-cookie@2.5.0 (transitive)
+ Added tunnel-agent@0.6.0 (transitive)
+ Added tweetnacl@0.14.5 (transitive)
+ Added uri-js@4.4.1 (transitive)
+ Added uuid@3.4.0 (transitive)
+ Added verror@1.10.0 (transitive)
- Removed call-bind-apply-helpers@1.0.1 (transitive)
- Removed call-bound@1.0.3 (transitive)
- Removed dunder-proto@1.0.1 (transitive)
- Removed es-define-property@1.0.1 (transitive)
- Removed es-errors@1.3.0 (transitive)
- Removed es-object-atoms@1.0.0 (transitive)
- Removed function-bind@1.1.2 (transitive)
- Removed get-intrinsic@1.2.6 (transitive)
- Removed gopd@1.2.0 (transitive)
- Removed has-symbols@1.1.0 (transitive)
- Removed hasown@2.0.2 (transitive)
- Removed math-intrinsics@1.1.0 (transitive)
- Removed object-inspect@1.13.3 (transitive)
- Removed qs@6.13.1 (transitive)
- Removed side-channel@1.1.0 (transitive)
- Removed side-channel-list@1.0.0 (transitive)
- Removed side-channel-map@1.0.1 (transitive)
- Removed side-channel-weakmap@1.0.2 (transitive)