New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

bank-voucher-ocr

Package Overview
Dependencies
Maintainers
1
Versions
42
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bank-voucher-ocr - npm Package Compare versions

Comparing version 0.11.0 to 0.12.0

36

dist/bvocr.esm.js

@@ -5,3 +5,3 @@ /**

*
* @version 0.11.0
* @version 0.12.0
* @author waiting

@@ -141,3 +141,5 @@ * @license MIT

fileMap.set(fileInfo.name, fileInfo);
if (fileInfo.name) {
fileMap.set(fileInfo.name, fileInfo);
}
return of(fileMap)

@@ -191,2 +193,13 @@ }))

}
if (height / pageHeight < 0.1 || height < 100) {
const ret = {
name: '',
path: '',
width: 0,
height: 0,
size: 0,
};
return of(ret)
}
const curDate = moment().format('YYYYMMDD');

@@ -204,2 +217,3 @@ const dst = join(options.targetDir, `${curDate}-${Math.random()}-${index}.jpg`);

// console.info('split page opts:', opts)
return from(crop(opts)).pipe(mergeMap((info$$1) => {

@@ -377,3 +391,12 @@ const ret = {

from(createDir(baseDir)).pipe(concatMap(() => createDir(splitDir)), concatMap(() => createDir(resizeDir)))
from(createDir(baseDir)).pipe(catchError(err => {
console.info(err);
return of(null)
}), concatMap(() => createDir(splitDir)), catchError(err => {
console.info(err);
return of(null)
}), concatMap(() => createDir(resizeDir)), catchError(err => {
console.info(err);
return of(null)
}))
.subscribe(() => { }, console.error);

@@ -454,6 +477,9 @@ }

const { baseDir, path, bankZone, bankRegexpOptsMap, debug, lang, skipImgDir, } = options;
const zoneTmpDir = join(baseDir, zoneTmpDirPrefix, Math.random().toString());
const zoneTmpDir = join(baseDir, zoneTmpDirPrefix, `${basename(path)}-${Math.random().toString()}`);
debug && console.info('recognize pageBank:', zoneTmpDir, path);
return from(createDir(zoneTmpDir)).pipe(mergeMap(() => cropImgZone(join(path), zoneTmpDir, bankZone)), // 切分page title区域
return from(createDir(zoneTmpDir)).pipe(catchError(err => {
console.info(err);
return of(null)
}), mergeMap(() => cropImgZone(join(path), zoneTmpDir, bankZone)), // 切分page title区域
concatMap(zoneInfo => {

@@ -460,0 +486,0 @@ return runOcr(zoneInfo.path, lang).pipe(map(() => ({ path, zoneImgPath: zoneInfo.path })), mapTo(zoneInfo.path), catchError(() => of(zoneInfo.path)))

4

dist/bvocr.esm.min.js

@@ -5,3 +5,3 @@ /**

*
* @version 0.11.0
* @version 0.12.0
* @author waiting

@@ -12,3 +12,3 @@ * @license MIT

import{access,chmod,close,copyFile,mkdir,open,readdir,readFile,rmdir,stat,unlink,write,writeFile}from"fs";import{basename,join,normalize,resolve,sep}from"path";import{promisify}from"util";import{tmpdir}from"os";import{crop,info,resize}from"easyimage";import*as moment_ from"moment";import{from,of,range,defer}from"rxjs";import{concatMap,map,mergeMap,reduce,catchError,defaultIfEmpty,delay,filter,mapTo,retry,skipWhile,take,tap}from"rxjs/operators";import run from"rxrunscript";const closeAsync=promisify(close),chmodAsync=promisify(chmod),copyFileAsync=promisify(copyFile),mkdirAsync=promisify(mkdir),openAsync=promisify(open),readFileAsync=promisify(readFile),readDirAsync=promisify(readdir),rmdirAsync=promisify(rmdir),unlinkAsync=promisify(unlink),writeAsync=promisify(write),writeFileAsync=promisify(writeFile);function isPathAcessible(e){return e?new Promise(t=>access(e,e=>t(!e))):Promise.resolve(!1)}function isDirExists(e){return e?isDirFileExists(e,"DIR"):Promise.resolve(!1)}function isFileExists(e){return e?isDirFileExists(e,"FILE"):Promise.resolve(!1)}function isDirFileExists(e,t){return e?new Promise(i=>{stat(e,(e,r)=>{i(!(e||!r)&&("DIR"===t?r.isDirectory():r.isFile()))})}):Promise.resolve(!1)}async function createDir(e){if(!e)throw new Error("value of path param invalid");e=normalize(e),await isDirExists(e)||await e.split(sep).reduce(async(e,t)=>{const i=resolve(await e,t);return await isPathAcessible(i)||await mkdirAsync(i,493),i},Promise.resolve(sep))}async function rimraf(e){e&&(await _rimraf(e),await isDirExists(e)&&await rmdirAsync(e))}async function _rimraf(e){if(e&&await isPathAcessible(e)){if(await isFileExists(e))return void await unlinkAsync(e);const t=await readDirAsync(e);if(t.length)for(const i of t)await _rimraf(join(e,i));else await rmdirAsync(e)}}const 
initialBaseTmpDir=join(tmpdir(),"voucher-ocr"),initialResizeImgDir=join(initialBaseTmpDir,"resize"),initialSplitTmpDir=join(initialBaseTmpDir,"split"),zoneTmpDirPrefix="zone",initialBankZone={zoneName:"bank",width:2250,height:390,offsetX:70,offsetY:10},moment=moment_;function splitPagetoItems(e,t,i){return readImgInfo(e).pipe(map(r=>{const n=calcItemsPerPage(r.height,i.height);return n?range(0,n).pipe(mergeMap(n=>{const a={index:n,itemConfig:Object.assign({},i),srcPath:e,targetDir:t,pageHeight:r.height};return r.width<a.itemConfig.width&&(a.itemConfig.width=r.width),parseSplitPage(a).pipe(mergeMap(e=>{const t=new Map;return t.set(e.name,e),of(t)}))})):of(new Map)}),concatMap(e=>e))}function resizeAndSaveImg(e,t,i,r){if(i<=0||i>1)throw new Error(`value of scale invalid: "${i}"`);return readImgInfo(e).pipe(mergeMap(n=>{const a={src:e,dst:t,width:n.width*i,height:n.height*i,quality:r};return from(resize(a))}),map(e=>{const t={name:e.name,path:e.path,width:e.width,height:e.height,size:e.size};return t}))}function parseSplitPage(e){const{index:t,srcPath:i,pageHeight:r}=e,{width:n,marginBottom:a}=e.itemConfig;let{height:o}=e.itemConfig;const s=0,c=t*o;c+o>r&&(o=r-c);const p=moment().format("YYYYMMDD"),m=join(e.targetDir,`${p}-${Math.random()}-${t}.jpg`),l={dst:m,src:i,quality:100,cropWidth:n,cropHeight:o+a,x:0,y:c};return from(crop(l)).pipe(mergeMap(e=>{const t={name:e.name,path:e.path,width:e.width,height:e.height,size:e.size};return of(t)}))}function readImgInfo(e){return from(info(e))}function calcItemsPerPage(e,t){const i=33;return e>=t?Math.ceil((e+33)/t):1}function getOcrZoneOptsByBankName(e,t){return t.get(e)}function cropImgAllZones(e,t,i){return from(i).pipe(mergeMap(i=>cropImgZone(e,t,i).pipe(map(e=>[i.zoneName,e]))),reduce((e,t)=>(e.set(t[0],t[1]),e),new Map))}function cropImgZone(e,t,i){const{zoneName:r,width:n,height:a,offsetX:o,offsetY:s}=i,c=join(t,`${r}-${Math.random()}.png`),p={dst:c,src:e,quality:100,cropWidth:n,cropHeight:a,x:o,y:s};return 
from(crop(p)).pipe(map(e=>{const t={name:e.name,path:e.path,width:e.width,height:e.height,size:e.size};return t}))}function runOcr(e,t){t||(t="eng");const i=`tesseract "${e}" "${e}" -l ${t}`;return run(i)}function retrieveKeyValuesFromOcrResult(e,t,i,r=!1){if(!t)throw new Error("matchRules empty");return readTxtFile(e).pipe(map(e=>({buf:e,regexp:t})),map(({buf:e,regexp:t})=>{const n=i&&"function"==typeof i?i(e):e.toString("utf8");return retrieveValueByRegexp(n,t,r)}))}function getRegexpOptsByName(e,t){for(const i of Object.keys(t))if(i===e)return t[i]}function retrieveValueByRegexp(e,t,i=!1){const r=regexMatch(e,t,i);return i&&console.info("retrieveValueByRegexp ----- text start: ---------------\x3e \n",e,"\n<--------------- text END ----------------\n\n",t,">>>>>>>>matched value: ",r,"\n"),r}function readTxtFile(e){if(!e)throw new Error("path empty");const t=e.split(".");if(t.length>1&&"txt"!==t[t.length-1].toLowerCase())throw new Error(`file extensiion must empty or be '.txt', but is: "${e}"`);return defer(async()=>readFileAsync(e))}function prepareContent(e){let t=e&&e.byteLength?e.toString():"";return t?t=t.replace(/(?<=\S) /g,""):""}function regexMatch(e,t,i=!1){if(e)for(const r of t){const t=e.match(r);if(Array.isArray(t)&&t.length)return r.global&&t.length>1?(i&&console.info("----------multi matched regex: --------------\x3e\n",t,"\n--- used regex ----: ",r,"\n<-------------ignore matched result---------------\n\n"),""):(i&&console.info("----------matched regex: --------------\x3e\n",t,"\n--- used regex ----: ",r,"\n<----------------------------\n\n"),t[0])}}const moment$1=moment_;class Bvo{constructor(e){this.options=e;const 
t=+this.options.globalScale;this.options.globalScale=Number.isNaN(t)||t<=0?1:t,this.options.debug=!!this.options.debug;const{baseTmpDir:i,splitTmpDir:r,resizeImgDir:n}=e,a=i||initialBaseTmpDir,o=r||initialSplitTmpDir,s=n||initialResizeImgDir;from(createDir(a)).pipe(concatMap(()=>createDir(o)),concatMap(()=>createDir(s))).subscribe(()=>{},console.error)}run(e){return recognize(e,this.options)}}function recognize(e,t){const{bankZone:i,baseTmpDir:r,debug:n,defaultOcrLang:a,jpegQuality:o,scale:s,splitTmpDir:c,resizeImgDir:p,voucherConfigMap:m,globalScale:l,skipImgDir:f}=t,u=r||initialBaseTmpDir,g=c||initialSplitTmpDir,h=p||initialResizeImgDir,d=f?join(f,moment$1().format("YYYYMMDD")):"",y=parseVoucherConfigMapScale(m,l),w=parseOcrZoneScale(i,l),b=s/l,k=getBankRegexpOpts(y),D={baseDir:u,path:e,bankZone:w,bankRegexpOptsMap:k,debug:!!n,lang:a,skipImgDir:d},M=recognizePageBank(D).pipe(filter(({bankName:e})=>!!e&&"n/a"!==e),concatMap(({bankName:e,pagePath:t})=>(n&&console.info("start split page"),splitPageToImgs(t,e,g,y))),concatMap(({bankName:e,imgFile:t})=>{const i=getOcrFields(e,y);if(!i)throw new Error(`ocrFields not defined with bankName: "${e}"`);const r={bankName:e,baseDir:u,concurrent:2,debug:!!n,defaultValue:"",imgFile:t,ocrFields:i,voucherConfigMap:y};return n&&console.info("recognize item"),recognizeFields(r).pipe(map(i=>(i.set("bank",e),i.set("filename",t.name.trim()),i.set("path",t.path.trim()),i)))}),mergeMap(e=>{const t={retInfo:e,resizeDir:h,scale:b,jpegQuality:o,debug:!!n};return saveImgAndPrune(t)})),x=from(isFileExists(e)).pipe(filter(e=>e));return x.pipe(mergeMap(()=>M))}function recognizePageBank(e){const{baseDir:t,path:i,bankZone:r,bankRegexpOptsMap:n,debug:a,lang:o,skipImgDir:s}=e,c=join(t,zoneTmpDirPrefix,Math.random().toString());return a&&console.info("recognize 
pageBank:",c,i),from(createDir(c)).pipe(mergeMap(()=>cropImgZone(join(i),c,r)),concatMap(e=>runOcr(e.path,o).pipe(map(()=>({path:i,zoneImgPath:e.path})),mapTo(e.path),catchError(()=>of(e.path)))),concatMap(e=>from(n.entries()).pipe(concatMap(([t,i])=>retrieveKeyValuesFromOcrResult(e+".txt",i,e=>e.toString().replace(/(?<=\S)[. ]{1,2}(?=\S)/g,""),a).pipe(map(e=>({bankName:t,value:e})))),skipWhile(({value:e})=>"undefined"==typeof e||"string"==typeof e&&!e.length),take(1),map(({bankName:e})=>({bankName:e,pagePath:i})),defaultIfEmpty({bankName:"n/a",pagePath:""}))),tap(e=>{const{bankName:t,pagePath:r}=e;"n/a"!==t&&r||(console.info(`recognize bank of page fail. no matached regexp. file: "${i}", pagePath: "${r}" `),cpSkipImg(i,s)),a||rimraf(c).catch(console.info)}))}async function cpSkipImg(e,t){t&&(await isPathAcessible(t)||await createDir(t),copyFileAsync(e,join(t,basename(e))).catch(console.error))}function splitPageToImgs(e,t,i,r){const n=r.get(t);if(!n)throw new Error("bank config empty during split page to images");return splitPagetoItems(e,i,n).pipe(mergeMap(e=>{const i=from(e.values()).pipe(map(e=>({bankName:t,imgFile:e})));return i}))}function recognizeFields(e){const{bankName:t,baseDir:i,concurrent:r,debug:n,defaultValue:a,imgFile:o,ocrFields:s,voucherConfigMap:c}=e,p=join(i,zoneTmpDirPrefix,moment$1().format("YYYYMMDD")+"-"+Math.random().toString()),m=getOcrZoneOptsByBankName(t,c);if(!m)throw new Error(`get bankConfig empty with bankName: "${t}"`);const l=from(createDir(p)).pipe(mergeMap(()=>cropImgAllZones(o.path,p,m.ocrZones)),concatMap(e=>{const t={bankConfig:m,ocrFields:s,defaultValue:a,debug:n,concurrent:r>0?r:2,zoneImgMap:e};return batchOcrAndRetrieve(t)}));return l}function batchOcrAndRetrieve(e){const{zoneImgMap:t,bankConfig:i,ocrFields:r,defaultValue:n,concurrent:a,debug:o}=e,{bankName:s}=i,c=from(t.entries()).pipe(delay(5e3),mergeMap(([,e])=>defer(async()=>{const t=e.path,i=t+".txt";return await isFileExists(t)&&await rimraf(t),await 
isFileExists(i)&&await rimraf(i),null}).pipe(delay(2e4),retry(2),catchError(e=>(console.info("Delete zone file retry failed:",e),of(null))))),catchError(()=>of(null))),p=from(t.entries()).pipe(concatMap(e=>ocrAndPickFromZoneImg(e,i,a,o)),reduce((e,t)=>e.set(t.fieldName,t.value),new Map),map(e=>e.set("bank",s)),map(e=>setDefaultValue(e,r,n)),tap(()=>o||c.subscribe()));return p}function setDefaultValue(e,t,i=""){const r=new Map;for(const i of Object.keys(t)){const t=e.get(i);"string"==typeof t?r.set(i,t):r.set(i,"")}return r}function processZoneImgRow(e){const{fieldName:t,value:i}=e,r={fieldName:t,value:i};switch(t){case"amount":r.value=i.trim().replace(/,/g,"");break;case"date":r.value=i.trim().replace(/\D/g,"");break;case"sn":case"destAccountNumber":case"paymentAccountNumber":r.value=i.trim()}return r}function validateZoneImgRow(e,t){if("string"!=typeof t)return!1;switch(e){case"amount":if(validateRetInfoAmout(t))return!0;break;case"sn":if(t)return!0;break;case"date":if(validateRetInfoDate(t))return!0;break;case"bank":return!0;default:if("string"==typeof t)return!0}return!1}function validateRetInfoAmout(e){if(!e)return!1;if(!e.trim())return!1;const t=parseFloat(e);return!Number.isNaN(t)&&"number"==typeof t}function validateRetInfoDate(e){return!!e&&moment$1(e,"YYYYMMDD").isValid()}function getBankRegexpOpts(e){const t=new Map;for(const{bankName:i,regexpOpts:r}of e.values())r&&r.bank&&t.set(i,r.bank);if(!t.size)throw new Error("not BankRegexpOpts found, should not set");return t}function getOcrFields(e,t){const i=t.get(e);if(!i)throw new Error(`get ocrFields empty by bankName: "${e}"`);return i.ocrFields}function saveImgAndPrune(e){const{retInfo:t,resizeDir:i,debug:r,scale:n,jpegQuality:a}=e,o=t.get("filename"),s=t.get("path"),c=t.get("sn");if(!o)throw new Error(`result info map invalid with empty path. info: ${t}`);if(!s)throw new Error(`result info map invalid with empty path. 
info: ${t}`);const p=c?`${(new Date).getTime()}-${c.replace(/[^\d\w]/g,"_")}.jpg`:o,m=moment$1().format("YYYY-MM-DD"),l=join(i,m,p);return t.set("filename",p),resizeAndSaveImg(s,l,n,a).pipe(map(e=>(t.set("path",e.path),t)),tap(()=>{r||unlinkAsync(s).catch(console.info)}))}function genFieldLangs(e,t,i){return i&&"undefined"!=typeof i[e]&&Array.isArray(i[e])?i[e]:t}function ocrAndPickFromZoneImg(e,t,i=2,r=!1){const{ocrDefaultLangs:n,ocrFieldLangs:a,regexpOpts:o,ocrFields:s}=t;return from(Object.entries(s)).pipe(filter(t=>{const i=t[1];return!!i&&i===e[0]}),mergeMap(t=>{const i=t[0];return ocrAndPickFieldFromZoneImg(i,e,o,n,a,r)},i))}function ocrAndPickFieldFromZoneImg(e,t,i,r,n,a=!1){const[,o]=t,s=genFieldLangs(e,r,n),c=s.length-1,p=getRegexpOptsByName(e,i);if(!p)throw new Error(`got regexp empty by zoneName: "${e}"`);return from(s).pipe(concatMap(e=>runOcr(o.path,e).pipe(mapTo(!0),catchError(()=>of(!0)))),concatMap(()=>retrieveKeyValuesFromOcrResult(o.path+".txt",p,prepareContent,a).pipe(map(t=>({fieldName:e,value:t})))),skipWhile((e,t)=>{const i=validateZoneImgRow(e.fieldName,e.value);return!i&&t!==c}),take(1),map(e=>("string"!=typeof e.value&&(e.value=""),e)),map(processZoneImgRow))}function parseVoucherConfigMapScale(e,t){const i=new Map;for(const[r,n]of e){const e=Object.assign({},n),a=[];for(const i of e.ocrZones)a.push(parseOcrZoneScale(i,t));e.ocrZones=a,e.width=e.width*t,e.height=e.height*t,e.marginBottom=e.marginBottom*t,i.set(r,e)}return i}function parseOcrZoneScale(e,t){const i=Object.assign({},e);return i.width=i.width*t,i.height=i.height*t,i.offsetX=i.offsetX*t,i.offsetY=i.offsetY*t,i}export{initialBankZone,initialBaseTmpDir,initialResizeImgDir,initialSplitTmpDir,zoneTmpDirPrefix,Bvo,recognize};
import{access,chmod,close,copyFile,mkdir,open,readdir,readFile,rmdir,stat,unlink,write,writeFile}from"fs";import{basename,join,normalize,resolve,sep}from"path";import{promisify}from"util";import{tmpdir}from"os";import{crop,info,resize}from"easyimage";import*as moment_ from"moment";import{from,of,range,defer}from"rxjs";import{concatMap,map,mergeMap,reduce,catchError,defaultIfEmpty,delay,filter,mapTo,retry,skipWhile,take,tap}from"rxjs/operators";import run from"rxrunscript";const closeAsync=promisify(close),chmodAsync=promisify(chmod),copyFileAsync=promisify(copyFile),mkdirAsync=promisify(mkdir),openAsync=promisify(open),readFileAsync=promisify(readFile),readDirAsync=promisify(readdir),rmdirAsync=promisify(rmdir),unlinkAsync=promisify(unlink),writeAsync=promisify(write),writeFileAsync=promisify(writeFile);function isPathAcessible(e){return e?new Promise(t=>access(e,e=>t(!e))):Promise.resolve(!1)}function isDirExists(e){return e?isDirFileExists(e,"DIR"):Promise.resolve(!1)}function isFileExists(e){return e?isDirFileExists(e,"FILE"):Promise.resolve(!1)}function isDirFileExists(e,t){return e?new Promise(r=>{stat(e,(e,i)=>{r(!(e||!i)&&("DIR"===t?i.isDirectory():i.isFile()))})}):Promise.resolve(!1)}async function createDir(e){if(!e)throw new Error("value of path param invalid");e=normalize(e),await isDirExists(e)||await e.split(sep).reduce(async(e,t)=>{const r=resolve(await e,t);return await isPathAcessible(r)||await mkdirAsync(r,493),r},Promise.resolve(sep))}async function rimraf(e){e&&(await _rimraf(e),await isDirExists(e)&&await rmdirAsync(e))}async function _rimraf(e){if(e&&await isPathAcessible(e)){if(await isFileExists(e))return void await unlinkAsync(e);const t=await readDirAsync(e);if(t.length)for(const r of t)await _rimraf(join(e,r));else await rmdirAsync(e)}}const 
initialBaseTmpDir=join(tmpdir(),"voucher-ocr"),initialResizeImgDir=join(initialBaseTmpDir,"resize"),initialSplitTmpDir=join(initialBaseTmpDir,"split"),zoneTmpDirPrefix="zone",initialBankZone={zoneName:"bank",width:2250,height:390,offsetX:70,offsetY:10},moment=moment_;function splitPagetoItems(e,t,r){return readImgInfo(e).pipe(map(i=>{const n=calcItemsPerPage(i.height,r.height);return n?range(0,n).pipe(mergeMap(n=>{const o={index:n,itemConfig:Object.assign({},r),srcPath:e,targetDir:t,pageHeight:i.height};return i.width<o.itemConfig.width&&(o.itemConfig.width=i.width),parseSplitPage(o).pipe(mergeMap(e=>{const t=new Map;return e.name&&t.set(e.name,e),of(t)}))})):of(new Map)}),concatMap(e=>e))}function resizeAndSaveImg(e,t,r,i){if(r<=0||r>1)throw new Error(`value of scale invalid: "${r}"`);return readImgInfo(e).pipe(mergeMap(n=>{const o={src:e,dst:t,width:n.width*r,height:n.height*r,quality:i};return from(resize(o))}),map(e=>{const t={name:e.name,path:e.path,width:e.width,height:e.height,size:e.size};return t}))}function parseSplitPage(e){const{index:t,srcPath:r,pageHeight:i}=e,{width:n,marginBottom:o}=e.itemConfig;let{height:a}=e.itemConfig;const s=0,c=t*a;if(c+a>i&&(a=i-c),a/i<.1||a<100){const e={name:"",path:"",width:0,height:0,size:0};return of(e)}const p=moment().format("YYYYMMDD"),m=join(e.targetDir,`${p}-${Math.random()}-${t}.jpg`),l={dst:m,src:r,quality:100,cropWidth:n,cropHeight:a+o,x:0,y:c};return from(crop(l)).pipe(mergeMap(e=>{const t={name:e.name,path:e.path,width:e.width,height:e.height,size:e.size};return of(t)}))}function readImgInfo(e){return from(info(e))}function calcItemsPerPage(e,t){const r=33;return e>=t?Math.ceil((e+33)/t):1}function getOcrZoneOptsByBankName(e,t){return t.get(e)}function cropImgAllZones(e,t,r){return from(r).pipe(mergeMap(r=>cropImgZone(e,t,r).pipe(map(e=>[r.zoneName,e]))),reduce((e,t)=>(e.set(t[0],t[1]),e),new Map))}function 
cropImgZone(e,t,r){const{zoneName:i,width:n,height:o,offsetX:a,offsetY:s}=r,c=join(t,`${i}-${Math.random()}.png`),p={dst:c,src:e,quality:100,cropWidth:n,cropHeight:o,x:a,y:s};return from(crop(p)).pipe(map(e=>{const t={name:e.name,path:e.path,width:e.width,height:e.height,size:e.size};return t}))}function runOcr(e,t){t||(t="eng");const r=`tesseract "${e}" "${e}" -l ${t}`;return run(r)}function retrieveKeyValuesFromOcrResult(e,t,r,i=!1){if(!t)throw new Error("matchRules empty");return readTxtFile(e).pipe(map(e=>({buf:e,regexp:t})),map(({buf:e,regexp:t})=>{const n=r&&"function"==typeof r?r(e):e.toString("utf8");return retrieveValueByRegexp(n,t,i)}))}function getRegexpOptsByName(e,t){for(const r of Object.keys(t))if(r===e)return t[r]}function retrieveValueByRegexp(e,t,r=!1){const i=regexMatch(e,t,r);return r&&console.info("retrieveValueByRegexp ----- text start: ---------------\x3e \n",e,"\n<--------------- text END ----------------\n\n",t,">>>>>>>>matched value: ",i,"\n"),i}function readTxtFile(e){if(!e)throw new Error("path empty");const t=e.split(".");if(t.length>1&&"txt"!==t[t.length-1].toLowerCase())throw new Error(`file extensiion must empty or be '.txt', but is: "${e}"`);return defer(async()=>readFileAsync(e))}function prepareContent(e){let t=e&&e.byteLength?e.toString():"";return t?t=t.replace(/(?<=\S) /g,""):""}function regexMatch(e,t,r=!1){if(e)for(const i of t){const t=e.match(i);if(Array.isArray(t)&&t.length)return i.global&&t.length>1?(r&&console.info("----------multi matched regex: --------------\x3e\n",t,"\n--- used regex ----: ",i,"\n<-------------ignore matched result---------------\n\n"),""):(r&&console.info("----------matched regex: --------------\x3e\n",t,"\n--- used regex ----: ",i,"\n<----------------------------\n\n"),t[0])}}const moment$1=moment_;class Bvo{constructor(e){this.options=e;const 
t=+this.options.globalScale;this.options.globalScale=Number.isNaN(t)||t<=0?1:t,this.options.debug=!!this.options.debug;const{baseTmpDir:r,splitTmpDir:i,resizeImgDir:n}=e,o=r||initialBaseTmpDir,a=i||initialSplitTmpDir,s=n||initialResizeImgDir;from(createDir(o)).pipe(catchError(e=>(console.info(e),of(null))),concatMap(()=>createDir(a)),catchError(e=>(console.info(e),of(null))),concatMap(()=>createDir(s)),catchError(e=>(console.info(e),of(null)))).subscribe(()=>{},console.error)}run(e){return recognize(e,this.options)}}function recognize(e,t){const{bankZone:r,baseTmpDir:i,debug:n,defaultOcrLang:o,jpegQuality:a,scale:s,splitTmpDir:c,resizeImgDir:p,voucherConfigMap:m,globalScale:l,skipImgDir:f}=t,u=i||initialBaseTmpDir,g=c||initialSplitTmpDir,h=p||initialResizeImgDir,d=f?join(f,moment$1().format("YYYYMMDD")):"",y=parseVoucherConfigMapScale(m,l),w=parseOcrZoneScale(r,l),b=s/l,k=getBankRegexpOpts(y),D={baseDir:u,path:e,bankZone:w,bankRegexpOptsMap:k,debug:!!n,lang:o,skipImgDir:d},M=recognizePageBank(D).pipe(filter(({bankName:e})=>!!e&&"n/a"!==e),concatMap(({bankName:e,pagePath:t})=>(n&&console.info("start split page"),splitPageToImgs(t,e,g,y))),concatMap(({bankName:e,imgFile:t})=>{const r=getOcrFields(e,y);if(!r)throw new Error(`ocrFields not defined with bankName: "${e}"`);const i={bankName:e,baseDir:u,concurrent:2,debug:!!n,defaultValue:"",imgFile:t,ocrFields:r,voucherConfigMap:y};return n&&console.info("recognize item"),recognizeFields(i).pipe(map(r=>(r.set("bank",e),r.set("filename",t.name.trim()),r.set("path",t.path.trim()),r)))}),mergeMap(e=>{const t={retInfo:e,resizeDir:h,scale:b,jpegQuality:a,debug:!!n};return saveImgAndPrune(t)})),x=from(isFileExists(e)).pipe(filter(e=>e));return x.pipe(mergeMap(()=>M))}function recognizePageBank(e){const{baseDir:t,path:r,bankZone:i,bankRegexpOptsMap:n,debug:o,lang:a,skipImgDir:s}=e,c=join(t,zoneTmpDirPrefix,`${basename(r)}-${Math.random().toString()}`);return o&&console.info("recognize 
pageBank:",c,r),from(createDir(c)).pipe(catchError(e=>(console.info(e),of(null))),mergeMap(()=>cropImgZone(join(r),c,i)),concatMap(e=>runOcr(e.path,a).pipe(map(()=>({path:r,zoneImgPath:e.path})),mapTo(e.path),catchError(()=>of(e.path)))),concatMap(e=>from(n.entries()).pipe(concatMap(([t,r])=>retrieveKeyValuesFromOcrResult(e+".txt",r,e=>e.toString().replace(/(?<=\S)[. ]{1,2}(?=\S)/g,""),o).pipe(map(e=>({bankName:t,value:e})))),skipWhile(({value:e})=>"undefined"==typeof e||"string"==typeof e&&!e.length),take(1),map(({bankName:e})=>({bankName:e,pagePath:r})),defaultIfEmpty({bankName:"n/a",pagePath:""}))),tap(e=>{const{bankName:t,pagePath:i}=e;"n/a"!==t&&i||(console.info(`recognize bank of page fail. no matached regexp. file: "${r}", pagePath: "${i}" `),cpSkipImg(r,s)),o||rimraf(c).catch(console.info)}))}async function cpSkipImg(e,t){t&&(await isPathAcessible(t)||await createDir(t),copyFileAsync(e,join(t,basename(e))).catch(console.error))}function splitPageToImgs(e,t,r,i){const n=i.get(t);if(!n)throw new Error("bank config empty during split page to images");return splitPagetoItems(e,r,n).pipe(mergeMap(e=>{const r=from(e.values()).pipe(map(e=>({bankName:t,imgFile:e})));return r}))}function recognizeFields(e){const{bankName:t,baseDir:r,concurrent:i,debug:n,defaultValue:o,imgFile:a,ocrFields:s,voucherConfigMap:c}=e,p=join(r,zoneTmpDirPrefix,moment$1().format("YYYYMMDD")+"-"+Math.random().toString()),m=getOcrZoneOptsByBankName(t,c);if(!m)throw new Error(`get bankConfig empty with bankName: "${t}"`);const l=from(createDir(p)).pipe(mergeMap(()=>cropImgAllZones(a.path,p,m.ocrZones)),concatMap(e=>{const t={bankConfig:m,ocrFields:s,defaultValue:o,debug:n,concurrent:i>0?i:2,zoneImgMap:e};return batchOcrAndRetrieve(t)}));return l}function batchOcrAndRetrieve(e){const{zoneImgMap:t,bankConfig:r,ocrFields:i,defaultValue:n,concurrent:o,debug:a}=e,{bankName:s}=r,c=from(t.entries()).pipe(delay(5e3),mergeMap(([,e])=>defer(async()=>{const t=e.path,r=t+".txt";return await 
isFileExists(t)&&await rimraf(t),await isFileExists(r)&&await rimraf(r),null}).pipe(delay(2e4),retry(2),catchError(e=>(console.info("Delete zone file retry failed:",e),of(null))))),catchError(()=>of(null))),p=from(t.entries()).pipe(concatMap(e=>ocrAndPickFromZoneImg(e,r,o,a)),reduce((e,t)=>e.set(t.fieldName,t.value),new Map),map(e=>e.set("bank",s)),map(e=>setDefaultValue(e,i,n)),tap(()=>a||c.subscribe()));return p}function setDefaultValue(e,t,r=""){const i=new Map;for(const r of Object.keys(t)){const t=e.get(r);"string"==typeof t?i.set(r,t):i.set(r,"")}return i}function processZoneImgRow(e){const{fieldName:t,value:r}=e,i={fieldName:t,value:r};switch(t){case"amount":i.value=r.trim().replace(/,/g,"");break;case"date":i.value=r.trim().replace(/\D/g,"");break;case"sn":case"destAccountNumber":case"paymentAccountNumber":i.value=r.trim()}return i}function validateZoneImgRow(e,t){if("string"!=typeof t)return!1;switch(e){case"amount":if(validateRetInfoAmout(t))return!0;break;case"sn":if(t)return!0;break;case"date":if(validateRetInfoDate(t))return!0;break;case"bank":return!0;default:if("string"==typeof t)return!0}return!1}function validateRetInfoAmout(e){if(!e)return!1;if(!e.trim())return!1;const t=parseFloat(e);return!Number.isNaN(t)&&"number"==typeof t}function validateRetInfoDate(e){return!!e&&moment$1(e,"YYYYMMDD").isValid()}function getBankRegexpOpts(e){const t=new Map;for(const{bankName:r,regexpOpts:i}of e.values())i&&i.bank&&t.set(r,i.bank);if(!t.size)throw new Error("not BankRegexpOpts found, should not set");return t}function getOcrFields(e,t){const r=t.get(e);if(!r)throw new Error(`get ocrFields empty by bankName: "${e}"`);return r.ocrFields}function saveImgAndPrune(e){const{retInfo:t,resizeDir:r,debug:i,scale:n,jpegQuality:o}=e,a=t.get("filename"),s=t.get("path"),c=t.get("sn");if(!a)throw new Error(`result info map invalid with empty path. info: ${t}`);if(!s)throw new Error(`result info map invalid with empty path. 
info: ${t}`);const p=c?`${(new Date).getTime()}-${c.replace(/[^\d\w]/g,"_")}.jpg`:a,m=moment$1().format("YYYY-MM-DD"),l=join(r,m,p);return t.set("filename",p),resizeAndSaveImg(s,l,n,o).pipe(map(e=>(t.set("path",e.path),t)),tap(()=>{i||unlinkAsync(s).catch(console.info)}))}function genFieldLangs(e,t,r){return r&&"undefined"!=typeof r[e]&&Array.isArray(r[e])?r[e]:t}function ocrAndPickFromZoneImg(e,t,r=2,i=!1){const{ocrDefaultLangs:n,ocrFieldLangs:o,regexpOpts:a,ocrFields:s}=t;return from(Object.entries(s)).pipe(filter(t=>{const r=t[1];return!!r&&r===e[0]}),mergeMap(t=>{const r=t[0];return ocrAndPickFieldFromZoneImg(r,e,a,n,o,i)},r))}function ocrAndPickFieldFromZoneImg(e,t,r,i,n,o=!1){const[,a]=t,s=genFieldLangs(e,i,n),c=s.length-1,p=getRegexpOptsByName(e,r);if(!p)throw new Error(`got regexp empty by zoneName: "${e}"`);return from(s).pipe(concatMap(e=>runOcr(a.path,e).pipe(mapTo(!0),catchError(()=>of(!0)))),concatMap(()=>retrieveKeyValuesFromOcrResult(a.path+".txt",p,prepareContent,o).pipe(map(t=>({fieldName:e,value:t})))),skipWhile((e,t)=>{const r=validateZoneImgRow(e.fieldName,e.value);return!r&&t!==c}),take(1),map(e=>("string"!=typeof e.value&&(e.value=""),e)),map(processZoneImgRow))}function parseVoucherConfigMapScale(e,t){const r=new Map;for(const[i,n]of e){const e=Object.assign({},n),o=[];for(const r of e.ocrZones)o.push(parseOcrZoneScale(r,t));e.ocrZones=o,e.width=e.width*t,e.height=e.height*t,e.marginBottom=e.marginBottom*t,r.set(i,e)}return r}function parseOcrZoneScale(e,t){const r=Object.assign({},e);return r.width=r.width*t,r.height=r.height*t,r.offsetX=r.offsetX*t,r.offsetY=r.offsetY*t,r}export{initialBankZone,initialBaseTmpDir,initialResizeImgDir,initialSplitTmpDir,zoneTmpDirPrefix,Bvo,recognize};
//# sourceMappingURL=bvocr.esm.min.js.map

@@ -5,3 +5,3 @@ /**

*
* @version 0.11.0
* @version 0.12.0
* @author waiting

@@ -147,3 +147,5 @@ * @license MIT

fileMap.set(fileInfo.name, fileInfo);
if (fileInfo.name) {
fileMap.set(fileInfo.name, fileInfo);
}
return rxjs.of(fileMap)

@@ -197,2 +199,13 @@ }))

}
if (height / pageHeight < 0.1 || height < 100) {
const ret = {
name: '',
path: '',
width: 0,
height: 0,
size: 0,
};
return rxjs.of(ret)
}
const curDate = moment().format('YYYYMMDD');

@@ -210,2 +223,3 @@ const dst = path.join(options.targetDir, `${curDate}-${Math.random()}-${index}.jpg`);

// console.info('split page opts:', opts)
return rxjs.from(easyimage.crop(opts)).pipe(operators.mergeMap((info) => {

@@ -383,3 +397,12 @@ const ret = {

rxjs.from(createDir(baseDir)).pipe(operators.concatMap(() => createDir(splitDir)), operators.concatMap(() => createDir(resizeDir)))
rxjs.from(createDir(baseDir)).pipe(operators.catchError(err => {
console.info(err);
return rxjs.of(null)
}), operators.concatMap(() => createDir(splitDir)), operators.catchError(err => {
console.info(err);
return rxjs.of(null)
}), operators.concatMap(() => createDir(resizeDir)), operators.catchError(err => {
console.info(err);
return rxjs.of(null)
}))
.subscribe(() => { }, console.error);

@@ -460,6 +483,9 @@ }

const { baseDir, path: path$$1, bankZone, bankRegexpOptsMap, debug, lang, skipImgDir, } = options;
const zoneTmpDir = path.join(baseDir, zoneTmpDirPrefix, Math.random().toString());
const zoneTmpDir = path.join(baseDir, zoneTmpDirPrefix, `${path.basename(path$$1)}-${Math.random().toString()}`);
debug && console.info('recognize pageBank:', zoneTmpDir, path$$1);
return rxjs.from(createDir(zoneTmpDir)).pipe(operators.mergeMap(() => cropImgZone(path.join(path$$1), zoneTmpDir, bankZone)), // 切分page title区域
return rxjs.from(createDir(zoneTmpDir)).pipe(operators.catchError(err => {
console.info(err);
return rxjs.of(null)
}), operators.mergeMap(() => cropImgZone(path.join(path$$1), zoneTmpDir, bankZone)), // 切分page title区域
operators.concatMap(zoneInfo => {

@@ -466,0 +492,0 @@ return runOcr(zoneInfo.path, lang).pipe(operators.map(() => ({ path: path$$1, zoneImgPath: zoneInfo.path })), operators.mapTo(zoneInfo.path), operators.catchError(() => rxjs.of(zoneInfo.path)))

@@ -28,3 +28,5 @@ import { crop, info as getImgInfo, resize } from 'easyimage'

fileMap.set(fileInfo.name, fileInfo)
if (fileInfo.name) {
fileMap.set(fileInfo.name, fileInfo)
}
return of(fileMap)

@@ -78,2 +80,13 @@ }))

}
if (height / pageHeight < 0.1 || height < 100) {
const ret = {
name: '',
path: '',
width: 0,
height: 0,
size: 0,
}
return of(ret)
}
const curDate = moment().format('YYYYMMDD')

@@ -91,2 +104,3 @@ const dst = join(options.targetDir, `${curDate}-${Math.random()}-${index}.jpg`)

// console.info('split page opts:', opts)
return ofrom(crop(opts)).pipe(mergeMap((info) => {

@@ -93,0 +107,0 @@ const ret = {

@@ -25,3 +25,12 @@ import * as moment_ from 'moment'

ofrom(createDir(baseDir)).pipe(concatMap(() => createDir(splitDir)), concatMap(() => createDir(resizeDir)))
ofrom(createDir(baseDir)).pipe(catchError(err => {
console.info(err)
return of(null)
}), concatMap(() => createDir(splitDir)), catchError(err => {
console.info(err)
return of(null)
}), concatMap(() => createDir(resizeDir)), catchError(err => {
console.info(err)
return of(null)
}))
.subscribe(() => { }, console.error)

@@ -102,6 +111,9 @@ }

const { baseDir, path, bankZone, bankRegexpOptsMap, debug, lang, skipImgDir, } = options
const zoneTmpDir = join(baseDir, zoneTmpDirPrefix, Math.random().toString())
const zoneTmpDir = join(baseDir, zoneTmpDirPrefix, `${basename(path)}-${Math.random().toString()}`)
debug && console.info('recognize pageBank:', zoneTmpDir, path)
return ofrom(createDir(zoneTmpDir)).pipe(mergeMap(() => cropImgZone(join(path), zoneTmpDir, bankZone)), // 切分page title区域
return ofrom(createDir(zoneTmpDir)).pipe(catchError(err => {
console.info(err)
return of(null)
}), mergeMap(() => cropImgZone(join(path), zoneTmpDir, bankZone)), // 切分page title区域
concatMap(zoneInfo => {

@@ -108,0 +120,0 @@ return runOcr(zoneInfo.path, lang).pipe(map(() => ({ path, zoneImgPath: zoneInfo.path })), mapTo(zoneInfo.path), catchError(() => of(zoneInfo.path)))

{
"name": "bank-voucher-ocr",
"author": "waiting",
"version": "0.11.0",
"version": "0.12.0",
"description": "Bank Voucher ocr by tesseract and retrieve fields",

@@ -6,0 +6,0 @@ "keywords": [

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc