tesseract.js-utils
Advanced tools
Comparing version 1.0.0-beta.6 to 1.0.0-beta.7
{ | ||
"name": "tesseract.js-utils", | ||
"version": "1.0.0-beta.6", | ||
"version": "1.0.0-beta.7", | ||
"description": "Utilities for tesseract.js", | ||
@@ -5,0 +5,0 @@ "main": "src/index.node.js", |
const isURL = require('is-url'); | ||
const fileType = require('file-type'); | ||
const axios = require('axios'); | ||
@@ -9,3 +10,3 @@ | ||
cacheMethod, | ||
lang, | ||
langCode, | ||
}) => (data) => { | ||
@@ -20,6 +21,6 @@ if (TessModule) { | ||
} | ||
TessModule.FS.writeFile(`${dataPath || '.'}/${lang}.traineddata`, data); | ||
TessModule.FS.writeFile(`${dataPath || '.'}/${langCode}.traineddata`, data); | ||
} | ||
if (['write', 'refresh', undefined].includes(cacheMethod)) { | ||
return modules.writeCache(`${cachePath || '.'}/${lang}.traineddata`, data) | ||
return modules.writeCache(`${cachePath || '.'}/${langCode}.traineddata`, data) | ||
.then(() => data); | ||
@@ -35,5 +36,18 @@ } | ||
cacheMethod, | ||
gzip = true, | ||
...options | ||
}) => (lang) => { | ||
const langCode = typeof lang === 'string' ? lang : lang.code; | ||
const handleTraineddata = (data) => { | ||
const type = fileType(data); | ||
if (type !== null && type.mime === 'application/gzip') { | ||
return modules.gunzip(new Uint8Array(data)); | ||
} | ||
return new Uint8Array(data); | ||
}; | ||
const doHandleLang = handleLang(modules)({ | ||
cachePath, cacheMethod, langCode, ...options, | ||
}); | ||
let { readCache } = modules; | ||
if (['refresh', 'none'].includes(cacheMethod)) { | ||
@@ -43,3 +57,3 @@ readCache = () => Promise.resolve(); | ||
return readCache(`${cachePath || '.'}/${lang}.traineddata`) | ||
return readCache(`${cachePath || '.'}/${langCode}.traineddata`) | ||
.then((data) => { | ||
@@ -49,33 +63,38 @@ if (typeof data === 'undefined') { | ||
} | ||
return handleLang(modules)({ | ||
cachePath, cacheMethod, lang, ...options, | ||
})(data); | ||
return doHandleLang(data); | ||
}) | ||
/* | ||
* If not found in the cache | ||
*/ | ||
.catch(() => { | ||
const fetchTrainedData = iLangPath => ( | ||
axios.get(`${iLangPath}/${lang}.traineddata.gz`, { | ||
responseType: 'arraybuffer', | ||
}) | ||
.then(resp => modules.gunzip(new Uint8Array(resp.data))) | ||
.then(handleLang(modules)({ | ||
cachePath, cacheMethod, lang, ...options, | ||
})) | ||
); | ||
if (typeof lang === 'string') { | ||
const fetchTrainedData = iLangPath => ( | ||
axios.get(`${iLangPath}/${langCode}.traineddata${gzip ? '.gz' : ''}`, { | ||
responseType: 'arraybuffer', | ||
}) | ||
.then(({ data }) => new Uint8Array(data)) | ||
.then(handleTraineddata) | ||
.then(doHandleLang) | ||
); | ||
/** When langPath is an URL, just do the fetch */ | ||
if (isURL(langPath)) { | ||
return fetchTrainedData(langPath); | ||
} | ||
/** When langPath is an URL, just do the fetch */ | ||
if (isURL(langPath)) { | ||
return fetchTrainedData(langPath); | ||
} | ||
/** When langPath is not an URL in browser environment */ | ||
if (process.browser) { | ||
return fetchTrainedData(modules.resolveURL(langPath)); | ||
/** When langPath is not an URL in browser environment */ | ||
if (process.browser) { | ||
return fetchTrainedData(modules.resolveURL(langPath)); | ||
} | ||
/** When langPath is not an URL in Node.js environment */ | ||
return modules.readCache(`${langPath}/${langCode}.traineddata${gzip ? '.gz' : ''}`) | ||
.then(handleTraineddata) | ||
.then(doHandleLang); | ||
} | ||
/** When langPath is not an URL in Node.js environment */ | ||
return modules.readCache(`${langPath}/${lang}.traineddata.gz`) | ||
.then(buf => modules.gunzip(new Uint8Array(buf))) | ||
.then(handleLang(modules)({ | ||
cachePath, cacheMethod, lang, ...options, | ||
})); | ||
return Promise | ||
.resolve(lang.data) | ||
.then(handleTraineddata) | ||
.then(doHandleLang); | ||
}); | ||
@@ -89,3 +108,10 @@ }; | ||
* @param {object} options | ||
* @param {string} options.lang - langs to load, use '+' for multiple languages, ex: eng+chi_tra | ||
* @param {array} options.langs - | ||
* langs to load. | ||
* Each item in the array can be string (ex. 'eng') or object like: | ||
* { | ||
* code: 'eng', | ||
* gzip: false, | ||
* data: Uint8Array | ||
* } | ||
* @param {object} options.TessModule - TesseractModule | ||
@@ -95,2 +121,4 @@ * @param {string} options.langPath - prefix path for downloading lang file | ||
* @param {string} options.dataPath - path to store data in mem | ||
* @param {boolean}options.gzip - | ||
* indicate whether to download gzip version from remote, default: true | ||
* @param {string} options.cacheMethod - | ||
@@ -105,7 +133,7 @@ * method of cache invalidation, should be one of the following options: | ||
module.exports = modules => ({ | ||
lang: langs, | ||
langs, | ||
...options | ||
}) => ( | ||
Promise | ||
.all(langs.split('+').map(loadAndGunzipFile(modules)(options))) | ||
.all((typeof langs === 'string' ? langs.split('+') : langs).map(loadAndGunzipFile(modules)(options))) | ||
); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
8597
213