deepspeech-gpu
Advanced tools
Comparing version 0.7.0-alpha.3 to 0.7.0-alpha.4
197
client.js
#!/usr/bin/env node | ||
'use strict'; | ||
const Fs = require('fs'); | ||
const Sox = require('sox-stream'); | ||
const Ds = require('./index.js'); | ||
const argparse = require('argparse'); | ||
const MemoryStream = require('memory-stream'); | ||
const Wav = require('node-wav'); | ||
const Duplex = require('stream').Duplex; | ||
const util = require('util'); | ||
var VersionAction = function VersionAction(options) { | ||
options = options || {}; | ||
options.nargs = 0; | ||
argparse.Action.call(this, options); | ||
"use strict"; | ||
// This is required for process.versions.electron below | ||
/// <reference types="electron" /> | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
if (mod && mod.__esModule) return mod; | ||
var result = {}; | ||
if (mod != null) for (var k in mod) if (Object.hasOwnProperty.call(mod, k)) result[k] = mod[k]; | ||
result["default"] = mod; | ||
return result; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
const index_1 = __importDefault(require("./index")); | ||
const Fs = __importStar(require("fs")); | ||
const sox_stream_1 = __importDefault(require("sox-stream")); | ||
const argparse = __importStar(require("argparse")); | ||
const MemoryStream = require("memory-stream"); | ||
const Wav = require("node-wav"); | ||
const Duplex = require("stream").Duplex; | ||
/**
 * argparse action for `--version`: prints the DeepSpeech library version
 * and the detected runtime, then terminates the process.
 */
class VersionAction extends argparse.Action {
    call(parser, namespace, values, optionString) {
        console.log('DeepSpeech ' + index_1.default.Version());
        // process.versions.electron is only defined when running under Electron.
        const runtime = process.versions.electron ? 'Electron' : 'Node';
        console.error('Runtime: ' + runtime);
        process.exit(0);
    }
}
util.inherits(VersionAction, argparse.Action); | ||
VersionAction.prototype.call = function(parser) { | ||
console.log('DeepSpeech ' + Ds.Version()); | ||
let runtime = 'Node'; | ||
if (process.versions.electron) { | ||
runtime = 'Electron'; | ||
} | ||
console.error('Runtime: ' + runtime); | ||
process.exit(0); | ||
} | ||
var parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'}); | ||
parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'}); | ||
parser.addArgument(['--scorer'], {help: 'Path to the external scorer file'}); | ||
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'}); | ||
parser.addArgument(['--beam_width'], {help: 'Beam width for the CTC decoder', type: 'int'}); | ||
parser.addArgument(['--lm_alpha'], {help: 'Language model weight (lm_alpha). If not specified, use default from the scorer package.', type: 'float'}); | ||
parser.addArgument(['--lm_beta'], {help: 'Word insertion bonus (lm_beta). If not specified, use default from the scorer package.', type: 'float'}); | ||
parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'}); | ||
parser.addArgument(['--extended'], {action: 'storeTrue', help: 'Output string from extended metadata'}); | ||
var args = parser.parseArgs(); | ||
let parser = new argparse.ArgumentParser({ addHelp: true, description: 'Running DeepSpeech inference.' }); | ||
parser.addArgument(['--model'], { required: true, help: 'Path to the model (protocol buffer binary file)' }); | ||
parser.addArgument(['--scorer'], { help: 'Path to the external scorer file' }); | ||
parser.addArgument(['--audio'], { required: true, help: 'Path to the audio file to run (WAV format)' }); | ||
parser.addArgument(['--version'], { action: VersionAction, nargs: 0, help: 'Print version and exits' }); | ||
parser.addArgument(['--extended'], { action: 'storeTrue', help: 'Output string from extended metadata' }); | ||
let args = parser.parseArgs(); | ||
/**
 * Convert a process.hrtime() tuple into seconds.
 *
 * Fix: the captured text contained a duplicated, unreachable `return`
 * statement (diff-merge artifact); only one return is kept.
 *
 * @param {[number, number]} hrtimeValue - [seconds, nanoseconds] pair as
 *     returned by process.hrtime().
 * @returns {string} Elapsed time in seconds, to 4 significant digits.
 */
function totalTime(hrtimeValue) {
    return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
}
/**
 * Flatten a CandidateTranscript into a plain string by concatenating the
 * text of each token.
 *
 * Fix: the captured text contained the whole loop duplicated after the
 * first `return` (unreachable dead code from a diff merge); deduplicated,
 * and the index loop replaced with an idiomatic for...of.
 *
 * @param {{tokens: Array<{text: string}>}} transcript - A candidate
 *     transcript as produced by Model.sttWithMetadata().
 * @returns {string} Concatenated token texts ("" when there are no tokens).
 */
function candidateTranscriptToString(transcript) {
    let retval = "";
    for (const token of transcript.tokens) {
        retval += token.text;
    }
    return retval;
}
// sphinx-doc: js_ref_model_start | ||
console.error('Loading model from file %s', args['model']); | ||
const model_load_start = process.hrtime(); | ||
var model = new Ds.Model(args['model']); | ||
let model = new index_1.default.Model(args['model']); | ||
const model_load_end = process.hrtime(model_load_start); | ||
console.error('Loaded model in %ds.', totalTime(model_load_end)); | ||
if (args['beam_width']) { | ||
model.setBeamWidth(args['beam_width']); | ||
model.setBeamWidth(args['beam_width']); | ||
} | ||
var desired_sample_rate = model.sampleRate(); | ||
// sphinx-doc: js_ref_model_stop | ||
let desired_sample_rate = model.sampleRate(); | ||
if (args['scorer']) { | ||
console.error('Loading scorer from file %s', args['scorer']); | ||
const scorer_load_start = process.hrtime(); | ||
model.enableExternalScorer(args['scorer']); | ||
const scorer_load_end = process.hrtime(scorer_load_start); | ||
console.error('Loaded scorer in %ds.', totalTime(scorer_load_end)); | ||
if (args['lm_alpha'] && args['lm_beta']) { | ||
model.setScorerAlphaBeta(args['lm_alpha'], args['lm_beta']); | ||
} | ||
console.error('Loading scorer from file %s', args['scorer']); | ||
const scorer_load_start = process.hrtime(); | ||
model.enableExternalScorer(args['scorer']); | ||
const scorer_load_end = process.hrtime(scorer_load_start); | ||
console.error('Loaded scorer in %ds.', totalTime(scorer_load_end)); | ||
if (args['lm_alpha'] && args['lm_beta']) { | ||
model.setScorerAlphaBeta(args['lm_alpha'], args['lm_beta']); | ||
} | ||
} | ||
const buffer = Fs.readFileSync(args['audio']); | ||
const result = Wav.decode(buffer); | ||
if (result.sampleRate < desired_sample_rate) { | ||
console.error('Warning: original sample rate (' + result.sampleRate + ') ' + | ||
'is lower than ' + desired_sample_rate + 'Hz. ' + | ||
'Up-sampling might produce erratic speech recognition.'); | ||
console.error(`Warning: original sample rate ( ${result.sampleRate})` + | ||
`is lower than ${desired_sample_rate} Hz. ` + | ||
`Up-sampling might produce erratic speech recognition.`); | ||
} | ||
/**
 * Wrap a Buffer in a stream so it can be piped (here, into the sox
 * transcoder).
 *
 * Fix: the captured text contained the entire body duplicated after
 * `return` (unreachable dead code from a diff merge); deduplicated.
 *
 * The bare Duplex needs no _read() implementation because the data is
 * pushed up-front and the stream is ended immediately with push(null).
 *
 * @param {Buffer} buffer - Bytes to expose as a readable stream.
 * @returns {Duplex} A stream that emits `buffer` once and then ends.
 */
function bufferToStream(buffer) {
    const stream = new Duplex();
    stream.push(buffer);
    stream.push(null);
    return stream;
}
var audioStream = new MemoryStream(); | ||
let audioStream = new MemoryStream(); | ||
bufferToStream(buffer). | ||
pipe(Sox({ | ||
pipe(sox_stream_1.default({ | ||
global: { | ||
'no-dither': true, | ||
'no-dither': true, | ||
'replay-gain': 'off', | ||
}, | ||
output: { | ||
bits: 16, | ||
rate: desired_sample_rate, | ||
channels: 1, | ||
encoding: 'signed-integer', | ||
endian: 'little', | ||
compression: 0.0, | ||
type: 'raw' | ||
bits: 16, | ||
rate: desired_sample_rate, | ||
channels: 1, | ||
encoding: 'signed-integer', | ||
endian: 'little', | ||
compression: 0.0, | ||
type: 'raw' | ||
} | ||
})). | ||
pipe(audioStream); | ||
})). | ||
pipe(audioStream); | ||
audioStream.on('finish', () => { | ||
let audioBuffer = audioStream.toBuffer(); | ||
const inference_start = process.hrtime(); | ||
console.error('Running inference.'); | ||
const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate); | ||
if (args['extended']) { | ||
let metadata = model.sttWithMetadata(audioBuffer, 1); | ||
console.log(candidateTranscriptToString(metadata.transcripts[0])); | ||
Ds.FreeMetadata(metadata); | ||
} else { | ||
console.log(model.stt(audioBuffer)); | ||
} | ||
const inference_stop = process.hrtime(inference_start); | ||
console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4)); | ||
Ds.FreeModel(model); | ||
process.exit(0); | ||
let audioBuffer = audioStream.toBuffer(); | ||
const inference_start = process.hrtime(); | ||
console.error('Running inference.'); | ||
const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate); | ||
// sphinx-doc: js_ref_inference_start | ||
if (args['extended']) { | ||
let metadata = model.sttWithMetadata(audioBuffer, 1); | ||
console.log(candidateTranscriptToString(metadata.transcripts[0])); | ||
index_1.default.FreeMetadata(metadata); | ||
} | ||
else { | ||
console.log(model.stt(audioBuffer)); | ||
} | ||
// sphinx-doc: js_ref_inference_stop | ||
const inference_stop = process.hrtime(inference_start); | ||
console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4)); | ||
index_1.default.FreeModel(model); | ||
process.exit(0); | ||
}); |
{ | ||
"name": "deepspeech-gpu", | ||
"version": "0.7.0-alpha.3", | ||
"version": "0.7.0-alpha.4", | ||
"description": "DeepSpeech NodeJS bindings", | ||
"main": "./index", | ||
"main": "./index.js", | ||
"types": "./index.d.ts", | ||
"bin": { | ||
@@ -11,3 +12,3 @@ "deepspeech": "./client.js" | ||
"license": "MPL-2.0", | ||
"homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.7.0-alpha.3#project-deepspeech", | ||
"homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.7.0-alpha.4#project-deepspeech", | ||
"files": [ | ||
@@ -17,2 +18,3 @@ "README.md", | ||
"index.js", | ||
"index.d.ts", | ||
"lib/*" | ||
@@ -35,9 +37,15 @@ ], | ||
"dependencies": { | ||
"node-pre-gyp": "0.14.x", | ||
"argparse": "1.0.x", | ||
"sox-stream": "2.0.x", | ||
"memory-stream": "0.0.3", | ||
"node-pre-gyp": "^0.14.0", | ||
"node-wav": "0.0.2", | ||
"sox-stream": "2.0.x" | ||
"node-wav": "0.0.2" | ||
}, | ||
"devDependencies": {}, | ||
"devDependencies": { | ||
"electron": "^1.7.9", | ||
"node-gyp": "4.x - 5.x", | ||
"typescript": "3.6.x", | ||
"@types/argparse": "1.0.x", | ||
"@types/node": "13.9.x" | ||
}, | ||
"scripts": { | ||
@@ -44,0 +52,0 @@ "test": "node index.js" |
Full project description and documentation on GitHub: [https://github.com/mozilla/DeepSpeech](https://github.com/mozilla/DeepSpeech). | ||
## Generating TypeScript Type Definitions | ||
You can generate the TypeScript type declaration file using `dts-gen`. | ||
This requires a compiled/installed version of the DeepSpeech NodeJS client. | ||
Whenever the API changes, you must regenerate the `index.d.ts` type declaration
file by running:
```sh | ||
npm install -g dts-gen | ||
dts-gen --module deepspeech --file index.d.ts | ||
``` | ||
### Example usage | ||
See `client.ts` |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Filesystem access
Supply chain riskAccesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Filesystem access
Supply chain riskAccesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
144267743
27
590
19
5
Updatednode-pre-gyp@0.14.x