deepspeech-gpu
Advanced tools
Comparing version 0.6.0-alpha.8 to 0.6.0-alpha.9
@@ -65,7 +65,25 @@ #!/usr/bin/env node | ||
console.error('Loading model from file %s', args['model']); | ||
const model_load_start = process.hrtime(); | ||
var model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH); | ||
const model_load_end = process.hrtime(model_load_start); | ||
console.error('Loaded model in %ds.', totalTime(model_load_end)); | ||
var desired_sample_rate = model.sampleRate(); | ||
if (args['lm'] && args['trie']) { | ||
console.error('Loading language model from files %s %s', args['lm'], args['trie']); | ||
const lm_load_start = process.hrtime(); | ||
model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA); | ||
const lm_load_end = process.hrtime(lm_load_start); | ||
console.error('Loaded language model in %ds.', totalTime(lm_load_end)); | ||
} | ||
const buffer = Fs.readFileSync(args['audio']); | ||
const result = Wav.decode(buffer); | ||
if (result.sampleRate < 16000) { | ||
console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.'); | ||
if (result.sampleRate < desired_sample_rate) { | ||
console.error('Warning: original sample rate (' + result.sampleRate + ') ' + | ||
'is lower than ' + desired_sample_rate + 'Hz. ' + | ||
'Up-sampling might produce erratic speech recognition.'); | ||
} | ||
@@ -88,3 +106,3 @@ | ||
bits: 16, | ||
rate: 16000, | ||
rate: desired_sample_rate, | ||
channels: 1, | ||
@@ -102,19 +120,5 @@ encoding: 'signed-integer', | ||
console.error('Loading model from file %s', args['model']); | ||
const model_load_start = process.hrtime(); | ||
var model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH); | ||
const model_load_end = process.hrtime(model_load_start); | ||
console.error('Loaded model in %ds.', totalTime(model_load_end)); | ||
if (args['lm'] && args['trie']) { | ||
console.error('Loading language model from files %s %s', args['lm'], args['trie']); | ||
const lm_load_start = process.hrtime(); | ||
model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA); | ||
const lm_load_end = process.hrtime(lm_load_start); | ||
console.error('Loaded language model in %ds.', totalTime(lm_load_end)); | ||
} | ||
const inference_start = process.hrtime(); | ||
console.error('Running inference.'); | ||
const audioLength = (audioBuffer.length / 2) * ( 1 / 16000); | ||
const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate); | ||
@@ -124,5 +128,5 @@ // We take half of the buffer_size because buffer is a char* while | ||
if (args['extended']) { | ||
console.log(metadataToString(model.sttWithMetadata(audioBuffer.slice(0, audioBuffer.length / 2), 16000))); | ||
console.log(metadataToString(model.sttWithMetadata(audioBuffer.slice(0, audioBuffer.length / 2)))); | ||
} else { | ||
console.log(model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000)); | ||
console.log(model.stt(audioBuffer.slice(0, audioBuffer.length / 2))); | ||
} | ||
@@ -129,0 +133,0 @@ const inference_stop = process.hrtime(inference_start); |
18
index.js
@@ -49,2 +49,11 @@ 'use strict'; | ||
/**
 * Return the sample rate expected by the model.
 *
 * Audio passed to `stt`/`sttWithMetadata`/streaming must match this rate
 * (it is the rate the model was trained on).
 *
 * @return {number} Sample rate in Hz.
 */
Model.prototype.sampleRate = function() {
  return binding.GetModelSampleRate(this._impl);
};
/** | ||
* Enable decoding using beam scoring with a KenLM language model. | ||
@@ -68,5 +77,4 @@ * | ||
* | ||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate. | ||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* @param {number} aBufferSize The number of samples in the audio signal. | ||
* @param {number} aSampleRate The sample-rate of the audio signal. | ||
* | ||
@@ -84,5 +92,4 @@ * @return {string} The STT result. Returns undefined on error. | ||
* | ||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate. | ||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* @param {number} aBufferSize The number of samples in the audio signal. | ||
* @param {number} aSampleRate The sample-rate of the audio signal. | ||
* | ||
@@ -99,3 +106,2 @@ * @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
* | ||
* @param {number} aSampleRate The sample-rate of the audio signal. | ||
* @return {object} an opaque object that represents the streaming state. | ||
@@ -121,3 +127,3 @@ * | ||
* @param {buffer} aBuffer An array of 16-bit, mono raw audio samples at the | ||
* appropriate sample rate. | ||
* appropriate sample rate (matching what the model was trained on). | ||
* @param {number} aBufferSize The number of samples in @param aBuffer. | ||
@@ -124,0 +130,0 @@ */ |
{ | ||
"name" : "deepspeech-gpu", | ||
"version" : "0.6.0-alpha.8", | ||
"version" : "0.6.0-alpha.9", | ||
"description" : "DeepSpeech NodeJS bindings", | ||
@@ -11,5 +11,5 @@ "main" : "./index", | ||
"license": "MPL-2.0", | ||
"homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.6.0-alpha.8#project-deepspeech", | ||
"homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.6.0-alpha.9#project-deepspeech", | ||
"files": [ | ||
"README.md", | ||
"README.rst", | ||
"client.js", | ||
@@ -16,0 +16,0 @@ "index.js", |
144994265
46
354
92