deepspeech-gpu
Advanced tools
Comparing version 0.7.1 to 0.7.3
@@ -5,5 +5,2 @@ #!/usr/bin/env node | ||
/// <reference types="electron" /> | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
var __importStar = (this && this.__importStar) || function (mod) { | ||
@@ -16,4 +13,7 @@ if (mod && mod.__esModule) return mod; | ||
}; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
const index_1 = __importDefault(require("./index")); | ||
const Ds = __importStar(require("./index")); | ||
const Fs = __importStar(require("fs")); | ||
@@ -27,3 +27,3 @@ const sox_stream_1 = __importDefault(require("sox-stream")); | ||
call(parser, namespace, values, optionString) { | ||
console.log('DeepSpeech ' + index_1.default.Version()); | ||
console.log('DeepSpeech ' + Ds.Version()); | ||
let runtime = 'Node'; | ||
@@ -58,3 +58,3 @@ if (process.versions.electron) { | ||
const model_load_start = process.hrtime(); | ||
let model = new index_1.default.Model(args['model']); | ||
let model = new Ds.Model(args['model']); | ||
const model_load_end = process.hrtime(model_load_start); | ||
@@ -118,3 +118,3 @@ console.error('Loaded model in %ds.', totalTime(model_load_end)); | ||
console.log(candidateTranscriptToString(metadata.transcripts[0])); | ||
index_1.default.FreeMetadata(metadata); | ||
Ds.FreeMetadata(metadata); | ||
} | ||
@@ -127,3 +127,3 @@ else { | ||
console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4)); | ||
index_1.default.FreeModel(model); | ||
Ds.FreeModel(model); | ||
process.exit(0); | ||
@@ -136,2 +136,9 @@ }); | ||
stream.feedAudioContent(chunk); | ||
if (args['extended']) { | ||
let metadata = stream.intermediateDecodeWithMetadata(); | ||
console.error('intermediate: ' + candidateTranscriptToString(metadata.transcripts[0])); | ||
} | ||
else { | ||
console.error('intermediate: ' + stream.intermediateDecode()); | ||
} | ||
}); | ||
@@ -138,0 +145,0 @@ conversionStream.on('end', () => { |
286
index.d.ts
@@ -0,1 +1,2 @@ | ||
/// <reference types="node" /> | ||
/** | ||
@@ -5,7 +6,9 @@ * Stores text of an individual token, along with its timing information | ||
export interface TokenMetadata { | ||
/** The text corresponding to this token */ | ||
text: string; | ||
/** Position of the token in units of 20ms */ | ||
timestep: number; | ||
/** Position of the token in seconds */ | ||
start_time: number; | ||
} | ||
/** | ||
@@ -17,5 +20,9 @@ * A single transcript computed by the model, including a confidence value and | ||
tokens: TokenMetadata[]; | ||
/** | ||
* Approximated confidence value for this transcription. This is roughly the | ||
* sum of the acoustic model logit values for each timestep/token that | ||
* contributed to the creation of this transcription. | ||
*/ | ||
confidence: number; | ||
} | ||
/** | ||
@@ -27,97 +34,3 @@ * An array of CandidateTranscript objects computed by the model. | ||
} | ||
/** | ||
* An object providing an interface to a trained DeepSpeech model. | ||
* | ||
* @param aModelPath The path to the frozen model graph. | ||
* | ||
* @throws on error | ||
*/ | ||
export class Model { | ||
constructor(aModelPath: string) | ||
/** | ||
* Get beam width value used by the model. If :js:func:Model.setBeamWidth was | ||
* not called before, will return the default value loaded from the model file. | ||
* | ||
* @return Beam width value used by the model. | ||
*/ | ||
beamWidth(): number; | ||
/** | ||
* Set beam width value used by the model. | ||
* | ||
* @param The beam width used by the model. A larger beam width value generates better results at the cost of decoding time. | ||
* | ||
* @return Zero on success, non-zero on failure. | ||
*/ | ||
setBeamWidth(aBeamWidth: number): number; | ||
/** | ||
* Return the sample rate expected by the model. | ||
* | ||
* @return Sample rate. | ||
*/ | ||
sampleRate(): number; | ||
/** | ||
* Enable decoding using an external scorer. | ||
* | ||
* @param aScorerPath The path to the external scorer file. | ||
* | ||
* @return Zero on success, non-zero on failure (invalid arguments). | ||
*/ | ||
enableExternalScorer(aScorerPath: string): number; | ||
/** | ||
* Disable decoding using an external scorer. | ||
* | ||
* @return Zero on success, non-zero on failure (invalid arguments). | ||
*/ | ||
disableExternalScorer(): number; | ||
/** | ||
* Set hyperparameters alpha and beta of the external scorer. | ||
* | ||
* @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight. | ||
* @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight. | ||
* | ||
* @return Zero on success, non-zero on failure (invalid arguments). | ||
*/ | ||
setScorerAlphaBeta(aLMAlpha: number, aLMBeta: number): number; | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text. | ||
* | ||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* | ||
* @return The STT result. Returns undefined on error. | ||
*/ | ||
stt(aBuffer: object): string; | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text and output metadata | ||
* about the results. | ||
* | ||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. | ||
* Default value is 1 if not specified. | ||
* | ||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. | ||
* The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
sttWithMetadata(aBuffer: object, aNumResults?: number): Metadata; | ||
/** | ||
* Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object. | ||
* | ||
* @return a :js:func:`Stream` object that represents the streaming state. | ||
* | ||
* @throws on error | ||
*/ | ||
createStream(): Stream; | ||
} | ||
/** | ||
* @class | ||
* Provides an interface to a DeepSpeech stream. The constructor cannot be called | ||
@@ -127,47 +40,129 @@ * directly, use :js:func:`Model.createStream`. | ||
declare class Stream { | ||
/** | ||
* @param nativeStream SWIG wrapper for native StreamingState object. | ||
*/ | ||
constructor(nativeStream: object); | ||
/** | ||
* Feed audio samples to an ongoing streaming inference. | ||
* | ||
* @param aBuffer An array of 16-bit, mono raw audio samples at the | ||
* appropriate sample rate (matching what the model was trained on). | ||
*/ | ||
feedAudioContent(aBuffer: Buffer): void; | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference. | ||
* | ||
* @return The STT intermediate result. | ||
*/ | ||
intermediateDecode(): string; | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference, return results including metadata. | ||
* | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
intermediateDecodeWithMetadata(aNumResults?: number): Metadata; | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference. | ||
* | ||
* @return The STT result. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
finishStream(): string; | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference. | ||
* | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
finishStreamWithMetadata(aNumResults?: number): Metadata; | ||
} | ||
/** | ||
* Feed audio samples to an ongoing streaming inference. | ||
* | ||
* @param aBuffer An array of 16-bit, mono raw audio samples at the | ||
* appropriate sample rate (matching what the model was trained on). | ||
* An object providing an interface to a trained DeepSpeech model. | ||
*/ | ||
feedAudioContent(aBuffer: Buffer): void; | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference. | ||
* | ||
* @return The STT intermediate result. | ||
*/ | ||
intermediateDecode(aSctx: Stream): string; | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference, return results including metadata. | ||
* | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
intermediateDecodeWithMetadata (aNumResults?: number): Metadata; | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference. | ||
* | ||
* @return The STT result. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
finishStream(): string; | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference. | ||
* | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
finishStreamWithMetadata(aNumResults?: number): Metadata; | ||
export declare class Model { | ||
/** | ||
* @param aModelPath The path to the frozen model graph. | ||
* | ||
* @throws on error | ||
*/ | ||
constructor(aModelPath: string); | ||
/** | ||
* Get beam width value used by the model. If :js:func:`Model.setBeamWidth` was | ||
* not called before, will return the default value loaded from the model file. | ||
* | ||
* @return Beam width value used by the model. | ||
*/ | ||
beamWidth(): number; | ||
/** | ||
* Set beam width value used by the model. | ||
* | ||
* @param aBeamWidth The beam width used by the model. A larger beam width value generates better results at the cost of decoding time. | ||
* | ||
* @throws on error | ||
*/ | ||
setBeamWidth(aBeamWidth: number): void; | ||
/** | ||
* Return the sample rate expected by the model. | ||
* | ||
* @return Sample rate. | ||
*/ | ||
sampleRate(): number; | ||
/** | ||
* Enable decoding using an external scorer. | ||
* | ||
* @param aScorerPath The path to the external scorer file. | ||
* | ||
* @throws on error | ||
*/ | ||
enableExternalScorer(aScorerPath: string): void; | ||
/** | ||
* Disable decoding using an external scorer. | ||
* | ||
* @throws on error | ||
*/ | ||
disableExternalScorer(): void; | ||
/** | ||
* Set hyperparameters alpha and beta of the external scorer. | ||
* | ||
* @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight. | ||
* @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight. | ||
* | ||
* @throws on error | ||
*/ | ||
setScorerAlphaBeta(aLMAlpha: number, aLMBeta: number): void; | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text. | ||
* | ||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* | ||
* @return The STT result. Returns undefined on error. | ||
*/ | ||
stt(aBuffer: Buffer): string; | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text and output metadata | ||
* about the results. | ||
* | ||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. | ||
* Default value is 1 if not specified. | ||
* | ||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. | ||
* The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
sttWithMetadata(aBuffer: Buffer, aNumResults?: number): Metadata; | ||
/** | ||
* Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object. | ||
* | ||
* @return a :js:func:`Stream` object that represents the streaming state. | ||
* | ||
* @throws on error | ||
*/ | ||
createStream(): Stream; | ||
} | ||
/** | ||
@@ -179,4 +174,3 @@ * Frees associated resources and destroys model object. | ||
*/ | ||
export function FreeModel(model: Model): void; | ||
export declare function FreeModel(model: Model): void; | ||
/** | ||
@@ -187,4 +181,3 @@ * Free memory allocated for metadata information. | ||
*/ | ||
export function FreeMetadata(metadata: Metadata): void; | ||
export declare function FreeMetadata(metadata: Metadata): void; | ||
/** | ||
@@ -197,7 +190,8 @@ * Destroy a streaming state without decoding the computed logits. This | ||
*/ | ||
export function FreeStream(stream: Stream): void; | ||
export declare function FreeStream(stream: Stream): void; | ||
/** | ||
* Print version of this library and of the linked TensorFlow library on standard output. | ||
* Returns the version of this library. The returned version is a semantic | ||
* version (SemVer 2.0.0). | ||
*/ | ||
export function Version(): void; | ||
export declare function Version(): string; | ||
export {}; |
497
index.js
@@ -1,8 +0,10 @@ | ||
'use strict'; | ||
const binary = require('node-pre-gyp'); | ||
const path = require('path') | ||
"use strict"; | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
const node_pre_gyp_1 = __importDefault(require("node-pre-gyp")); | ||
const path_1 = __importDefault(require("path")); | ||
// 'lib', 'binding', 'v0.1.1', ['node', 'v' + process.versions.modules, process.platform, process.arch].join('-'), 'deepspeech-bindings.node') | ||
const binding_path = binary.find(path.resolve(path.join(__dirname, 'package.json'))); | ||
const binding_path = node_pre_gyp_1.default.find(path_1.default.resolve(path_1.default.join(__dirname, 'package.json'))); | ||
// On Windows, we can't rely on RPATH being set to $ORIGIN/../ or on | ||
@@ -12,229 +14,219 @@ // @loader_path/../ but we can change the PATH to include the proper directory | ||
if (process.platform === 'win32') { | ||
const dslib_path = path.resolve(path.join(binding_path, '../..')); | ||
var oldPath = process.env.PATH; | ||
process.env['PATH'] = `${dslib_path};${process.env.PATH}`; | ||
const dslib_path = path_1.default.resolve(path_1.default.join(binding_path, '../..')); | ||
var oldPath = process.env.PATH; | ||
process.env['PATH'] = `${dslib_path};${process.env.PATH}`; | ||
} | ||
const binding = require(binding_path); | ||
if (process.platform === 'win32') { | ||
process.env['PATH'] = oldPath; | ||
process.env['PATH'] = oldPath; | ||
} | ||
/** | ||
* @class | ||
* An object providing an interface to a trained DeepSpeech model. | ||
* | ||
* @param {string} aModelPath The path to the frozen model graph. | ||
* | ||
* @throws on error | ||
* Provides an interface to a DeepSpeech stream. The constructor cannot be called | ||
* directly, use :js:func:`Model.createStream`. | ||
*/ | ||
function Model(aModelPath) { | ||
this._impl = null; | ||
const rets = binding.CreateModel(aModelPath); | ||
const status = rets[0]; | ||
const impl = rets[1]; | ||
if (status !== 0) { | ||
throw "CreateModel failed with '"+binding.ErrorCodeToErrorMessage(status)+"' (0x" + status.toString(16) + ")"; | ||
class Stream { | ||
/** | ||
* @param nativeStream SWIG wrapper for native StreamingState object. | ||
*/ | ||
constructor(nativeStream) { | ||
this._impl = nativeStream; | ||
} | ||
this._impl = impl; | ||
} | ||
/** | ||
* Get beam width value used by the model. If :js:func:Model.setBeamWidth was | ||
* not called before, will return the default value loaded from the model file. | ||
* | ||
* @return {number} Beam width value used by the model. | ||
*/ | ||
Model.prototype.beamWidth = function() { | ||
return binding.GetModelBeamWidth(this._impl); | ||
} | ||
/** | ||
* Set beam width value used by the model. | ||
* | ||
* @param {number} The beam width used by the model. A larger beam width value generates better results at the cost of decoding time. | ||
* | ||
* @return {number} Zero on success, non-zero on failure. | ||
*/ | ||
Model.prototype.setBeamWidth = function(aBeamWidth) { | ||
return binding.SetModelBeamWidth(this._impl, aBeamWidth); | ||
} | ||
/** | ||
* Return the sample rate expected by the model. | ||
* | ||
* @return {number} Sample rate. | ||
*/ | ||
Model.prototype.sampleRate = function() { | ||
return binding.GetModelSampleRate(this._impl); | ||
} | ||
/** | ||
* Enable decoding using an external scorer. | ||
* | ||
* @param {string} aScorerPath The path to the external scorer file. | ||
* | ||
* @throws on error | ||
*/ | ||
Model.prototype.enableExternalScorer = function(aScorerPath) { | ||
const status = binding.EnableExternalScorer(this._impl, aScorerPath); | ||
if (status !== 0) { | ||
throw "EnableExternalScorer failed with '"+binding.ErrorCodeToErrorMessage(status)+"' (0x" + status.toString(16) + ")"; | ||
/** | ||
* Feed audio samples to an ongoing streaming inference. | ||
* | ||
* @param aBuffer An array of 16-bit, mono raw audio samples at the | ||
* appropriate sample rate (matching what the model was trained on). | ||
*/ | ||
feedAudioContent(aBuffer) { | ||
binding.FeedAudioContent(this._impl, aBuffer); | ||
} | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference. | ||
* | ||
* @return The STT intermediate result. | ||
*/ | ||
intermediateDecode() { | ||
return binding.IntermediateDecode(this._impl); | ||
} | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference, return results including metadata. | ||
* | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
intermediateDecodeWithMetadata(aNumResults = 1) { | ||
return binding.IntermediateDecodeWithMetadata(this._impl, aNumResults); | ||
} | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference. | ||
* | ||
* @return The STT result. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
finishStream() { | ||
const result = binding.FinishStream(this._impl); | ||
this._impl = null; | ||
return result; | ||
} | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference. | ||
* | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
finishStreamWithMetadata(aNumResults = 1) { | ||
const result = binding.FinishStreamWithMetadata(this._impl, aNumResults); | ||
this._impl = null; | ||
return result; | ||
} | ||
} | ||
/** | ||
* Disable decoding using an external scorer. | ||
* | ||
* @return {number} Zero on success, non-zero on failure (invalid arguments). | ||
* An object providing an interface to a trained DeepSpeech model. | ||
*/ | ||
Model.prototype.disableExternalScorer = function() { | ||
return binding.EnableExternalScorer(this._impl); | ||
} | ||
/** | ||
* Set hyperparameters alpha and beta of the external scorer. | ||
* | ||
* @param {float} aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight. | ||
* @param {float} aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight. | ||
* | ||
* @return {number} Zero on success, non-zero on failure (invalid arguments). | ||
*/ | ||
Model.prototype.setScorerAlphaBeta = function(aLMAlpha, aLMBeta) { | ||
return binding.SetScorerAlphaBeta(this._impl, aLMAlpha, aLMBeta); | ||
} | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text. | ||
* | ||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* | ||
* @return {string} The STT result. Returns undefined on error. | ||
*/ | ||
Model.prototype.stt = function(aBuffer) { | ||
return binding.SpeechToText(this._impl, aBuffer); | ||
} | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text and output results including metadata. | ||
* | ||
* @param {object} aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* @param {number} aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return {object} :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
Model.prototype.sttWithMetadata = function(aBuffer, aNumResults) { | ||
aNumResults = aNumResults || 1; | ||
return binding.SpeechToTextWithMetadata(this._impl, aBuffer, aNumResults); | ||
} | ||
/** | ||
* Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object. | ||
* | ||
* @return {object} a :js:func:`Stream` object that represents the streaming state. | ||
* | ||
* @throws on error | ||
*/ | ||
Model.prototype.createStream = function() { | ||
const rets = binding.CreateStream(this._impl); | ||
const status = rets[0]; | ||
const ctx = rets[1]; | ||
if (status !== 0) { | ||
throw "CreateStream failed with '"+binding.ErrorCodeToErrorMessage(status)+"' (0x" + status.toString(16) + ")"; | ||
class Model { | ||
/** | ||
* @param aModelPath The path to the frozen model graph. | ||
* | ||
* @throws on error | ||
*/ | ||
constructor(aModelPath) { | ||
this._impl = null; | ||
const [status, impl] = binding.CreateModel(aModelPath); | ||
if (status !== 0) { | ||
throw `CreateModel failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; | ||
} | ||
this._impl = impl; | ||
} | ||
return new Stream(ctx); | ||
/** | ||
* Get beam width value used by the model. If :js:func:`Model.setBeamWidth` was | ||
* not called before, will return the default value loaded from the model file. | ||
* | ||
* @return Beam width value used by the model. | ||
*/ | ||
beamWidth() { | ||
return binding.GetModelBeamWidth(this._impl); | ||
} | ||
/** | ||
* Set beam width value used by the model. | ||
* | ||
* @param aBeamWidth The beam width used by the model. A larger beam width value generates better results at the cost of decoding time. | ||
* | ||
* @throws on error | ||
*/ | ||
setBeamWidth(aBeamWidth) { | ||
const status = binding.SetModelBeamWidth(this._impl, aBeamWidth); | ||
if (status !== 0) { | ||
throw `SetModelBeamWidth failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; | ||
} | ||
} | ||
/** | ||
* Return the sample rate expected by the model. | ||
* | ||
* @return Sample rate. | ||
*/ | ||
sampleRate() { | ||
return binding.GetModelSampleRate(this._impl); | ||
} | ||
/** | ||
* Enable decoding using an external scorer. | ||
* | ||
* @param aScorerPath The path to the external scorer file. | ||
* | ||
* @throws on error | ||
*/ | ||
enableExternalScorer(aScorerPath) { | ||
const status = binding.EnableExternalScorer(this._impl, aScorerPath); | ||
if (status !== 0) { | ||
throw `EnableExternalScorer failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; | ||
} | ||
} | ||
/** | ||
* Disable decoding using an external scorer. | ||
* | ||
* @throws on error | ||
*/ | ||
disableExternalScorer() { | ||
const status = binding.DisableExternalScorer(this._impl); | ||
if (status !== 0) { | ||
throw `DisableExternalScorer failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; | ||
} | ||
} | ||
/** | ||
* Set hyperparameters alpha and beta of the external scorer. | ||
* | ||
* @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model weight. | ||
* @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion weight. | ||
* | ||
* @throws on error | ||
*/ | ||
setScorerAlphaBeta(aLMAlpha, aLMBeta) { | ||
const status = binding.SetScorerAlphaBeta(this._impl, aLMAlpha, aLMBeta); | ||
if (status !== 0) { | ||
throw `SetScorerAlphaBeta failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; | ||
} | ||
} | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text. | ||
* | ||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* | ||
* @return The STT result. Returns undefined on error. | ||
*/ | ||
stt(aBuffer) { | ||
return binding.SpeechToText(this._impl, aBuffer); | ||
} | ||
/** | ||
* Use the DeepSpeech model to perform Speech-To-Text and output metadata | ||
* about the results. | ||
* | ||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). | ||
* @param aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. | ||
* Default value is 1 if not specified. | ||
* | ||
* @return :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. | ||
* The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
sttWithMetadata(aBuffer, aNumResults = 1) { | ||
return binding.SpeechToTextWithMetadata(this._impl, aBuffer, aNumResults); | ||
} | ||
/** | ||
* Create a new streaming inference state. One can then call :js:func:`Stream.feedAudioContent` and :js:func:`Stream.finishStream` on the returned stream object. | ||
* | ||
* @return a :js:func:`Stream` object that represents the streaming state. | ||
* | ||
* @throws on error | ||
*/ | ||
createStream() { | ||
const [status, ctx] = binding.CreateStream(this._impl); | ||
if (status !== 0) { | ||
throw `CreateStream failed: ${binding.ErrorCodeToErrorMessage(status)} (0x${status.toString(16)})`; | ||
} | ||
return new Stream(ctx); | ||
} | ||
} | ||
exports.Model = Model; | ||
/** | ||
* @class | ||
* Provides an interface to a DeepSpeech stream. The constructor cannot be called | ||
* directly, use :js:func:`Model.createStream`. | ||
*/ | ||
function Stream(nativeStream) { | ||
this._impl = nativeStream; | ||
} | ||
/** | ||
* Feed audio samples to an ongoing streaming inference. | ||
* | ||
* @param {buffer} aBuffer An array of 16-bit, mono raw audio samples at the | ||
* appropriate sample rate (matching what the model was trained on). | ||
*/ | ||
Stream.prototype.feedAudioContent = function(aBuffer) { | ||
binding.FeedAudioContent(this._impl, aBuffer); | ||
} | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference. | ||
* | ||
* @return {string} The STT intermediate result. | ||
*/ | ||
Stream.prototype.intermediateDecode = function() { | ||
return binding.IntermediateDecode(this._impl); | ||
} | ||
/** | ||
* Compute the intermediate decoding of an ongoing streaming inference, return results including metadata. | ||
* | ||
* @param {number} aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return {object} :js:func:`Metadata` object containing multiple candidate transcripts. Each transcript has per-token metadata including timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. Returns undefined on error. | ||
*/ | ||
Stream.prototype.intermediateDecodeWithMetadata = function(aNumResults) { | ||
aNumResults = aNumResults || 1; | ||
return binding.IntermediateDecode(this._impl, aNumResults); | ||
} | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the result. Signals the end of an ongoing streaming inference. | ||
* | ||
* @return {string} The STT result. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
Stream.prototype.finishStream = function() { | ||
let result = binding.FinishStream(this._impl); | ||
this._impl = null; | ||
return result; | ||
} | ||
/** | ||
* Compute the final decoding of an ongoing streaming inference and return the results including metadata. Signals the end of an ongoing streaming inference. | ||
* | ||
* @param {number} aNumResults Maximum number of candidate transcripts to return. Returned list might be smaller than this. Default value is 1 if not specified. | ||
* | ||
* @return {object} Outputs a :js:func:`Metadata` struct of individual letters along with their timing information. The user is responsible for freeing Metadata by calling :js:func:`FreeMetadata`. | ||
* | ||
* This method will free the stream, it must not be used after this method is called. | ||
*/ | ||
Stream.prototype.finishStreamWithMetadata = function(aNumResults) { | ||
aNumResults = aNumResults || 1; | ||
let result = binding.FinishStreamWithMetadata(this._impl, aNumResults); | ||
this._impl = null; | ||
return result; | ||
} | ||
/**
 * Frees associated resources and destroys model object.
 *
 * @param {object} model A model pointer returned by :js:func:`Model`
 */
function FreeModel(model) {
    // Release the native model exactly once. The previous body contained a
    // duplicated (unreachable) second call left over from a bad merge, and
    // returned the binding's result even though callers have no use for it.
    binding.FreeModel(model._impl);
}
exports.FreeModel = FreeModel;
/**
 * Free memory allocated for metadata information.
 *
 * @param {object} metadata Object containing metadata as returned by :js:func:`Model.sttWithMetadata` or :js:func:`Stream.finishStreamWithMetadata`
 */
function FreeMetadata(metadata) {
    // Single call to the native free; the duplicated (unreachable) second
    // statement and duplicated @param line from a bad merge are removed.
    binding.FreeMetadata(metadata);
}
exports.FreeMetadata = FreeMetadata;
/**
 * Destroy a streaming state without decoding the computed logits. This can
 * be used if you no longer need the result of an ongoing streaming inference
 * and don't want to perform a costly decode operation.
 *
 * @param {object} stream A stream object returned by :js:func:`Model.createStream`.
 */
function FreeStream(stream) {
    // Release the native stream exactly once; the duplicated (unreachable)
    // second statement left over from a bad merge is removed.
    binding.FreeStream(stream._impl);
}
exports.FreeStream = FreeStream;
/** | ||
 * Returns the version of this library. The returned version is a semantic
 * version (SemVer 2.0.0).
*/ | ||
@@ -258,82 +251,2 @@ function Version() { | ||
} | ||
//// Metadata, CandidateTranscript and TokenMetadata are here only for documentation purposes | ||
/**
 * @class
 *
 * Stores text of an individual token, along with its timing information.
 * Documentation-only stub: real instances are produced by the native binding.
 */
function TokenMetadata() {}

/**
 * The text corresponding to this token
 *
 * @return {string} The text generated
 */
TokenMetadata.prototype.text = function() {}

/**
 * Position of the token in units of 20ms
 *
 * @return {int} The position of the token
 */
TokenMetadata.prototype.timestep = function() {}

/**
 * Position of the token in seconds
 *
 * @return {float} The position of the token
 */
TokenMetadata.prototype.start_time = function() {}
/**
 * @class
 *
 * A single transcript computed by the model, including a confidence value
 * and the metadata for its constituent tokens. Documentation-only stub:
 * real instances are produced by the native binding.
 */
function CandidateTranscript() {}

/**
 * Array of tokens
 *
 * @return {array} Array of :js:func:`TokenMetadata`
 */
CandidateTranscript.prototype.tokens = function() {}

/**
 * Approximated confidence value for this transcription. This is roughly the
 * sum of the acoustic model logit values for each timestep/token that
 * contributed to the creation of this transcription.
 *
 * @return {float} Confidence value
 */
CandidateTranscript.prototype.confidence = function() {}
/**
 * @class
 *
 * An array of CandidateTranscript objects computed by the model.
 * Documentation-only stub: real instances are produced by the native binding.
 */
function Metadata() {}

/**
 * Array of transcripts
 *
 * @return {array} Array of :js:func:`CandidateTranscript` objects
 */
Metadata.prototype.transcripts = function() {}
// Attach the public API to the existing `exports` object rather than
// reassigning `module.exports`. The file also assigns `exports.FreeModel`,
// `exports.FreeStream` and `exports.FreeMetadata` earlier; reassigning
// `module.exports` here made the later `exports.Version = Version` a silent
// no-op (it mutated the stale `exports` object that was no longer exported).
// Object.assign keeps `module.exports === exports`, so both styles agree.
Object.assign(exports, {
    Model: Model,
    Metadata: Metadata,
    CandidateTranscript: CandidateTranscript,
    TokenMetadata: TokenMetadata,
    Version: Version,
    FreeModel: FreeModel,
    FreeStream: FreeStream,
    FreeMetadata: FreeMetadata
});
{ | ||
"name": "deepspeech-gpu", | ||
"version": "0.7.1", | ||
"version": "0.7.3", | ||
"description": "DeepSpeech NodeJS bindings", | ||
"main": "./index.js",
"license": "MPL-2.0", | ||
"homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.7.1#project-deepspeech", | ||
"homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.7.3#project-deepspeech", | ||
"files": [ | ||
"README.md",
"dependencies": { | ||
"node-pre-gyp": "0.14.x", | ||
"node-pre-gyp": "0.15.x", | ||
"argparse": "1.0.x", | ||
"sox-stream": "2.0.x",
"node-gyp": "4.x - 5.x", | ||
"typescript": "3.6.x", | ||
"typescript": "3.8.x", | ||
"typedoc": "0.17.x", | ||
"@types/argparse": "1.0.x", | ||
"@types/node": "13.9.x"
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
144785222
31
6
586
+ Added node-pre-gyp@0.15.0 (transitive)
- Removed node-pre-gyp@0.14.0 (transitive)
Updated node-pre-gyp@0.15.x