deepspeech-gpu
Comparing version 0.5.0-alpha.5 to 0.5.0-alpha.6
client.js
#!/usr/bin/env node
'use strict';
const Fs = require('fs');
@@ -43,2 +45,7 @@ const Sox = require('sox-stream');
 Ds.printVersions();
+let runtime = 'Node';
+if (process.versions.electron) {
+  runtime = 'Electron';
+}
+console.error('Runtime: ' + runtime);
 process.exit(0);
@@ -53,3 +60,4 @@ }
 parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
-parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'})
+parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
+parser.addArgument(['--extended'], {action: 'storeTrue', help: 'Output string from extended metadata'});
 var args = parser.parseArgs();
@@ -61,2 +69,10 @@
+function metadataToString(metadata) {
+  var retval = ""
+  for (var i = 0; i < metadata.num_items; ++i) {
+    retval += metadata.items[i].character;
+  }
+  return retval;
+}
 const buffer = Fs.readFileSync(args['audio']);
@@ -95,3 +111,3 @@ const result = Wav.decode(buffer);
 audioStream.on('finish', () => {
-  audioBuffer = audioStream.toBuffer();
+  let audioBuffer = audioStream.toBuffer();
@@ -119,5 +135,10 @@ console.error('Loading model from file %s', args['model']);
 // LocalDsSTT() expected a short*
-console.log(model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000));
+if (args['extended']) {
+  console.log(metadataToString(model.sttWithMetadata(audioBuffer.slice(0, audioBuffer.length / 2), 16000)));
+} else {
+  console.log(model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000));
+}
 const inference_stop = process.hrtime(inference_start);
 console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4));
 process.exit(0);
});
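Taken together, the client.js changes add an `--extended` flag that routes transcription through the new `sttWithMetadata` binding and flattens the returned per-character metadata back into a string. The sketch below condenses that new code path into a standalone helper; it is only a sketch that assumes `model` is an already-initialized `Ds.Model` and `audioBuffer` holds the raw 16 kHz, 16-bit mono PCM that client.js obtains via Sox.

```javascript
// Minimal sketch of the new --extended code path (assumes `model` is an
// initialized Ds.Model and `audioBuffer` holds raw 16 kHz, 16-bit mono PCM,
// as produced by the Sox pipeline in client.js).
function transcribe(model, audioBuffer, extended) {
  // Same buffer slicing as client.js; the binding treats the buffer as 16-bit samples.
  const pcm = audioBuffer.slice(0, audioBuffer.length / 2);
  if (extended) {
    // sttWithMetadata returns a Metadata object with per-character items.
    const metadata = model.sttWithMetadata(pcm, 16000);
    let text = '';
    for (let i = 0; i < metadata.num_items; ++i) {
      text += metadata.items[i].character;
    }
    return text;
  }
  // Plain decoding, unchanged from the previous release.
  return model.stt(pcm, 16000);
}
```

In the updated client this corresponds to passing `--extended` alongside the existing `--audio` (and related model) arguments.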
index.js
@@ -0,1 +1,3 @@
+'use strict';
 const binary = require('node-pre-gyp');
@@ -44,2 +46,7 @@ const path = require('path')
+Model.prototype.sttWithMetadata = function() {
+  const args = [this._impl].concat(Array.prototype.slice.call(arguments));
+  return binding.SpeechToTextWithMetadata.apply(null, args);
+}
 Model.prototype.setupStream = function() {
@@ -68,2 +75,6 @@ const args = [this._impl].concat(Array.prototype.slice.call(arguments));
+Model.prototype.finishStreamWithMetadata = function() {
+  return binding.FinishStreamWithMetadata.apply(null, arguments);
+}
 module.exports = {
@@ -70,0 +81,0 @@ Model: Model,
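index.js wires the two new native entry points, `SpeechToTextWithMetadata` and `FinishStreamWithMetadata`, onto the `Model` prototype. The streaming variant forwards its arguments straight to the binding, so it is called with the stream context returned by `setupStream`. Below is a hedged sketch of how an application might use it; only `setupStream`, `sttWithMetadata` and `finishStreamWithMetadata` appear in this diff, so the `feedAudioContent` call and the `setupStream` argument list are assumptions about the rest of the 0.5.x streaming API and may differ.

```javascript
// Hypothetical streaming transcription with per-character metadata.
// `model` is an initialized Model; `pcmChunks` is an array of Buffers holding
// 16 kHz, 16-bit mono PCM. feedAudioContent and the setupStream arguments are
// assumed here and may differ in this release.
function streamingTranscript(model, pcmChunks) {
  const sctx = model.setupStream(150, 16000); // pre-allocated frames, sample rate (assumed)
  for (const chunk of pcmChunks) {
    model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
  }
  // finishStreamWithMetadata forwards its arguments to the native binding,
  // so the stream context is passed explicitly.
  const metadata = model.finishStreamWithMetadata(sctx);
  let text = '';
  for (let i = 0; i < metadata.num_items; ++i) {
    text += metadata.items[i].character;
  }
  return text;
}
```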
package.json
 {
   "name" : "deepspeech-gpu",
-  "version" : "0.5.0-alpha.5",
+  "version" : "0.5.0-alpha.6",
   "description" : "DeepSpeech NodeJS bindings",
@@ -11,3 +11,3 @@ "main" : "./index",
   "license": "MPL-2.0",
-  "homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.5.0-alpha.5#project-deepspeech",
+  "homepage": "https://github.com/mozilla/DeepSpeech/tree/v0.5.0-alpha.6#project-deepspeech",
   "files": [
@@ -43,5 +43,4 @@ "README.md",
   "scripts": {
     "install" : "node-pre-gyp install --fallback-to-build",
     "test": "node index.js"
   }
 }
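The `install` script delegates to node-pre-gyp, which downloads a prebuilt native binding for the current platform (or compiles one via `--fallback-to-build`). At runtime, index.js resolves that binary through the same package.json, following the usual node-pre-gyp pattern. The sketch below shows that pattern under the assumption that this package uses the stock lookup; the exact path and binding layout here may differ.

```javascript
'use strict';

// Typical node-pre-gyp lookup (a sketch; the exact code in this package may
// differ): binary.find() reads the "binary" section of package.json and
// returns the path to the .node file that the install script fetched or built.
const binary = require('node-pre-gyp');
const path = require('path');

const bindingPath = binary.find(path.resolve(path.join(__dirname, 'package.json')));
const binding = require(bindingPath);
```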
README.md
@@ -35,13 +35,13 @@ # Project DeepSpeech
 - [Getting the code](#getting-the-code)
-- [Getting the pre-trained model](#getting-the-pre-trained-model)
-- [Using the model](#using-the-model)
+- [Using a Pre-trained Model](#using-a-pre-trained-model)
 - [CUDA dependency](#cuda-dependency)
+- [Getting the pre-trained model](#getting-the-pre-trained-model)
+- [Model compatibility](#model-compatibility)
 - [Using the Python package](#using-the-python-package)
-- [Using the command-line client](#using-the-command-line-client)
 - [Using the Node.JS package](#using-the-nodejs-package)
+- [Using the Command Line client](#using-the-command-line-client)
 - [Installing bindings from source](#installing-bindings-from-source)
 - [Third party bindings](#third-party-bindings)
-- [Training](#training)
-- [Installing prerequisites for training](#installing-prerequisites-for-training)
+- [Training your own Model](#training-your-own-model)
+- [Installing training prerequisites](#installing-training-prerequisites)
 - [Recommendations](#recommendations)
@@ -55,2 +55,3 @@ - [Common Voice training data](#common-voice-training-data)
+- [Continuing training from a release model](#continuing-training-from-a-release-model)
 - [Contribution guidelines](#contribution-guidelines)
 - [Contact/Getting Help](#contactgetting-help)
@@ -73,24 +74,34 @@
-## Getting the pre-trained model
-If you want to use the pre-trained English model for performing speech-to-text, you can download it (along with other important inference material) from the DeepSpeech [releases page](https://github.com/mozilla/DeepSpeech/releases). Alternatively, you can run the following command to download and unzip the model files in your current directory:
+## Using a Pre-trained Model
-```bash
-wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz
-tar xvfz deepspeech-0.4.1-models.tar.gz
-```
-## Using the model
 There are three ways to use DeepSpeech inference:
 - [The Python package](#using-the-python-package)
-- [The command-line client](#using-the-command-line-client)
 - [The Node.JS package](#using-the-nodejs-package)
+- [The Command-Line client](#using-the-command-line-client)
+Running `deepspeech` might require some runtime dependencies to be already installed on your system. Regardless of which bindings you are using, you will need the following:
+* libsox2
+* libstdc++6
+* libgomp1
+* libpthread
+Please refer to your system's documentation on how to install these dependencies.
 ### CUDA dependency
-The GPU capable builds (Python, NodeJS, C++ etc) depend on the same CUDA runtime as upstream TensorFlow. Currently with TensorFlow r1.12 it depends on CUDA 9.0 and CuDNN v7.2.
+The GPU capable builds (Python, NodeJS, C++, etc) depend on the same CUDA runtime as upstream TensorFlow. Make sure you've installed the correct version of CUDA
+### Getting the pre-trained model
+If you want to use the pre-trained English model for performing speech-to-text, you can download it (along with other important inference material) from the DeepSpeech [releases page](https://github.com/mozilla/DeepSpeech/releases). Alternatively, you can run the following command to download and unzip the model files in your current directory:
+```bash
+wget https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/deepspeech-0.4.1-models.tar.gz
+tar xvfz deepspeech-0.4.1-models.tar.gz
+```
+### Model compatibility
@@ -169,7 +180,28 @@
-### Using the command-line client
+### Using the Node.JS package
-To download the pre-built binaries for the `deepspeech` command-line client, use `util/taskcluster.py`:
+You can download the Node.JS bindings using `npm`:
+```bash
+npm install deepspeech
+```
+Please note that as of now, we only support Node.JS versions 4, 5 and 6. Once [SWIG has support](https://github.com/swig/swig/pull/968) we can build for newer versions.
+Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can install the GPU specific package as follows:
+```bash
+npm install deepspeech-gpu
+```
+See the [release notes](https://github.com/mozilla/DeepSpeech/releases) to find which GPUs are supported. Please ensure you have the required [CUDA dependency](#cuda-dependency).
+See [client.js](native_client/javascript/client.js) for an example of how to use the bindings. Or download the [wav example](examples/nodejs_wav).
+### Using the Command-Line client
+To download the pre-built binaries for the `deepspeech` command-line (compiled C++) client, use `util/taskcluster.py`:
 ```bash
 python3 util/taskcluster.py --target .
@@ -200,20 +232,2 @@ ```
-### Using the Node.JS package
-You can download the Node.JS bindings using `npm`:
-```bash
-npm install deepspeech
-```
-Alternatively, if you're using Linux and have a supported NVIDIA GPU, you can install the GPU specific package as follows:
-```bash
-npm install deepspeech-gpu
-```
-See the [release notes](https://github.com/mozilla/DeepSpeech/releases) to find which GPUs are supported. Please ensure you have the required [CUDA dependency](#cuda-dependency).
-See [client.js](native_client/javascript/client.js) for an example of how to use the bindings. Or download the [wav example](examples/nodejs_wav).
 ### Installing bindings from source
@@ -232,5 +246,5 @@
-## Training
+## Training Your Own Model
-### Installing prerequisites for training
+### Installing Training Prerequisites
@@ -368,2 +382,54 @@ Install the required dependencies using `pip3`:
 ## Contribution guidelines
 This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
+Before making a Pull Request, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter setup in this repository, so for example, if you've made some changes and would like to run the linter on just the changed code, you can use the follow command:
+```bash
+pip install pylint cardboardlint
+cardboardlinter --refspec master
+```
+This will compare the code against master and run the linter on all the changes. We plan to introduce more linter checks (e.g. for C++) in the future. To run it automatically as a git pre-commit hook, do the following:
+```bash
+cat <<\EOF > .git/hooks/pre-commit
+#!/bin/bash
+if [ ! -x "$(command -v cardboardlinter)" ]; then
+    exit 0
+fi
+# First, stash index and work dir, keeping only the
+# to-be-committed changes in the working directory.
+echo "Stashing working tree changes..." 1>&2
+old_stash=$(git rev-parse -q --verify refs/stash)
+git stash save -q --keep-index
+new_stash=$(git rev-parse -q --verify refs/stash)
+# If there were no changes (e.g., `--amend` or `--allow-empty`)
+# then nothing was stashed, and we should skip everything,
+# including the tests themselves. (Presumably the tests passed
+# on the previous commit, so there is no need to re-run them.)
+if [ "$old_stash" = "$new_stash" ]; then
+    echo "No changes, skipping lint." 1>&2
+    exit 0
+fi
+# Run tests
+cardboardlinter --refspec HEAD^ -n auto
+status=$?
+# Restore changes
+echo "Restoring working tree changes..." 1>&2
+git reset --hard -q && git stash apply --index -q && git stash drop -q
+# Exit with status from test-run: nonzero prevents commit
+exit $status
+EOF
+chmod +x .git/hooks/pre-commit
+```
+This will run the linters on just the changes made in your commit.
 ## Contact/Getting Help
@@ -370,0 +436,0 @@
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package.

Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 34 instances in 1 package.

License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package.

Install scripts
Supply chain risk: Install scripts are run when the package is installed. The majority of malware in npm is hidden in install scripts.
Found 1 instance in 1 package.

Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 18 instances in 1 package.