Javascript Speechly API
See the generic Speechly gRPC stubs documentation for more information about using the API. This package is intended for, and tested with, Node.js server-side implementations.
Installation
The gRPC libraries (`@grpc/grpc-js` and `google-protobuf`) are declared as peer dependencies, meaning that they need to be installed separately in the main package. This is to prevent multiple versions of `@grpc/grpc-js` from existing in the module tree. There is another gRPC implementation for Node.js, `grpc`, which is deprecated and not supported by this package.
npm install --save @grpc/grpc-js google-protobuf
npm install --save @speechly/api
Usage
The generated code can be used with Javascript or TypeScript code, but it only works on platforms that support ES or CommonJS modules. Messages and services are located within their specific packages and in separate files.
Creating a Client
Every gRPC service definition is bundled with a generic client that can be used to access the service. Create API clients like this:
// gRPC runtime helpers: channel credentials for the clients and call metadata.
const { credentials, Metadata } = require("@grpc/grpc-js");
// Generated client for the Identity API (v2); used below for the login RPC.
const { IdentityAPIClient } = require("@speechly/api/speechly/identity/v2/identity_api_grpc_pb");
const identityClient = new IdentityAPIClient("api.speechly.com", credentials.createSsl());
// Generated client for the SLU API (v1); used below for the streaming RPC.
const { SLUClient } = require("@speechly/api/speechly/slu/v1/slu_grpc_pb");
// Both clients target the same host and use SSL channel credentials.
const sluClient = new SLUClient("api.speechly.com", credentials.createSsl());
The clients will use protobuf messages, which are included in this package:
const { LoginRequest, ApplicationScope, ProjectScope } = require("@speechly/api/speechly/identity/v2/identity_api_pb");
const { SLURequest, SLUConfig, SLUEvent } = require("@speechly/api/speechly/slu/v1/slu_pb");
IdentityAPI
Logging in with IdentityAPI is a single RPC call, wrapped here in a `Promise`. The `login` function takes either an `appId` or a `projectId`, and returns the resulting token together with its expiry time.
/**
 * Log in through the Identity API and obtain an access token.
 *
 * Wraps the callback-based `identityClient.login` RPC in a Promise. The
 * request is application scoped when `appId` is given, and project scoped
 * (using `projectId`) when `appId` is `undefined`.
 *
 * @param {string} deviceId - Device identifier set on the login request.
 * @param {string} [appId] - Application id; pass `undefined` to use a project scope.
 * @param {string} [projectId] - Project id; only used when `appId` is `undefined`.
 * @returns {Promise<{token: *, expires: Date}>} Resolves with the token from
 *   the response and its expiry wrapped in a `Date`; rejects with the RPC error.
 */
async function login(deviceId, appId, projectId) {
  return new Promise((resolve, reject) => {
    const req = new LoginRequest();
    req.setDeviceId(deviceId);

    if (appId !== undefined) {
      const app = new ApplicationScope();
      app.setAppId(appId);
      req.setApplication(app);
    } else {
      const project = new ProjectScope();
      project.setProjectId(projectId);
      req.setProject(project);
    }

    identityClient.login(req, (err, res) => {
      if (err) {
        // Stop here: on failure there is no response object, so falling
        // through to `res.getToken()` would throw inside the RPC callback.
        reject(err);
        return;
      }

      resolve({
        token: res.getToken(),
        expires: new Date(res.getExpiresAt()),
      });
    });
  });
}
SLU
Streaming SLU is a bidirectional RPC call to the API, which means that it is not as simple as the login example above. The `data` parameter is a Node.js stream that provides the audio data as `Buffer` chunks in its `data` events. The `appId` is required if the token is project scoped.
/**
 * Stream audio to the SLU API over a bidirectional call and collect the result.
 *
 * Writes, in order: an audio config message, a START event carrying `appId`,
 * one audio message per chunk emitted by `data`, and finally a STOP event,
 * after which the call is ended. Responses are accumulated until the call
 * emits "end".
 *
 * @param {object} data - Stream-like audio source emitting "data" (audio
 *   chunks), "end" and possibly "error" events.
 * @param {string} appId - Application id set on the START event.
 * @param {string} token - Access token sent as a Bearer `Authorization` metadata entry.
 * @returns {Promise<{intent: string, entities: string, transcript: string}>}
 *   Resolves with the last intent seen, entities joined by ", " and transcript
 *   words joined by " ". Rejects if the call or the audio source errors.
 */
async function stream_speech(data, appId, token) {
  return new Promise((resolve, reject) => {
    const md = new Metadata();
    md.add("Authorization", `Bearer ${token}`);
    const call = sluClient.stream(md);

    const transcript = [];
    const entities = [];
    let intent = "";

    call.on("data", (d) => {
      if (d.hasStarted()) {
        console.log("Started audio context", d.getAudioContext());
      } else if (d.hasFinished()) {
        console.log("Stopped audio context", d.getAudioContext());
      } else if (d.hasTranscript()) {
        transcript.push(d.getTranscript().getWord());
      } else if (d.hasEntity()) {
        entities.push(d.getEntity().getEntity());
      } else if (d.hasIntent()) {
        intent = d.getIntent().getIntent();
      }
    });
    call.on("error", (err) => {
      reject(err);
    });
    call.on("end", () => {
      resolve({
        intent,
        entities: entities.join(", "),
        transcript: transcript.join(" "),
      });
    });

    // The config message is written first and must describe the audio sent below.
    const config = new SLUConfig();
    config.setEncoding(SLUConfig.Encoding.LINEAR16);
    config.setChannels(1);
    config.setSampleRateHertz(16000);
    const configReq = new SLURequest();
    configReq.setConfig(config);
    call.write(configReq);

    const startContextReq = new SLURequest();
    const startEvent = new SLUEvent();
    startEvent.setEvent(SLUEvent.Event.START);
    startEvent.setAppId(appId);
    startContextReq.setEvent(startEvent);
    call.write(startContextReq);

    data.on("data", (chunk) => {
      const req = new SLURequest();
      req.setAudio(chunk);
      call.write(req);
    });
    data.on("error", (err) => {
      // Without this handler a failing audio source never emits "end", so the
      // call would stay open and the promise would never settle. Reject first
      // so the source error wins over the cancellation error from the call.
      reject(err);
      call.cancel();
    });
    data.on("end", () => {
      const stopContextReq = new SLURequest();
      const stopEvent = new SLUEvent();
      stopEvent.setEvent(SLUEvent.Event.STOP);
      stopContextReq.setEvent(stopEvent);
      call.write(stopContextReq);
      call.end();
    });
  });
}
Combine and run!
// Example entry point: log in with a project-scoped token, then stream audio
// for one application and print what was recognised. Any failure along the
// way is reported on stderr.
(async () => {
  const deviceId = "generated_UUID_for_device";
  const projectId = "your_project_id";
  const appId = "your_app_id";

  try {
    // appId is left undefined so that login() uses the project scope.
    const { token } = await login(deviceId, undefined, projectId);
    const audio = readAudioSource();
    const { intent, entities, transcript } = await stream_speech(audio, appId, token);

    console.log("Intent: ", intent);
    console.log("Entities: ", entities);
    console.log("Transcript: ", transcript);
  } catch (failure) {
    console.error(failure);
  }
})();
While it is recommended to stream the audio directly to the Speechly SLU API, a pre-recorded WAV file can be used as a test case. Note that the audio config message needs to match the actual audio contained in the WAV file, otherwise the results will not be of good quality. You can resample recordings with e.g. sox.
Reading a WAV file in a format usable in the above code, using the `wav` npm package:
const wav = require("wav");
const fs = require("fs");
/**
 * Open a WAV file and return a `wav.Reader` that the file is piped into.
 *
 * @param {string} file - Path of the WAV file to read.
 * @returns {object} The `wav.Reader` instance receiving the file contents
 *   (presumably emits the decoded audio as "data" events; see the `wav`
 *   package documentation).
 */
function readWAV(file) {
  const fstream = fs.createReadStream(file);
  const reader = new wav.Reader();
  // pipe() does not forward source errors. Re-emit them on the reader so the
  // consumer of the returned stream can handle a missing or unreadable file,
  // instead of the process failing on an unhandled "error" event of fstream.
  fstream.on("error", (err) => reader.emit("error", err));
  fstream.pipe(reader);
  return reader;
}