Socket
Socket
Sign inDemoInstall

web-speech-cognitive-services

Package Overview
Dependencies
Maintainers
1
Versions
153
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

web-speech-cognitive-services - npm Package Compare versions

Comparing version 0.0.1-master.869e22a to 0.0.1-master.da41f39

336

lib/CognitiveServicesSpeechRecognition.js

@@ -7,2 +7,4 @@ 'use strict';

// Shallow-merge helper: prefers the native Object.assign and otherwise falls
// back to copying every own enumerable property of each source (arguments
// 1..n) onto the first argument, which is mutated and returned.
var _extends = Object.assign || function (target) {
  for (var argIndex = 1; argIndex < arguments.length; argIndex++) {
    var from = arguments[argIndex];

    for (var prop in from) {
      // for...in also visits inherited keys; only own ones are copied.
      if (!Object.prototype.hasOwnProperty.call(from, prop)) {
        continue;
      }

      target[prop] = from[prop];
    }
  }

  return target;
};
// Class-definition helper: installs the given property descriptors on a
// constructor's prototype (protoProps) and on the constructor itself
// (staticProps), then returns the constructor.
var _createClass = function () {
  // Defines every descriptor in `specs` on `owner`, keyed by `spec.key`.
  function defineProperties(owner, specs) {
    for (var index = 0; index < specs.length; index++) {
      var spec = specs[index];

      // Members are non-enumerable unless the descriptor opts in.
      if (!spec.enumerable) {
        spec.enumerable = false;
      }

      spec.configurable = true;

      // Only data descriptors may carry `writable`; accessors must not.
      if ("value" in spec) {
        spec.writable = true;
      }

      Object.defineProperty(owner, spec.key, spec);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      defineProperties(Constructor.prototype, protoProps);
    }

    if (staticProps) {
      defineProperties(Constructor, staticProps);
    }

    return Constructor;
  };
}();

@@ -14,2 +16,6 @@

var _eventAsPromise = require('event-as-promise');
var _eventAsPromise2 = _interopRequireDefault(_eventAsPromise);
var _memoizeOne = require('memoize-one');

@@ -23,2 +29,4 @@

/**
 * Returns a new object holding the own enumerable properties of `obj` whose
 * names are not listed in `keys` (the "rest" of an object destructuring).
 *
 * @param {Object} obj - Source object; it is not modified.
 * @param {string[]} keys - Property names to leave out of the copy.
 * @returns {Object} A fresh object with the remaining properties.
 */
function _objectWithoutProperties(obj, keys) {
  var rest = {};

  for (var prop in obj) {
    var isExcluded = keys.indexOf(prop) >= 0;

    // for...in also visits inherited keys; only own ones are copied.
    if (!isExcluded && Object.prototype.hasOwnProperty.call(obj, prop)) {
      rest[prop] = obj[prop];
    }
  }

  return rest;
}
/**
 * Wraps a generator function so it behaves like an async function: each
 * yielded value is awaited and its settled result is fed back into the
 * generator via `next` (fulfilment) or `throw` (rejection).
 *
 * @param {Function} fn - Generator function to drive.
 * @returns {Function} A function that forwards `this` and its arguments to
 *   `fn` and returns a Promise for the generator's return value; the Promise
 *   rejects with anything the generator throws.
 */
function _asyncToGenerator(fn) {
  return function () {
    var generator = fn.apply(this, arguments);

    return new Promise(function (resolve, reject) {
      // Resumes the generator with `method` ("next" or "throw") and `input`.
      function advance(method, input) {
        var result;
        var settled;

        try {
          result = generator[method](input);
          settled = result.value;
        } catch (failure) {
          // The generator threw synchronously: fail the outer promise.
          reject(failure);
          return;
        }

        if (!result.done) {
          return Promise.resolve(settled).then(function (next) {
            advance("next", next);
          }, function (reason) {
            advance("throw", reason);
          });
        }

        resolve(settled);
      }

      return advance("next");
    });
  };
}

@@ -77,6 +85,6 @@

var name = 'Browser';
var osVersion = "0.0.1-master.869e22a";
var osVersion = "0.0.1-master.da41f39";
var manufacturer = 'web-speech-cognitive-services';
var model = 'web-speech-cognitive-services';
var deviceVersion = "0.0.1-master.869e22a";
var deviceVersion = "0.0.1-master.da41f39";

@@ -144,139 +152,202 @@ var config = new CognitiveSpeech.RecognizerConfig(new CognitiveSpeech.SpeechConfig(new CognitiveSpeech.Context(new CognitiveSpeech.OS(platform, name, osVersion), new CognitiveSpeech.Device(manufacturer, model, deviceVersion))), mode, lang, CognitiveSpeech.SpeechResultFormat.Detailed);

_createClass(CognitiveServicesSpeechRecognition, [{
key: '_transitTo',
value: function _transitTo(nextReadyState) {
// console.log(`_transitTo: readyState = ${ this.readyState }, nextReadyState = ${ nextReadyState }`);
key: 'abort',
value: function abort() {
// TODO: Should redesign how to stop a recognition session
// After abort is called, we should not say it is a "success", "silent", or "no match"
var _ref3 = this.recognizer || {},
AudioSource = _ref3.AudioSource;
if (nextReadyState > this.readyState) {
var lifecycleEvents = [null, null, this.onstart, this.onaudiostart, this.onsoundstart, this.onspeechstart, this.onspeechend, this.onsoundend, this.onaudioend, this.onend];
AudioSource && AudioSource.TurnOff();
if (this.readyState === AUDIO_START && nextReadyState >= AUDIO_END) {
// If soundstart, speechstart, speechend, and soundend are not fired after audiostart,
// we can skip them and just fire audioend directly
this.readyState = SOUND_END;
}
this._aborted = true;
}
}, {
key: 'emit',
value: function emit(name, event) {
var listener = this['on' + name];
for (var transition = this.readyState + 1; transition <= nextReadyState; transition++) {
var eventListener = lifecycleEvents[transition];
// eventListener && console.log(`Firing "${ EVENT_TYPES[transition] }"`);
eventListener && eventListener({ type: EVENT_TYPES[transition] });
}
if (nextReadyState === END) {
this.readyState = IDLE;
} else {
this.readyState = nextReadyState;
}
}
listener && listener.call(this, _extends({}, event, { type: name }));
}
}, {
key: '_handleDetailedPhrase',
value: function _handleDetailedPhrase(event) {
console.log(event);
key: 'stop',
value: function stop() {
throw new Error('not supported');
}
}, {
key: 'start',
value: function () {
var _ref4 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee3() {
var recognizer, _toPromise, eventListener, promises, error, listeningStarted, recognitionStarted, gotFirstHypothesis, speechHypothesis, speechDetailedPhrase, recognitionResult;
this._transitTo(AUDIO_END);
return regeneratorRuntime.wrap(function _callee3$(_context3) {
while (1) {
switch (_context3.prev = _context3.next) {
case 0:
recognizer = this.recognizer = this.createRecognizer(window.localStorage.getItem('SPEECH_KEY'), this.lang);
_toPromise = toPromise(), eventListener = _toPromise.eventListener, promises = _objectWithoutProperties(_toPromise, ['eventListener']);
if (CognitiveSpeech.RecognitionStatus[event.Result.RecognitionStatus] === CognitiveSpeech.RecognitionStatus.Success) {
var nBest = event.Result.NBest;
recognizer.Recognize(eventListener);
this._aborted = false;
this.onresult && this.onresult(buildSpeechResult(event.Result.NBest[0].Display, event.Result.NBest[0].Confidence, true));
} else {
this.onerror && this.onerror({ error: event.Result.RecognitionStatus, type: 'error' });
}
}
}, {
key: '_handleHypothesis',
value: function _handleHypothesis(event) {
console.log(event);
_context3.next = 6;
return promises.recognitionTriggered;
this.onresult && this.onresult(buildSpeechResult(event.Result.Text, .5, false));
}
}, {
key: 'abort',
value: function abort() {
var _ref3 = this.recognizer || {},
AudioSource = _ref3.AudioSource;
case 6:
error = void 0;
_context3.next = 9;
return Promise.race([promises.listeningStarted, promises.recognitionEnded]);
console.log('ABORT: ' + AudioSource);
case 9:
listeningStarted = _context3.sent;
AudioSource && AudioSource.TurnOff();
}
}, {
key: 'handleRecognize',
value: function handleRecognize(event) {
try {
var name = event.Name;
if (!(listeningStarted.Name === 'RecognitionEndedEvent')) {
_context3.next = 14;
break;
}
// Possibly not authorized to use microphone
if (listeningStarted.Status === CognitiveSpeech.RecognitionCompletionStatus.AudioSourceError) {
error = 'not-allowed';
} else {
error = CognitiveSpeech.RecognitionCompletionStatus[listeningStarted.Status];
}
_context3.next = 51;
break;
console.log('handleRecognize: ' + name);
case 14:
this.emit('start');
switch (name) {
case 'ListeningStartedEvent':
this._transitTo(AUDIO_START);
break;
_context3.next = 17;
return promises.connectingToService;
case 'RecognitionEndedEvent':
if (event.Status !== CognitiveSpeech.RecognitionCompletionStatus.Success) {
this._transitTo(AUDIO_END);
this.onerror && this.onerror({ error: CognitiveSpeech.RecognitionCompletionStatus[event.Status], type: 'error' });
}
case 17:
_context3.next = 19;
return Promise.race([promises.recognitionStarted, promises.recognitionEnded]);
this._transitTo(END);
case 19:
recognitionStarted = _context3.sent;
break;
case 'RecognitionStartedEvent':
this._transitTo(SPEECH_START);
break;
this.emit('audiostart');
case 'RecognitionTriggeredEvent':
this._transitTo(START);
break;
if (!(recognitionStarted.Name === 'RecognitionEndedEvent')) {
_context3.next = 25;
break;
}
case 'SpeechEndDetectedEvent':
this._transitTo(SPEECH_END);
break;
// Possibly network error
if (recognitionStarted.Status === CognitiveSpeech.RecognitionCompletionStatus.ConnectError) {
error = 'network';
} else {
error = CognitiveSpeech.RecognitionCompletionStatus[recognitionStarted.Status];
}
_context3.next = 36;
break;
case 'SpeechStartDetectedEvent':
this._transitTo(SPEECH_START);
break;
case 25:
gotFirstHypothesis = void 0;
case 'SpeechHypothesisEvent':
this._handleHypothesis(event);
break;
case 26:
_context3.next = 28;
return Promise.race([promises.getSpeechHypothesisPromise(), promises.speechEndDetected]);
case 'SpeechDetailedPhraseEvent':
this._handleDetailedPhrase(event);
break;
case 28:
speechHypothesis = _context3.sent;
case 'ConnectingToServiceEvent':
case 'SpeechSimplePhraseEvent':
break;
if (!(speechHypothesis.Name === 'SpeechEndDetectedEvent')) {
_context3.next = 31;
break;
}
default:
console.warn('Unexpected event "' + name + '" from Cognitive Services, please file a bug to https://github.com/compulim/web-speech-cognitive-services');
break;
}
} catch (err) {
// Cognitive Services will hide all exceptions thrown in the event listener
// We need to show it otherwise when exception happen, we will not know what's going on
console.error(err);
throw err;
return _context3.abrupt('break', 35);
case 31:
if (!gotFirstHypothesis) {
gotFirstHypothesis = true;
this.emit('soundstart');
this.emit('speechstart');
}
this.emit('result', buildSpeechResult(speechHypothesis.Result.Text, .5, false));
case 33:
_context3.next = 26;
break;
case 35:
if (gotFirstHypothesis) {
this.emit('speechend');
this.emit('soundend');
}
case 36:
this.emit('audioend');
if (!this._aborted) {
_context3.next = 43;
break;
}
error = 'aborted';
_context3.next = 41;
return promises.recognitionEnded;
case 41:
_context3.next = 51;
break;
case 43:
_context3.next = 45;
return Promise.race([promises.speechDetailedPhrase, promises.recognitionEnded]);
case 45:
speechDetailedPhrase = _context3.sent;
if (!(speechDetailedPhrase.Name !== 'RecognitionEndedEvent')) {
_context3.next = 51;
break;
}
recognitionResult = CognitiveSpeech.RecognitionStatus[speechDetailedPhrase.Result.RecognitionStatus];
if (recognitionResult === CognitiveSpeech.RecognitionStatus.Success) {
this.emit('result', buildSpeechResult(speechDetailedPhrase.Result.NBest[0].Display, speechDetailedPhrase.Result.NBest[0].Confidence, true));
} else if (recognitionResult !== CognitiveSpeech.RecognitionStatus.NoMatch) {
// Possibly silent or muted
if (recognitionResult === CognitiveSpeech.RecognitionStatus.InitialSilenceTimeout) {
error = 'no-speech';
} else {
error = speechDetailedPhrase.Result.RecognitionStatus;
}
}
_context3.next = 51;
return promises.recognitionEnded;
case 51:
error && this.emit('error', { error: error });
this.emit('end');
case 53:
case 'end':
return _context3.stop();
}
}
}, _callee3, this);
}));
function start() {
return _ref4.apply(this, arguments);
}
}
}, {
key: 'start',
value: function start() {
this.recognizer = this.createRecognizer(window.localStorage.getItem('SPEECH_KEY'), this.lang);
this.recognizer.Recognize(this.handleRecognize.bind(this));
this._transitTo(START);
}
return start;
}()
}, {
key: 'stop',
value: function stop() {
throw new Error('not supported');
}
}, {
key: 'grammars',

@@ -336,3 +407,46 @@ get: function get() {

/**
 * Builds a promise-based view over the events reported by a single
 * Cognitive Services recognition session.
 *
 * One `event-as-promise` instance is created per known event name. The
 * returned object exposes:
 *  - one promise per event, obtained once via `upcoming()` at creation time
 *    (presumably settled by the next event of that name; see event-as-promise);
 *  - `getSpeechHypothesisPromise()`, which calls `upcoming()` on every
 *    invocation so that repeated hypothesis events can each be awaited;
 *  - `eventListener(event)`, the callback that routes an incoming event to
 *    the matching instance by `event.Name`.
 *
 * @returns {Object} The promises and the `eventListener` dispatcher described above.
 */
function toPromise() {
  var events = {
    ConnectingToServiceEvent: new _eventAsPromise2.default(),
    ListeningStartedEvent: new _eventAsPromise2.default(),
    RecognitionEndedEvent: new _eventAsPromise2.default(),
    RecognitionStartedEvent: new _eventAsPromise2.default(),
    RecognitionTriggeredEvent: new _eventAsPromise2.default(),
    SpeechDetailedPhraseEvent: new _eventAsPromise2.default(),
    SpeechEndDetectedEvent: new _eventAsPromise2.default(),
    SpeechHypothesisEvent: new _eventAsPromise2.default(),
    SpeechSimplePhraseEvent: new _eventAsPromise2.default(),
    SpeechStartDetectedEvent: new _eventAsPromise2.default()
  };

  return {
    connectingToService: events.ConnectingToServiceEvent.upcoming(),
    listeningStarted: events.ListeningStartedEvent.upcoming(),
    recognitionEnded: events.RecognitionEndedEvent.upcoming(),
    recognitionStarted: events.RecognitionStartedEvent.upcoming(),
    recognitionTriggered: events.RecognitionTriggeredEvent.upcoming(),
    speechDetailedPhrase: events.SpeechDetailedPhraseEvent.upcoming(),
    speechEndDetected: events.SpeechEndDetectedEvent.upcoming(),
    // Hypotheses can arrive several times, so a fresh promise is requested per call.
    getSpeechHypothesisPromise: function getSpeechHypothesisPromise() {
      return events.SpeechHypothesisEvent.upcoming();
    },
    speechSimplePhrase: events.SpeechSimplePhraseEvent.upcoming(),
    speechStartDetected: events.SpeechStartDetectedEvent.upcoming(),
    eventListener: function eventListener(event) {
      var name = event.Name;

      // Own-property lookup only: a bare `events[name]` would resolve names such
      // as "toString" or "constructor" to Object.prototype members, which are
      // truthy but have no `eventListener`, and the dispatch below would throw
      // instead of reporting the event as unexpected.
      var eventAsPromise = Object.prototype.hasOwnProperty.call(events, name) ? events[name] : undefined;

      console.log('handling ' + name);

      if (eventAsPromise) {
        eventAsPromise.eventListener.call(null, event);
      } else {
        console.warn('Unexpected event "' + name + '" from Cognitive Services, please file a bug to https://github.com/compulim/web-speech-cognitive-services');
      }
    }
  };
}
exports.default = CognitiveServicesSpeechRecognition;
//# sourceMappingURL=data:application/json;charset=utf-8;base64,
//# sourceMappingURL=data:application/json;charset=utf-8;base64,
{
"name": "web-speech-cognitive-services",
"version": "0.0.1-master.869e22a",
"version": "0.0.1-master.da41f39",
"description": "",

@@ -31,8 +31,10 @@ "keywords": [],

"babel-plugin-version-transform": "^1.0.0",
"babel-polyfill": "^6.26.0",
"babel-preset-stage-3": "^6.24.1",
"classnames": "^2.2.6",
"event-as-promise": "^1.0.3",
"glamor": "^2.20.40",
"jest": "^22.4.4",
"lerna": "^2.11.0",
"microsoft-speech-browser-sdk": "0.0.12",
"microsoft-speech-browser-sdk": "^0.0.12",
"react": "^16.4.1",

@@ -39,0 +41,0 @@ "react-dictate-button": "^1.0.0",

@@ -7,2 +7,33 @@ # web-speech-cognitive-services

## Test matrix
Browsers were all the latest versions as of 2018-06-28, except that macOS was 10.13.1 (2017-10-31) instead of 10.13.5. This should not change the matrix, since Safari does not support the Web Speech API.
Overall in point form:
* With the Web Speech API only, web developers can enable speech recognition on most popular platforms, except iOS
* iOS: No browsers on iOS support Web Speech API
* Some platforms require a non-default browser
* With Cognitive Services Speech-to-Text, all popular platforms with their default browsers are supported
* iOS: Chrome and Edge do not support Cognitive Services because WebRTC is disabled
| Platform | OS | Browser | Cognitive Services (WebRTC) | Web Speech API |
| - | - | - | - | - |
| PC | Windows 10 (1803) | Chrome 67.0.3396.99 | Yes | Yes |
| PC | Windows 10 (1803) | Edge 42.17134.1.0 | Yes | No, `SpeechRecognition` not implemented |
| PC | Windows 10 (1803) | Firefox 61.0 | Yes | No, `SpeechRecognition` not implemented |
| MacBook Pro | macOS High Sierra 10.13.1 | Chrome 67.0.3396.99 | Yes | Yes |
| MacBook Pro | macOS High Sierra 10.13.1 | Safari 11.0.1 | Yes | No, `SpeechRecognition` not implemented |
| Apple iPhone X | iOS 11.4 | Chrome 67.0.3396.87 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented |
| Apple iPhone X | iOS 11.4 | Edge 42.2.2.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented |
| Apple iPhone X | iOS 11.4 | Safari | Yes | No, `SpeechRecognition` not implemented |
| Apple iPod (6th gen) | iOS 11.4 | Chrome 67.0.3396.87 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented |
| Apple iPod (6th gen) | iOS 11.4 | Edge 42.2.2.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented |
| Apple iPod (6th gen) | iOS 11.4 | Safari | No, `AudioSourceError` | No, `SpeechRecognition` not implemented |
| Google Pixel 2 | Android 8.1.0 | Chrome 67.0.3396.87 | Yes | Yes |
| Google Pixel 2 | Android 8.1.0 | Edge 42.0.0.2057 | Yes | Yes |
| Google Pixel 2 | Android 8.1.0 | Firefox 60.1.0 | Yes | Yes |
| Microsoft Lumia 950 | Windows 10 (1709) | Edge 40.15254.489.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented |
| Microsoft Xbox One | Windows 10 (?) | Edge ? | No, `AudioSourceError` | No, `SpeechRecognition` not implemented |
## Event lifecycle mapping from Cognitive Services

@@ -24,35 +55,90 @@

* Happy path
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `RecognitionStartedEvent`
5. `SpeechHypothesisEvent` (could be more than one)
6. `SpeechEndDetectedEvent`
7. `SpeechSimplePhraseEvent`
8. `RecognitionEndedEvent`
* Cognitive Services
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `RecognitionStartedEvent`
5. `SpeechHypothesisEvent` (could be more than one)
6. `SpeechEndDetectedEvent`
7. `SpeechDetailedPhraseEvent`
8. `RecognitionEndedEvent`
* Web Speech API
1. `start`
2. `audiostart`
3. `soundstart`
4. `speechstart`
5. `result` (multiple times)
6. `speechend`
7. `soundend`
8. `audioend`
9. `result(results = [{ isFinal = true }])`
10. `end`
* Abort is called during recognition
* Cognitive Services
* Essentially mutes the speech; recognition could still result in success, silence, or no match
* Web Speech API
1. `start`
2. `audiostart`
3. `soundstart` (optional)
4. `speechstart` (optional)
5. `result` (optional)
6. `speechend` (optional)
7. `soundend` (optional)
8. `audioend`
9. `error(error = 'aborted')`
10. `end`
* Network issues
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `SpeechSimplePhraseEvent`
5. `RecognitionEndedEvent`
* Cognitive Services
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `RecognitionEndedEvent(Result.RecognitionStatus = 'ConnectError')`
* Web Speech API
1. `start`
2. `audiostart`
3. `audioend`
4. `error(error = 'network')`
5. `end`
* Audio muted or volume too low
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `RecognitionStartedEvent`
5. `SpeechEndDetectedEvent`
6. `SpeechSimplePhraseEvent(Result.RecognitionStatus = 'InitialSilenceTimeout')`
7. `RecognitionEndedEvent`
* Cognitive Services
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `RecognitionStartedEvent`
5. `SpeechEndDetectedEvent`
6. `SpeechDetailedPhraseEvent(Result.RecognitionStatus = 'InitialSilenceTimeout')`
7. `RecognitionEndedEvent`
* Web Speech API
1. `start`
2. `audiostart`
3. `audioend`
4. `error(error = 'no-speech')`
5. `end`
* Failed to recognize speech (a.k.a. no match)
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `RecognitionStartedEvent`
5. `SpeechHypothesisEvent` (could be more than one)
6. `SpeechEndDetectedEvent`
7. `SpeechSimplePhraseEvent(Result.RecognitionStatus = 'NoMatch')`
8. `RecognitionEndedEvent`
* User abort
* Essentially mutes the speech; recognition could result in success, silence, or no match
* Cognitive Services
1. `RecognitionTriggeredEvent`
2. `ListeningStartedEvent`
3. `ConnectingToServiceEvent`
4. `RecognitionStartedEvent`
5. `SpeechHypothesisEvent` (could be more than one)
6. `SpeechEndDetectedEvent`
7. `SpeechDetailedPhraseEvent(Result.RecognitionStatus = 'NoMatch')`
8. `RecognitionEndedEvent`
* Web Speech API
1. `start`
2. `audiostart`
3. `soundstart`
4. `speechstart`
5. `result`
6. `speechend`
7. `soundend`
8. `audioend`
9. `end`
* Not authorized to use microphone
* Cognitive Services
1. `RecognitionTriggeredEvent`
2. `RecognitionEndedEvent(Result.RecognitionStatus = 'AudioSourceError')`
* Web Speech API
1. `error(error = 'not-allowed')`
2. `end`

@@ -59,0 +145,0 @@ # Contributions

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc