web-speech-cognitive-services
Advanced tools
Comparing version 0.0.1-master.e70a05d to 0.0.1-master.f80884e
@@ -7,2 +7,4 @@ 'use strict'; | ||
/**
 * Shallow-merges the own enumerable properties of every source argument into
 * `target` and returns `target`. Uses the native `Object.assign` when the
 * runtime provides it, otherwise falls back to a manual copy loop.
 */
var _extends = Object.assign || function (target) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var position = 1;

  // arguments[0] is the target itself, so copying starts at index 1.
  while (position < arguments.length) {
    var donor = arguments[position];

    for (var prop in donor) {
      if (hasOwn.call(donor, prop)) {
        target[prop] = donor[prop];
      }
    }

    position += 1;
  }

  return target;
};
/**
 * Babel class helper: installs prototype members and static members on a
 * constructor function and returns that constructor.
 *
 * Each member is a property descriptor carrying an extra `key` field. The
 * descriptors are normalized in place before being defined: `enumerable`
 * defaults to false, `configurable` is forced to true, and data descriptors
 * (those with a `value`) are made writable.
 */
var _createClass = (function () {
  // Normalizes every descriptor in `members` and defines it on `owner`.
  function installMembers(owner, members) {
    for (var index = 0; index < members.length; index += 1) {
      var member = members[index];

      if (!member.enumerable) {
        member.enumerable = false;
      }

      member.configurable = true;

      if ('value' in member) {
        member.writable = true;
      }

      Object.defineProperty(owner, member.key, member);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      installMembers(Constructor.prototype, protoProps);
    }

    if (staticProps) {
      installMembers(Constructor, staticProps);
    }

    return Constructor;
  };
})();
@@ -14,2 +16,6 @@ | ||
var _eventAsPromise = require('event-as-promise'); | ||
var _eventAsPromise2 = _interopRequireDefault(_eventAsPromise); | ||
var _memoizeOne = require('memoize-one'); | ||
@@ -23,2 +29,4 @@ | ||
/**
 * Returns a new object holding the own enumerable properties of `obj`,
 * leaving out every property whose name appears in `keys`.
 *
 * @param {Object} obj - Source object; it is not modified.
 * @param {string[]} keys - Property names to leave out of the copy.
 * @returns {Object} A fresh object with the remaining properties.
 */
function _objectWithoutProperties(obj, keys) {
  var remainder = {};

  for (var prop in obj) {
    var isExcluded = keys.indexOf(prop) !== -1;

    if (!isExcluded && Object.prototype.hasOwnProperty.call(obj, prop)) {
      remainder[prop] = obj[prop];
    }
  }

  return remainder;
}
/**
 * Babel async helper: wraps a generator function so that calling the wrapper
 * returns a Promise. Every yielded value is awaited; its fulfilment value is
 * sent back into the generator with `next`, and its rejection reason is thrown
 * into the generator with `throw`. The Promise resolves with the generator's
 * return value and rejects with any error the generator lets escape.
 *
 * @param {Function} fn - Generator function to drive.
 * @returns {Function} Wrapper that forwards `this` and its arguments to `fn`.
 */
function _asyncToGenerator(fn) {
  return function () {
    // The generator is created before the Promise, so a synchronous failure in
    // `fn.apply` surfaces as a thrown error rather than a rejection.
    var iterator = fn.apply(this, arguments);

    return new Promise(function (resolve, reject) {
      function resumeWithValue(result) {
        advance('next', result);
      }

      function resumeWithError(reason) {
        advance('throw', reason);
      }

      function advance(method, input) {
        var outcome;
        var payload;

        try {
          outcome = iterator[method](input);
          payload = outcome.value;
        } catch (failure) {
          reject(failure);
          return;
        }

        if (outcome.done) {
          resolve(payload);
          return;
        }

        Promise.resolve(payload).then(resumeWithValue, resumeWithError);
      }

      advance('next');
    });
  };
}
@@ -77,6 +85,6 @@ | ||
var name = 'Browser'; | ||
var osVersion = "0.0.1-master.e70a05d"; | ||
var osVersion = "0.0.1-master.f80884e"; | ||
var manufacturer = 'web-speech-cognitive-services'; | ||
var model = 'web-speech-cognitive-services'; | ||
var deviceVersion = "0.0.1-master.e70a05d"; | ||
var deviceVersion = "0.0.1-master.f80884e"; | ||
@@ -144,139 +152,202 @@ var config = new CognitiveSpeech.RecognizerConfig(new CognitiveSpeech.SpeechConfig(new CognitiveSpeech.Context(new CognitiveSpeech.OS(platform, name, osVersion), new CognitiveSpeech.Device(manufacturer, model, deviceVersion))), mode, lang, CognitiveSpeech.SpeechResultFormat.Detailed); | ||
_createClass(CognitiveServicesSpeechRecognition, [{ | ||
key: '_transitTo', | ||
value: function _transitTo(nextReadyState) { | ||
// console.log(`_transitTo: readyState = ${ this.readyState }, nextReadyState = ${ nextReadyState }`); | ||
key: 'abort', | ||
value: function abort() { | ||
// TODO: Should redesign how to stop a recognition session | ||
// After abort is called, we should not saw it is a "success", "silent", or "no match" | ||
var _ref3 = this.recognizer || {}, | ||
AudioSource = _ref3.AudioSource; | ||
if (nextReadyState > this.readyState) { | ||
var lifecycleEvents = [null, null, this.onstart, this.onaudiostart, this.onsoundstart, this.onspeechstart, this.onspeechend, this.onsoundend, this.onaudioend, this.onend]; | ||
AudioSource && AudioSource.TurnOff(); | ||
if (this.readyState === AUDIO_START && nextReadyState >= AUDIO_END) { | ||
// If soundstart, speechstart, speechend, and soundend are not fired after audiostart, | ||
// we can skip them and just fire audioend directly | ||
this.readyState = SOUND_END; | ||
} | ||
this._aborted = true; | ||
} | ||
}, { | ||
key: 'emit', | ||
value: function emit(name, event) { | ||
var listener = this['on' + name]; | ||
for (var transition = this.readyState + 1; transition <= nextReadyState; transition++) { | ||
var eventListener = lifecycleEvents[transition]; | ||
// eventListener && console.log(`Firing "${ EVENT_TYPES[transition] }"`); | ||
eventListener && eventListener({ type: EVENT_TYPES[transition] }); | ||
} | ||
if (nextReadyState === END) { | ||
this.readyState = IDLE; | ||
} else { | ||
this.readyState = nextReadyState; | ||
} | ||
} | ||
listener && listener.call(this, _extends({}, event, { type: name })); | ||
} | ||
}, { | ||
key: '_handleDetailedPhrase', | ||
value: function _handleDetailedPhrase(event) { | ||
console.log(event); | ||
key: 'stop', | ||
value: function stop() { | ||
throw new Error('not supported'); | ||
} | ||
}, { | ||
key: 'start', | ||
value: function () { | ||
var _ref4 = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee3() { | ||
var recognizer, _toPromise, eventListener, promises, error, listeningStarted, recognitionStarted, gotFirstHypothesis, speechHypothesis, speechDetailedPhrase, recognitionResult; | ||
this._transitTo(AUDIO_END); | ||
return regeneratorRuntime.wrap(function _callee3$(_context3) { | ||
while (1) { | ||
switch (_context3.prev = _context3.next) { | ||
case 0: | ||
recognizer = this.recognizer = this.createRecognizer(window.localStorage.getItem('SPEECH_KEY'), this.lang); | ||
_toPromise = toPromise(), eventListener = _toPromise.eventListener, promises = _objectWithoutProperties(_toPromise, ['eventListener']); | ||
if (CognitiveSpeech.RecognitionStatus[event.Result.RecognitionStatus] === CognitiveSpeech.RecognitionStatus.Success) { | ||
var nBest = event.Result.NBest; | ||
recognizer.Recognize(eventListener); | ||
this._aborted = false; | ||
this.onresult && this.onresult(buildSpeechResult(event.Result.NBest[0].Display, event.Result.NBest[0].Confidence, true)); | ||
} else { | ||
this.onerror && this.onerror({ error: event.Result.RecognitionStatus, type: 'error' }); | ||
} | ||
} | ||
}, { | ||
key: '_handleHypothesis', | ||
value: function _handleHypothesis(event) { | ||
console.log(event); | ||
_context3.next = 6; | ||
return promises.recognitionTriggered; | ||
this.onresult && this.onresult(buildSpeechResult(event.Result.Text, .5, false)); | ||
} | ||
}, { | ||
key: 'abort', | ||
value: function abort() { | ||
var _ref3 = this.recognizer || {}, | ||
AudioSource = _ref3.AudioSource; | ||
case 6: | ||
error = void 0; | ||
_context3.next = 9; | ||
return Promise.race([promises.listeningStarted, promises.recognitionEnded]); | ||
console.log('ABORT: ' + AudioSource); | ||
case 9: | ||
listeningStarted = _context3.sent; | ||
AudioSource && AudioSource.TurnOff(); | ||
} | ||
}, { | ||
key: 'handleRecognize', | ||
value: function handleRecognize(event) { | ||
try { | ||
var name = event.Name; | ||
if (!(listeningStarted.Name === 'RecognitionEndedEvent')) { | ||
_context3.next = 14; | ||
break; | ||
} | ||
// Possibly not authorized to use microphone | ||
if (listeningStarted.Status === CognitiveSpeech.RecognitionCompletionStatus.AudioSourceError) { | ||
error = 'not-allowed'; | ||
} else { | ||
error = CognitiveSpeech.RecognitionCompletionStatus[listeningStarted.Status]; | ||
} | ||
_context3.next = 51; | ||
break; | ||
console.log('handleRecognize: ' + name); | ||
case 14: | ||
this.emit('start'); | ||
switch (name) { | ||
case 'ListeningStartedEvent': | ||
this._transitTo(AUDIO_START); | ||
break; | ||
_context3.next = 17; | ||
return promises.connectingToService; | ||
case 'RecognitionEndedEvent': | ||
if (event.Status !== CognitiveSpeech.RecognitionCompletionStatus.Success) { | ||
this._transitTo(AUDIO_END); | ||
this.onerror && this.onerror({ error: CognitiveSpeech.RecognitionCompletionStatus[event.Status], type: 'error' }); | ||
} | ||
case 17: | ||
_context3.next = 19; | ||
return Promise.race([promises.recognitionStarted, promises.recognitionEnded]); | ||
this._transitTo(END); | ||
case 19: | ||
recognitionStarted = _context3.sent; | ||
break; | ||
case 'RecognitionStartedEvent': | ||
this._transitTo(SPEECH_START); | ||
break; | ||
this.emit('audiostart'); | ||
case 'RecognitionTriggeredEvent': | ||
this._transitTo(START); | ||
break; | ||
if (!(recognitionStarted.Name === 'RecognitionEndedEvent')) { | ||
_context3.next = 25; | ||
break; | ||
} | ||
case 'SpeechEndDetectedEvent': | ||
this._transitTo(SPEECH_END); | ||
break; | ||
// Possibly network error | ||
if (recognitionStarted.Status === CognitiveSpeech.RecognitionCompletionStatus.ConnectError) { | ||
error = 'network'; | ||
} else { | ||
error = CognitiveSpeech.RecognitionCompletionStatus[recognitionStarted.Status]; | ||
} | ||
_context3.next = 36; | ||
break; | ||
case 'SpeechStartDetectedEvent': | ||
this._transitTo(SPEECH_START); | ||
break; | ||
case 25: | ||
gotFirstHypothesis = void 0; | ||
case 'SpeechHypothesisEvent': | ||
this._handleHypothesis(event); | ||
break; | ||
case 26: | ||
_context3.next = 28; | ||
return Promise.race([promises.getSpeechHypothesisPromise(), promises.speechEndDetected]); | ||
case 'SpeechDetailedPhraseEvent': | ||
this._handleDetailedPhrase(event); | ||
break; | ||
case 28: | ||
speechHypothesis = _context3.sent; | ||
case 'ConnectingToServiceEvent': | ||
case 'SpeechSimplePhraseEvent': | ||
break; | ||
if (!(speechHypothesis.Name === 'SpeechEndDetectedEvent')) { | ||
_context3.next = 31; | ||
break; | ||
} | ||
default: | ||
console.warn('Unexpected event "' + name + '" from Cognitive Services, please file a bug to https://github.com/compulim/web-speech-cognitive-services'); | ||
break; | ||
} | ||
} catch (err) { | ||
// Cognitive Services will hide all exceptions thrown in the event listener | ||
// We need to show it otherwise when exception happen, we will not know what's going on | ||
console.error(err); | ||
throw err; | ||
return _context3.abrupt('break', 35); | ||
case 31: | ||
if (!gotFirstHypothesis) { | ||
gotFirstHypothesis = true; | ||
this.emit('soundstart'); | ||
this.emit('speechstart'); | ||
} | ||
this.emit('result', buildSpeechResult(speechHypothesis.Result.Text, .5, false)); | ||
case 33: | ||
_context3.next = 26; | ||
break; | ||
case 35: | ||
if (gotFirstHypothesis) { | ||
this.emit('speechend'); | ||
this.emit('soundend'); | ||
} | ||
case 36: | ||
this.emit('audioend'); | ||
if (!this._aborted) { | ||
_context3.next = 43; | ||
break; | ||
} | ||
error = 'aborted'; | ||
_context3.next = 41; | ||
return promises.recognitionEnded; | ||
case 41: | ||
_context3.next = 51; | ||
break; | ||
case 43: | ||
_context3.next = 45; | ||
return Promise.race([promises.speechDetailedPhrase, promises.recognitionEnded]); | ||
case 45: | ||
speechDetailedPhrase = _context3.sent; | ||
if (!(speechDetailedPhrase.Name !== 'RecognitionEndedEvent')) { | ||
_context3.next = 51; | ||
break; | ||
} | ||
recognitionResult = CognitiveSpeech.RecognitionStatus[speechDetailedPhrase.Result.RecognitionStatus]; | ||
if (recognitionResult === CognitiveSpeech.RecognitionStatus.Success) { | ||
this.emit('result', buildSpeechResult(speechDetailedPhrase.Result.NBest[0].Display, speechDetailedPhrase.Result.NBest[0].Confidence, true)); | ||
} else if (recognitionResult !== CognitiveSpeech.RecognitionStatus.NoMatch) { | ||
// Possibly silent or muted | ||
if (recognitionResult === CognitiveSpeech.RecognitionStatus.InitialSilenceTimeout) { | ||
error = 'no-speech'; | ||
} else { | ||
error = speechDetailedPhrase.Result.RecognitionStatus; | ||
} | ||
} | ||
_context3.next = 51; | ||
return promises.recognitionEnded; | ||
case 51: | ||
error && this.emit('error', { error: error }); | ||
this.emit('end'); | ||
case 53: | ||
case 'end': | ||
return _context3.stop(); | ||
} | ||
} | ||
}, _callee3, this); | ||
})); | ||
function start() { | ||
return _ref4.apply(this, arguments); | ||
} | ||
} | ||
}, { | ||
key: 'start', | ||
value: function start() { | ||
this.recognizer = this.createRecognizer(window.localStorage.getItem('SPEECH_KEY'), this.lang); | ||
this.recognizer.Recognize(this.handleRecognize.bind(this)); | ||
this._transitTo(START); | ||
} | ||
return start; | ||
}() | ||
}, { | ||
key: 'stop', | ||
value: function stop() { | ||
throw new Error('not supported'); | ||
} | ||
}, { | ||
key: 'grammars', | ||
@@ -336,3 +407,44 @@ get: function get() { | ||
/**
 * Builds promise-based access to the events a Cognitive Services recognizer
 * reports through its single event callback.
 *
 * One `event-as-promise` instance is created per known event name. The returned
 * object exposes:
 *   - one property per event (e.g. `listeningStarted`) holding the result of a
 *     single `upcoming()` call made when `toPromise()` runs;
 *   - `getSpeechHypothesisPromise()`, which calls `upcoming()` again on every
 *     invocation, because hypothesis events are awaited repeatedly in a loop;
 *   - `eventListener(event)`, the callback to hand to the recognizer. It routes
 *     each event to the matching instance by `event.Name`.
 *
 * @returns {Object} The promises, the hypothesis promise factory and `eventListener`.
 */
function toPromise() {
  var events = {
    ConnectingToServiceEvent: new _eventAsPromise2.default(),
    ListeningStartedEvent: new _eventAsPromise2.default(),
    RecognitionEndedEvent: new _eventAsPromise2.default(),
    RecognitionStartedEvent: new _eventAsPromise2.default(),
    RecognitionTriggeredEvent: new _eventAsPromise2.default(),
    SpeechDetailedPhraseEvent: new _eventAsPromise2.default(),
    SpeechEndDetectedEvent: new _eventAsPromise2.default(),
    SpeechHypothesisEvent: new _eventAsPromise2.default(),
    SpeechSimplePhraseEvent: new _eventAsPromise2.default(),
    SpeechStartDetectedEvent: new _eventAsPromise2.default()
  };

  return {
    connectingToService: events.ConnectingToServiceEvent.upcoming(),
    listeningStarted: events.ListeningStartedEvent.upcoming(),
    recognitionEnded: events.RecognitionEndedEvent.upcoming(),
    recognitionStarted: events.RecognitionStartedEvent.upcoming(),
    recognitionTriggered: events.RecognitionTriggeredEvent.upcoming(),
    speechDetailedPhrase: events.SpeechDetailedPhraseEvent.upcoming(),
    speechEndDetected: events.SpeechEndDetectedEvent.upcoming(),
    getSpeechHypothesisPromise: function getSpeechHypothesisPromise() {
      return events.SpeechHypothesisEvent.upcoming();
    },
    speechSimplePhrase: events.SpeechSimplePhraseEvent.upcoming(),
    speechStartDetected: events.SpeechStartDetectedEvent.upcoming(),
    eventListener: function eventListener(event) {
      var name = event.Name;

      // Own-property lookup only: a name such as "constructor" or "toString"
      // would otherwise resolve to an inherited Object.prototype member, pass
      // the truthiness check, and throw on `.eventListener.call` instead of
      // being reported as an unexpected event.
      var eventAsPromise = Object.prototype.hasOwnProperty.call(events, name) ? events[name] : null;

      if (eventAsPromise) {
        eventAsPromise.eventListener.call(null, event);
      } else {
        console.warn('Unexpected event "' + name + '" from Cognitive Services, please file a bug to https://github.com/compulim/web-speech-cognitive-services');
      }
    }
  };
}
exports.default = CognitiveServicesSpeechRecognition; | ||
//# sourceMappingURL=data:application/json;charset=utf-8;base64, | ||
//# sourceMappingURL=data:application/json;charset=utf-8;base64, |
{ | ||
"name": "web-speech-cognitive-services", | ||
"version": "0.0.1-master.e70a05d", | ||
"version": "0.0.1-master.f80884e", | ||
"description": "", | ||
@@ -31,8 +31,10 @@ "keywords": [], | ||
"babel-plugin-version-transform": "^1.0.0", | ||
"babel-polyfill": "^6.26.0", | ||
"babel-preset-stage-3": "^6.24.1", | ||
"classnames": "^2.2.6", | ||
"event-as-promise": "^1.0.3", | ||
"glamor": "^2.20.40", | ||
"jest": "^22.4.4", | ||
"lerna": "^2.11.0", | ||
"microsoft-speech-browser-sdk": "0.0.12", | ||
"microsoft-speech-browser-sdk": "^0.0.12", | ||
"react": "^16.4.1", | ||
@@ -39,0 +41,0 @@ "react-dictate-button": "^1.0.0", |
193
README.md
# web-speech-cognitive-services | ||
[![npm version](https://badge.fury.io/js/we-bspeech-cognitive-services.svg)](https://badge.fury.io/js/we-bspeech-cognitive-services) [![Build Status](https://travis-ci.org/compulim/we-bspeech-cognitive-services.svg?branch=master)](https://travis-ci.org/compulim/web-speech-cognitive-services) | ||
[![npm version](https://badge.fury.io/js/web-speech-cognitive-services.svg)](https://badge.fury.io/js/web-speech-cognitive-services) [![Build Status](https://travis-ci.org/compulim/web-speech-cognitive-services.svg?branch=master)](https://travis-ci.org/compulim/web-speech-cognitive-services) | ||
Polyfill Web Speech API with Cognitive Services. | ||
## Event lifecycle mapping from Cognitive Services | ||
This scaffold is provided by [`react-component-template`](https://github.com/compulim/react-component-template/). | ||
| # | WebSpeech | Cognitive Services | Notes | | ||
| - | - | - | - | | ||
| 1 | `start` | `RecognitionTriggeredEvent` | | | ||
| 2 | `audiostart` | `ListeningStartedEvent` | | | ||
| 3 | | `ConnectingToServiceEvent` | | | ||
| 4 | `soundstart`, `speechstart` | `RecognitionStartedEvent` | | | ||
| 5 | `onresult(isFinal = false)` | `SpeechHypothesisEvent` | | | ||
| 6 | `speechend`, `soundend`, `audioend` | `SpeechEndDetectedEvent` | `speechend` and `soundend` only fire if `speechstart` or `soundstart` was fired | | ||
| 7 | `onresult(isFinal = true)`, `onerror` | `SpeechSimplePhraseEvent` | | | ||
| 8 | `end` | `RecognitionEndedEvent` | | | ||
# Demo | ||
### Scenarios | ||
Try out our demo at https://compulim.github.io/web-speech-cognitive-services?s=your-subscription-key. | ||
* Happy path | ||
We use [`react-dictate-button`](https://github.com/compulim/react-dictate-button/) to quickly setup the playground. | ||
# Background | ||
Web Speech API is not widely adopted on popular browsers and platforms. Polyfilling the API using cloud services is a great way to enable wider adoption. Nonetheless, Web Speech API in Google Chrome is also backed by cloud services. | ||
Microsoft Azure [Cognitive Services Speech-to-Text](https://azure.microsoft.com/en-us/services/cognitive-services/speech-to-text/) service provides speech recognition with great accuracy. But unfortunately, the APIs are not based on Web Speech API. | ||
This package will polyfill Web Speech API by turning Cognitive Services Speech-to-Text API into Web Speech API. We test this package with popular combinations of platforms and browsers. | ||
# Test matrix | ||
Browsers are all latest as of 2018-06-28, except: | ||
* macOS was 10.13.1 (2017-10-31), instead of 10.13.5 | ||
* There should be no change on the matrix since Safari does not support Web Speech API | ||
* Xbox was tested on Insider build (1806) | ||
Overall in point form: | ||
* With Web Speech API only, web dev can enable speech recognition on most popular platforms, except iOS | ||
* iOS: No browsers on iOS support Web Speech API | ||
* Some platforms require a non-default browser | ||
* With Cognitive Services Speech-to-Text, all popular platforms with their default browsers are supported | ||
* iOS: Chrome and Edge do not support Cognitive Services because WebRTC is disabled | ||
| Platform | OS | Browser | Cognitive Services (WebRTC) | Web Speech API | | ||
| - | - | - | - | - | | ||
| PC | Windows 10 (1803) | Chrome 67.0.3396.99 | Yes | Yes | | ||
| PC | Windows 10 (1803) | Edge 42.17134.1.0 | Yes | No, `SpeechRecognition` not implemented | | ||
| PC | Windows 10 (1803) | Firefox 61.0 | Yes | No, `SpeechRecognition` not implemented | | ||
| MacBook Pro | macOS High Sierra 10.13.1 | Chrome 67.0.3396.99 | Yes | Yes | | ||
| MacBook Pro | macOS High Sierra 10.13.1 | Safari 11.0.1 | Yes | No, `SpeechRecognition` not implemented | | ||
| Apple iPhone X | iOS 11.4 | Chrome 67.0.3396.87 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPhone X | iOS 11.4 | Edge 42.2.2.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPhone X | iOS 11.4 | Safari | Yes | No, `SpeechRecognition` not implemented | | ||
| Apple iPod (6th gen) | iOS 11.4 | Chrome 67.0.3396.87 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPod (6th gen) | iOS 11.4 | Edge 42.2.2.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPod (6th gen) | iOS 11.4 | Safari | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Google Pixel 2 | Android 8.1.0 | Chrome 67.0.3396.87 | Yes | Yes | | ||
| Google Pixel 2 | Android 8.1.0 | Edge 42.0.0.2057 | Yes | Yes | | ||
| Google Pixel 2 | Android 8.1.0 | Firefox 60.1.0 | Yes | Yes | | ||
| Microsoft Lumia 950 | Windows 10 (1709) | Edge 40.15254.489.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Microsoft Xbox One | Windows 10 (1806) 17134.4054 | Edge 42.17134.4054.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
## Event lifecycle scenarios | ||
We test multiple scenarios to make sure the package polyfills the Web Speech API correctly. The following are the events and their firing order. | ||
* [Happy path](#happy-path) | ||
* [Abort during recognition](#abort-during-recognition) | ||
* [Network issues](#network-issues) | ||
* [Audio muted or volume too low](#audio-muted-or-volume-too-low) | ||
* [No speech is recognized](#no-speech-is-recognized) | ||
* [Not authorized to use microphone](#not-authorized-to-use-microphone) | ||
### Happy path | ||
Everything works, including multiple interim results. | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
@@ -29,11 +79,64 @@ 2. `ListeningStartedEvent` | ||
6. `SpeechEndDetectedEvent` | ||
7. `SpeechSimplePhraseEvent` | ||
7. `SpeechDetailedPhraseEvent` | ||
8. `RecognitionEndedEvent` | ||
* Network issues | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `soundstart` | ||
4. `speechstart` | ||
5. `result` (multiple times) | ||
6. `speechend` | ||
7. `soundend` | ||
8. `audioend` | ||
9. `result(results = [{ isFinal = true }])` | ||
10. `end` | ||
### Abort during recognition | ||
#### Abort before first recognition is made | ||
* Cognitive Services | ||
* Essentially muted the speech, that could still result in success, silent, or no match | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
8. `audioend` | ||
9. `error(error = 'aborted')` | ||
10. `end` | ||
#### Abort after some speech is recognized | ||
* Cognitive Services | ||
* Essentially muted the speech, that could still result in success, silent, or no match | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `soundstart` (optional) | ||
4. `speechstart` (optional) | ||
5. `result` (optional) | ||
6. `speechend` (optional) | ||
7. `soundend` (optional) | ||
8. `audioend` | ||
9. `error(error = 'aborted')` | ||
10. `end` | ||
### Network issues | ||
Turn on airplane mode. | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
2. `ListeningStartedEvent` | ||
3. `ConnectingToServiceEvent` | ||
4. `SpeechSimplePhraseEvent` | ||
5. `RecognitionEndedEvent` | ||
* Audio muted or volume too low | ||
5. `RecognitionEndedEvent(Result.RecognitionStatus = 'ConnectError')` | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `audioend` | ||
4. `error(error = 'network')` | ||
5. `end` | ||
### Audio muted or volume too low | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
@@ -44,5 +147,16 @@ 2. `ListeningStartedEvent` | ||
5. `SpeechEndDetectedEvent` | ||
6. `SpeechSimplePhraseEvent(Result.RecognitionStatus = 'InitialSilenceTimeout')` | ||
6. `SpeechDetailedPhraseEvent(Result.RecognitionStatus = 'InitialSilenceTimeout')` | ||
7. `RecognitionEndedEvent` | ||
* Failed to recognize speech (a.k.a. no match) | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `audioend` | ||
4. `error(error = 'no-speech')` | ||
5. `end` | ||
### No speech is recognized | ||
Some sounds are heard, but they cannot be recognized as text. There could be some interim results with recognized text, but the confidence is so low it dropped out of final result. | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
@@ -54,16 +168,41 @@ 2. `ListeningStartedEvent` | ||
6. `SpeechEndDetectedEvent` | ||
7. `SpeechSimplePhraseEvent(Result.RecognitionStatus = 'NoMatch')` | ||
7. `SpeechDetailedPhraseEvent(Result.RecognitionStatus = 'NoMatch')` | ||
8. `RecognitionEndedEvent` | ||
* User abort | ||
* Essentially muted the speech, that could result in success, silent, or no match | ||
* Not authorized to use microphone | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `soundstart` | ||
4. `speechstart` | ||
5. `result` | ||
6. `speechend` | ||
7. `soundend` | ||
8. `audioend` | ||
9. `end` | ||
> Note: the Web Speech API has `onnomatch` event, but unfortunately, Google Chrome did not fire this event. | ||
### Not authorized to use microphone | ||
The user clicks "deny" on the permission dialog, or no microphone is detected in the system. | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
2. `RecognitionEndedEvent(Result.RecognitionStatus = 'AudioSourceError')` | ||
* Web Speech API | ||
1. `error(error = 'not-allowed')` | ||
2. `end` | ||
# Known issues | ||
* Interim results do not return confidence, final results do have confidence | ||
* We always return `0.5` for interim results | ||
* Cognitive Services supports grammar lists but not in JSGF format, more work to be done in this area | ||
* Although Google Chrome supports setting the grammar list, it seems the grammar list is not used at all | ||
# Contributions | ||
Like us? [Star](https://github.com/compulim/we-bspeech-cognitive-services/stargazers) us. | ||
Like us? [Star](https://github.com/compulim/web-speech-cognitive-services/stargazers) us. | ||
Want to make it better? [File](https://github.com/compulim/we-bspeech-cognitive-services/issues) us an issue. | ||
Want to make it better? [File](https://github.com/compulim/web-speech-cognitive-services/issues) us an issue. | ||
Don't like something you see? [Submit](https://github.com/compulim/web-speech-cognitive-services/pulls) a pull request. |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
52458
363
206
18
1