web-speech-cognitive-services
Advanced tools
Comparing version 1.0.1-master.2c7d643 to 1.0.1-master.6397e88
@@ -8,5 +8,13 @@ # Changelog | ||
## [Unreleased] | ||
### Added | ||
- SpeechSynthesis polyfill with Cognitive Services | ||
### Changed | ||
- Removed `CognitiveServices` prefix | ||
- Renamed `CognitiveServicesSpeechGrammarList` to `SpeechGrammarList` | ||
- Renamed `CognitiveServicesSpeechRecognition` to `SpeechRecognition` | ||
## [1.0.0] - 2018-06-29 | ||
### Added | ||
- Initial release | ||
- SpeechRecognition polyfill with Cognitive Services |
@@ -6,18 +6,33 @@ 'use strict'; | ||
}); | ||
exports.CognitiveServicesSpeechGrammarList = undefined; | ||
exports.SubscriptionKey = exports.SpeechSynthesisUtterance = exports.speechSynthesis = exports.SpeechRecognition = exports.SpeechGrammarList = undefined; | ||
require('babel-polyfill'); | ||
var _CognitiveServicesSpeechGrammarList = require('./CognitiveServicesSpeechGrammarList'); | ||
var _SpeechGrammarList = require('./recognition/SpeechGrammarList'); | ||
var _CognitiveServicesSpeechGrammarList2 = _interopRequireDefault(_CognitiveServicesSpeechGrammarList); | ||
var _SpeechGrammarList2 = _interopRequireDefault(_SpeechGrammarList); | ||
var _CognitiveServicesSpeechRecognition = require('./CognitiveServicesSpeechRecognition'); | ||
var _SpeechRecognition = require('./recognition/SpeechRecognition'); | ||
var _CognitiveServicesSpeechRecognition2 = _interopRequireDefault(_CognitiveServicesSpeechRecognition); | ||
var _SpeechRecognition2 = _interopRequireDefault(_SpeechRecognition); | ||
var _speechSynthesis = require('./synthesis/speechSynthesis'); | ||
var _speechSynthesis2 = _interopRequireDefault(_speechSynthesis); | ||
var _SpeechSynthesisUtterance = require('./synthesis/SpeechSynthesisUtterance'); | ||
var _SpeechSynthesisUtterance2 = _interopRequireDefault(_SpeechSynthesisUtterance); | ||
var _SubscriptionKey = require('./util/SubscriptionKey'); | ||
var _SubscriptionKey2 = _interopRequireDefault(_SubscriptionKey); | ||
// Babel interop helper: if `obj` is already an ES module (has __esModule), return
// it unchanged; otherwise wrap the CommonJS export as `{ default: obj }` so
// `.default` access works uniformly. Note: falsy inputs are also wrapped.
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } | ||
exports.default = _CognitiveServicesSpeechRecognition2.default; | ||
exports.CognitiveServicesSpeechGrammarList = _CognitiveServicesSpeechGrammarList2.default; | ||
//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJzb3VyY2VzIjpbIi4uL3NyYy9pbmRleC5qcyJdLCJuYW1lcyI6WyJDb2duaXRpdmVTZXJ2aWNlc1NwZWVjaFJlY29nbml0aW9uIiwiQ29nbml0aXZlU2VydmljZXNTcGVlY2hHcmFtbWFyTGlzdCJdLCJtYXBwaW5ncyI6Ijs7Ozs7OztBQUFBOztBQUVBOzs7O0FBQ0E7Ozs7OztrQkFFZUEsNEM7UUFHYkMsa0MsR0FBQUEsNEMiLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VzQ29udGVudCI6WyJpbXBvcnQgJ2JhYmVsLXBvbHlmaWxsJztcblxuaW1wb3J0IENvZ25pdGl2ZVNlcnZpY2VzU3BlZWNoR3JhbW1hckxpc3QgZnJvbSAnLi9Db2duaXRpdmVTZXJ2aWNlc1NwZWVjaEdyYW1tYXJMaXN0JztcbmltcG9ydCBDb2duaXRpdmVTZXJ2aWNlc1NwZWVjaFJlY29nbml0aW9uIGZyb20gJy4vQ29nbml0aXZlU2VydmljZXNTcGVlY2hSZWNvZ25pdGlvbic7XG5cbmV4cG9ydCBkZWZhdWx0IENvZ25pdGl2ZVNlcnZpY2VzU3BlZWNoUmVjb2duaXRpb25cblxuZXhwb3J0IHtcbiAgQ29nbml0aXZlU2VydmljZXNTcGVlY2hHcmFtbWFyTGlzdFxufVxuIl19 | ||
exports.SpeechGrammarList = _SpeechGrammarList2.default; | ||
exports.SpeechRecognition = _SpeechRecognition2.default; | ||
exports.speechSynthesis = _speechSynthesis2.default; | ||
exports.SpeechSynthesisUtterance = _SpeechSynthesisUtterance2.default; | ||
exports.SubscriptionKey = _SubscriptionKey2.default; | ||
//# sourceMappingURL=index.js.map |
{ | ||
"name": "web-speech-cognitive-services", | ||
"version": "1.0.1-master.2c7d643", | ||
"version": "1.0.1-master.6397e88", | ||
"description": "Polyfill Web Speech API with Cognitive Services Speech-to-Text service", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
271
README.md
@@ -5,3 +5,3 @@ # web-speech-cognitive-services | ||
Polyfill Web Speech API with Cognitive Services Speech-to-Text service. | ||
Polyfill Web Speech API with Cognitive Services Bing Speech for both speech-to-text and text-to-speech services. | ||
@@ -14,3 +14,3 @@ This scaffold is provided by [`react-component-template`](https://github.com/compulim/react-component-template/). | ||
We use [`react-dictate-button`](https://github.com/compulim/react-dictate-button/) to quickly setup the playground. | ||
We use [`react-dictate-button`](https://github.com/compulim/react-dictate-button/) and [`react-say`](https://github.com/compulim/react-say/) to quickly set up the playground. | ||
@@ -21,5 +21,5 @@ # Background | ||
Microsoft Azure [Cognitive Services Speech-to-Text](https://azure.microsoft.com/en-us/services/cognitive-services/speech-to-text/) service provide speech recognition with great accuracy. But unfortunately, the APIs are not based on Web Speech API. | ||
Microsoft Azure [Cognitive Services Bing Speech](https://azure.microsoft.com/en-us/services/cognitive-services/speech/) service provides speech recognition with great accuracy. But unfortunately, the APIs are not based on Web Speech API. | ||
This package will polyfill Web Speech API by turning Cognitive Services Speech-to-Text API into Web Speech API. We test this package with popular combination of platforms and browsers. | ||
This package will polyfill Web Speech API by turning Cognitive Services Bing Speech API into Web Speech API. We test this package with popular combination of platforms and browsers. | ||
@@ -30,19 +30,12 @@ # How to use | ||
## Speech recognition (speech-to-text) | ||
```jsx | ||
import CognitiveServicesSpeechRecognition from 'web-speech-cognitive-services'; | ||
import { SpeechRecognition, SubscriptionKey } from 'web-speech-cognitive-services'; | ||
const recognition = new CognitiveServicesSpeechRecognition(); | ||
const recognition = new SpeechRecognition(); | ||
// There are two ways to provide your credential: | ||
// 1. Provide a subscription key (good for prototype, not for production) | ||
// 2. Provide a mechanism to obtain/refresh access token | ||
recognition.lang = 'en-US'; | ||
recognition.speechToken = new SubscriptionKey('your subscription key'); | ||
// If you are using subscription key | ||
recognition.subscriptionKey = 'your subscription key'; | ||
// If you are using access token, refreshToken === true, if we are renewing the token, otherwise, false | ||
recognition.tokenFetch = async (authFetchEventID, refreshToken) => { | ||
}; | ||
recognition.lang = 'en-US'; | ||
recognition.onresult = ({ results }) => { | ||
@@ -55,16 +48,20 @@ console.log(results); | ||
## Integrating with React | ||
> Note: most browsers require HTTPS or `localhost` for WebRTC. | ||
### Integrating with React | ||
You can use [`react-dictate-button`](https://github.com/compulim/react-dictate-button/) to integrate speech recognition functionality to your React app. | ||
```jsx | ||
import CognitiveServicesSpeechRecognitionm, { CognitiveServicesSpeechGrammarList } from 'web-speech-recognition-services'; | ||
import { SpeechGrammarList, SpeechRecognition, SubscriptionKey } from 'web-speech-cognitive-services'; | ||
import DictateButton from 'react-dictate-button'; | ||
const extra = { subscriptionKey: new SubscriptionKey('your subscription key') }; | ||
export default props => | ||
<DictateButton | ||
extra={{ subscriptionKey: 'your subscription key' }} | ||
extra={ extra } | ||
onDictate={ ({ result }) => alert(result.transcript) } | ||
speechGrammarList={ CognitiveServicesSpeechGrammarList } | ||
speechRecognition={ CognitiveServicesSpeechRecognition } | ||
speechGrammarList={ SpeechGrammarList } | ||
speechRecognition={ SpeechRecognition } | ||
> | ||
@@ -77,190 +74,86 @@ Start dictation | ||
# Test matrix | ||
## Speech synthesis (text-to-speech) | ||
Browsers are all latest as of 2018-06-28, except: | ||
```jsx | ||
import { speechSynthesis, SpeechSynthesisUtterance, SubscriptionKey } from 'web-speech-cognitive-services'; | ||
* macOS was 10.13.1 (2017-10-31), instead of 10.13.5 | ||
* Since Safari does not support Web Speech API, the test matrix remains the same | ||
* Xbox was tested on Insider build (1806) with Kinect sensor connected | ||
* The latest Insider build does not support both WebRTC and Web Speech API, so we suspect the production build also does not support both | ||
const subscriptionKey = new SubscriptionKey('your subscription key'); | ||
const utterance = new SpeechSynthesisUtterance('Hello, World!'); | ||
Quick grab: | ||
speechSynthesis.speechToken = subscriptionKey; | ||
* Web Speech API | ||
* Works on most popular platforms, except iOS. Some require a non-default browser. | ||
* iOS: None of the popular browsers support Web Speech API | ||
* Windows: requires Chrome | ||
* Cognitive Services Speech-to-Text | ||
* Works on default browsers on all popular platforms | ||
* iOS: Chrome and Edge do not support Cognitive Services (WebRTC) | ||
// Need to wait until token exchange is complete before speak | ||
await subscriptionKey.authorized; | ||
await speechSynthesis.speak(utterance); | ||
``` | ||
| Platform | OS | Browser | Cognitive Services (WebRTC) | Web Speech API | | ||
| - | - | - | - | - | | ||
| PC | Windows 10 (1803) | Chrome 67.0.3396.99 | Yes | Yes | | ||
| PC | Windows 10 (1803) | Edge 42.17134.1.0 | Yes | No, `SpeechRecognition` not implemented | | ||
| PC | Windows 10 (1803) | Firefox 61.0 | Yes | No, `SpeechRecognition` not implemented | | ||
| MacBook Pro | macOS High Sierra 10.13.1 | Chrome 67.0.3396.99 | Yes | Yes | | ||
| MacBook Pro | macOS High Sierra 10.13.1 | Safari 11.0.1 | Yes | No, `SpeechRecognition` not implemented | | ||
| Apple iPhone X | iOS 11.4 | Chrome 67.0.3396.87 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPhone X | iOS 11.4 | Edge 42.2.2.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPhone X | iOS 11.4 | Safari | Yes | No, `SpeechRecognition` not implemented | | ||
| Apple iPod (6th gen) | iOS 11.4 | Chrome 67.0.3396.87 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPod (6th gen) | iOS 11.4 | Edge 42.2.2.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Apple iPod (6th gen) | iOS 11.4 | Safari | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Google Pixel 2 | Android 8.1.0 | Chrome 67.0.3396.87 | Yes | Yes | | ||
| Google Pixel 2 | Android 8.1.0 | Edge 42.0.0.2057 | Yes | Yes | | ||
| Google Pixel 2 | Android 8.1.0 | Firefox 60.1.0 | Yes | Yes | | ||
| Microsoft Lumia 950 | Windows 10 (1709) | Edge 40.15254.489.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
| Microsoft Xbox One | Windows 10 (1806) 17134.4054 | Edge 42.17134.4054.0 | No, `AudioSourceError` | No, `SpeechRecognition` not implemented | | ||
> Note: `speechSynthesis` is camel-casing because it is an instance. | ||
## Event lifecycle scenarios | ||
`pitch`, `rate`, `voice`, and `volume` are supported. Only `onstart`, `onerror`, and `onend` events are supported. | ||
We test multiple scenarios to make sure we polyfill Web Speech API correctly. Following are events and their firing order, in Cognitive Services and Web Speech API respectively. | ||
### Integrating with React | ||
* [Happy path](#happy-path) | ||
* [Abort during recognition](#abort-during-recognition) | ||
* [Network issues](#network-issues) | ||
* [Audio muted or volume too low](#audio-muted-or-volume-too-low) | ||
* [No speech is recognized](#no-speech-is-recognized) | ||
* [Not authorized to use microphone](#not-authorized-to-use-microphone) | ||
You can use [`react-say`](https://github.com/compulim/react-say/) to integrate speech synthesis functionality to your React app. | ||
### Happy path | ||
```jsx | ||
import { speechSynthesis, SpeechSynthesisUtterance, SubscriptionKey } from 'web-speech-cognitive-services'; | ||
import React from 'react'; | ||
import Say from 'react-say'; | ||
Everything works, including multiple interim results. | ||
export default class extends React.Component { | ||
constructor(props) { | ||
super(props); | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
2. `ListeningStartedEvent` | ||
3. `ConnectingToServiceEvent` | ||
4. `RecognitionStartedEvent` | ||
5. `SpeechHypothesisEvent` (could be more than one) | ||
6. `SpeechEndDetectedEvent` | ||
7. `SpeechDetailedPhraseEvent` | ||
8. `RecognitionEndedEvent` | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `soundstart` | ||
4. `speechstart` | ||
5. `result` (multiple times) | ||
6. `speechend` | ||
7. `soundend` | ||
8. `audioend` | ||
9. `result(results = [{ isFinal = true }])` | ||
10. `end` | ||
speechSynthesis.speechToken = new SubscriptionKey('your subscription key'); | ||
speechSynthesis.speechToken.authorized.then(() => this.setState(() => ({ ready: true }))); | ||
### Abort during recognition | ||
this.state = { ready: false }; | ||
} | ||
#### Abort before first recognition is made | ||
render() { | ||
return ( | ||
this.state.ready && | ||
<Say | ||
speechSynthesis={ speechSynthesis } | ||
speechSynthesisUtterance={ SpeechSynthesisUtterance } | ||
text="Hello, World!" | ||
/> | ||
); | ||
} | ||
} | ||
``` | ||
* Cognitive Services | ||
* Essentially muted the microphone and receive `SpeechEndDetectedEvent` immediately, very similar to [happy path](#happy-path), could still result in success, silent, or no match | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
8. `audioend` | ||
9. `error(error = 'aborted')` | ||
10. `end` | ||
# Test matrix | ||
#### Abort after some text has recognized | ||
For detailed test matrix, please refer to [`SPEC-RECOGNITION.md`](SPEC-RECOGNITION.md) or [`SPEC-SYNTHESIS.md`](SPEC-SYNTHESIS.md). | ||
* Cognitive Services | ||
* Essentially muted the microphone and receive `SpeechEndDetectedEvent` immediately, very similar to [happy path](#happy-path), could still result in success, silent, or no match | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `soundstart` | ||
4. `speechstart` | ||
5. `result` (one or more) | ||
6. `speechend` | ||
7. `soundend` | ||
8. `audioend` | ||
9. `error(error = 'aborted')` | ||
10. `end` | ||
### Network issues | ||
Turn on airplane mode. | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
2. `ListeningStartedEvent` | ||
3. `ConnectingToServiceEvent` | ||
5. `RecognitionEndedEvent(Result.RecognitionStatus = 'ConnectError')` | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `audioend` | ||
4. `error(error = 'network')` | ||
5. `end` | ||
### Audio muted or volume too low | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
2. `ListeningStartedEvent` | ||
3. `ConnectingToServiceEvent` | ||
4. `RecognitionStartedEvent` | ||
5. `SpeechEndDetectedEvent` | ||
6. `SpeechDetailedPhraseEvent(Result.RecognitionStatus = 'InitialSilenceTimeout')` | ||
7. `RecognitionEndedEvent` | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `audioend` | ||
4. `error(error = 'no-speech')` | ||
5. `end` | ||
### No speech is recognized | ||
Some sounds are heard, but they cannot be recognized as text. There could be some interim results with recognized text, but the confidence is so low it dropped out of final result. | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
2. `ListeningStartedEvent` | ||
3. `ConnectingToServiceEvent` | ||
4. `RecognitionStartedEvent` | ||
5. `SpeechHypothesisEvent` (could be more than one) | ||
6. `SpeechEndDetectedEvent` | ||
7. `SpeechDetailedPhraseEvent(Result.RecognitionStatus = 'NoMatch')` | ||
8. `RecognitionEndedEvent` | ||
* Web Speech API | ||
1. `start` | ||
2. `audiostart` | ||
3. `soundstart` | ||
4. `speechstart` | ||
5. `result` | ||
6. `speechend` | ||
7. `soundend` | ||
8. `audioend` | ||
9. `end` | ||
> Note: the Web Speech API has `onnomatch` event, but unfortunately, Google Chrome did not fire this event. | ||
### Not authorized to use microphone | ||
The user clicks "deny" on the permission dialog, or there is no microphone detected in the system. | ||
* Cognitive Services | ||
1. `RecognitionTriggeredEvent` | ||
2. `RecognitionEndedEvent(Result.RecognitionStatus = 'AudioSourceError')` | ||
* Web Speech API | ||
1. `error(error = 'not-allowed')` | ||
2. `end` | ||
# Known issues | ||
* Interim results do not return confidence, final result do have confidence | ||
* We always return `0.5` for interim results | ||
* Cognitive Services support grammar list but not in JSGF format, more work to be done in this area | ||
* Although Google Chrome support grammar list, it seems the grammar list is not used at all | ||
* Continuous mode does not work | ||
* Speech recognition | ||
* Interim results do not return confidence; the final result does have confidence | ||
* We always return `0.5` for interim results | ||
* Cognitive Services support grammar list but not in JSGF format, more work to be done in this area | ||
* Although Google Chrome support grammar list, it seems the grammar list is not used at all | ||
* Continuous mode does not work | ||
* Speech synthesis | ||
* `onboundary`, `onmark`, `onpause`, and `onresume` are not supported/fired | ||
# Roadmap | ||
* [ ] Add grammar list | ||
* [ ] Add tests for lifecycle events | ||
* [ ] Investigate continuous mode | ||
* [ ] Enable Opus (OGG) encoding | ||
* Currently, there is a problem with `microsoft-speech-browser-sdk@0.0.12`, tracking on [this issue](https://github.com/Azure-Samples/SpeechToText-WebSockets-Javascript/issues/88) | ||
* General | ||
* [x] Unified [token exchange mechanism](packages/component/src/util/SubscriptionKey.js) | ||
* Speech recognition | ||
* [ ] Add grammar list | ||
* [ ] Add tests for lifecycle events | ||
* [ ] Investigate continuous mode | ||
* [ ] Enable Opus (OGG) encoding | ||
* Currently, there is a problem with `microsoft-speech-browser-sdk@0.0.12`, tracking on [this issue](https://github.com/Azure-Samples/SpeechToText-WebSockets-Javascript/issues/88) | ||
* [ ] Support custom speech | ||
* [ ] Support new [Speech-to-Text](https://azure.microsoft.com/en-us/services/cognitive-services/speech-to-text/) service | ||
* Point to [new URIs](https://docs.microsoft.com/en-us/azure/cognitive-services/Speech-Service/rest-apis) | ||
* Speech synthesis | ||
* [ ] Event: add `pause`/`resume` support | ||
* [ ] Properties: add `paused`/`pending`/`speaking` support | ||
* [ ] Support new [Text-to-Speech](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/how-to-text-to-speech) service | ||
* Custom voice fonts | ||
@@ -267,0 +160,0 @@ # Contributions |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
133546
34
1399
161
3