edge-tts-node - npm Package Compare versions

Comparing version 1.3.1 to 1.3.5

dist/utils.d.ts


dist/MsEdgeTTS.d.ts

@@ -1,3 +0,1 @@

/// <reference types="node" />
/// <reference types="node" />
import { OUTPUT_FORMAT } from "./OUTPUT_FORMAT";

@@ -41,3 +39,3 @@ import { Readable } from "stream";

static OUTPUT_FORMAT: typeof OUTPUT_FORMAT;
private static TRUSTED_CLIENT_TOKEN;
static TRUSTED_CLIENT_TOKEN: string;
private static VOICES_URL;

@@ -56,2 +54,4 @@ private static SYNTH_URL;

private readonly _agent;
private _arraybuffer;
private state;
private _log;

@@ -67,55 +67,13 @@ /**

private _initClient;
private _pushData;
private _pushAudioData;
private _SSMLTemplate;
/**
* Fetch the list of voices available in Microsoft Edge.
* These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
*/
getVoices(): Promise<Voice[]>;
/**
* Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
* Must be called at least once before text can be synthesised.
* Saved in this instance. Can be called at any time to update the metadata.
*
* @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
* @param outputFormat any {@link OUTPUT_FORMAT}
* @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
*/
setConfig(conf: any): void;
setMetadata(voiceName: string, outputFormat: OUTPUT_FORMAT, voiceLocale?: string): Promise<void>;
private _metadataCheck;
/**
* Close the WebSocket connection.
*/
close(): void;
/**
* Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param path a valid output path, including a filename and file extension.
* @param input the input to synthesise
* @param options (optional) {@link ProsodyOptions}
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
toFile(path: string, input: string, options?: ProsodyOptions): Promise<string>;
/**
* Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param input the text to synthesise. Can include SSML elements.
* @param options (optional) {@link ProsodyOptions}
* @returns {Readable} - a `stream.Readable` with the audio data
*/
toStream(input: string, options?: ProsodyOptions): Readable;
/**
* Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
*
* @param path a valid output path, including a filename and file extension.
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
rawToFile(path: string, requestSSML: string): Promise<string>;
/**
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
*
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Readable} - a `stream.Readable` with the audio data
*/
rawToStream(requestSSML: string): Readable;
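The declarations above include `getVoices()` and the `raw*` methods, which the README examples further down do not cover. A minimal usage sketch based only on these signatures; the voice `ShortName` field and the SSML body are illustrative assumptions:

```js
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";

const tts = new MsEdgeTTS();

// List the voices reported by the Edge Read Aloud endpoint.
// Assumes each Voice entry exposes a ShortName, as the setMetadata docs suggest.
const voices = await tts.getVoices();
console.log(voices.map((v) => v.ShortName));

// rawToStream bypasses the built-in SSML template, so the caller
// must supply a complete <speak> document (illustrative SSML below).
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const ssml = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US">
  <voice name="en-US-AriaNeural">Hello from raw SSML.</voice>
</speak>`;
const readable = tts.rawToStream(ssml);
readable.on("data", (chunk) => {
  // raw audio data
});
```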

@@ -122,0 +80,0 @@ private _rawSSMLRequestToFile;

dist/MsEdgeTTS.js

@@ -33,6 +33,7 @@ "use strict";

const buffer_1 = require("buffer");
const crypto_1 = require("crypto");
const randombytes_1 = __importDefault(require("randombytes"));
const OUTPUT_FORMAT_1 = require("./OUTPUT_FORMAT");
const stream_1 = require("stream");
const fs = __importStar(require("fs"));
const utils_1 = require("./utils");
class ProsodyOptions {

@@ -60,3 +61,3 @@ /**

class MsEdgeTTS {
static wordBoundaryEnabled = false;
static wordBoundaryEnabled = true;
static OUTPUT_FORMAT = OUTPUT_FORMAT_1.OUTPUT_FORMAT;

@@ -77,4 +78,10 @@ static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";

_agent;
_arraybuffer = false;
state = {
offsetCompensation: 0,
lastDurationOffset: 0
};
_log(...o) {
if (this._enableLogger) {
o.unshift('edgetts:');
console.log(...o);

@@ -103,3 +110,3 @@ }

this._ws.send(message, () => {
this._log("<- sent message: ", message);
//this._log("<- sent message: ", message);
});

@@ -111,3 +118,4 @@ }

: new isomorphic_ws_1.default(MsEdgeTTS.SYNTH_URL, { agent: this._agent });
this._ws.binaryType = "arraybuffer";
if (this._arraybuffer)
this._ws.binaryType = "arraybuffer";
return new Promise((resolve, reject) => {

@@ -133,20 +141,77 @@ this._ws.onopen = () => {

this._ws.onmessage = (m) => {
const buffer = buffer_1.Buffer.from(m.data);
const message = buffer.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
if (message.includes("Path:turn.start")) {
// start of turn, ignore
this._log("type:::::::: ", typeof m.data);
let mdata = m.data;
if (typeof mdata === 'string') {
const encodedData = buffer_1.Buffer.from(mdata, 'utf8');
const message = mdata;
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
let [headers, data] = (0, utils_1.getHeadersAndData)(encodedData, encodedData.indexOf("\r\n\r\n"));
const path = headers['Path'];
if (path === "audio.metadata") {
let parsedMetadata = (0, utils_1.parseMetadata)(data, this.state["offsetCompensation"]);
this._pushData(parsedMetadata, requestId);
// Update the last duration offset for use in the next SSML request
this.state["lastDurationOffset"] = parsedMetadata["offset"] + parsedMetadata["duration"];
}
else if (path === "turn.end") {
this.state["offsetCompensation"] = this.state["lastDurationOffset"];
this.state["offsetCompensation"] += 8750000;
}
else if (path !== "response" && path !== "turn.start") {
// If the path is not "response" or "turn.start"
throw new Error("Unknown path received"); // throw an unknown-response error
}
}
else if (message.includes("Path:turn.end")) {
// end of turn, close stream
this._streams[requestId].push(null);
else if (buffer_1.Buffer.isBuffer(mdata)) {
const message = mdata.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
const headerLength = mdata.readUInt16BE(0);
if (headerLength > mdata.length) {
throw new Error("The header length is greater than the length of the data.");
}
// Parse the headers and data from the binary message.
let [headers, data] = (0, utils_1.getHeadersAndData)(mdata, headerLength);
if (headers['Path'] !== 'audio') {
throw new Error("Received binary message, but the path is not audio.");
}
const contentType = headers['Content-Type'];
if (contentType !== 'audio/mpeg' && contentType !== undefined) {
throw new Error("Received binary message, but with an unexpected Content-Type.");
}
// We only allow no Content-Type if there is no data.
if (contentType === undefined) {
if (data.length === 0) {
return;
}
// If the data is not empty, then we need to raise an exception.
throw new Error("Received binary message with no Content-Type, but with data.");
}
// If the data is empty now, then we need to raise an exception.
if (data.length === 0) {
throw new Error("Received binary message, but it is missing the audio data.");
}
this._pushData({ type: "audio", data: data }, requestId);
}
else if (message.includes("Path:response")) {
// context response, ignore
}
else if (message.includes("Path:audio") && m.data instanceof ArrayBuffer) {
this._pushAudioData(buffer, requestId);
}
else {
this._log("UNKNOWN MESSAGE", message);
mdata = buffer_1.Buffer.isBuffer(mdata) ? mdata : mdata['data'];
const buffer = buffer_1.Buffer.from(mdata);
const message = buffer.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
this._log(message.includes("Path:audio"), buffer_1.Buffer.isBuffer(mdata), mdata instanceof ArrayBuffer);
if (message.includes("Path:turn.start")) {
// start of turn, ignore
}
else if (message.includes("Path:turn.end")) {
// end of turn, close stream
this._streams[requestId].push(null);
}
else if (message.includes("Path:response")) {
// context response, ignore
}
else if (message.includes("Path:audio") && buffer_1.Buffer.isBuffer(mdata)) {
this._pushAudioData(buffer, requestId);
}
else {
//this._log("UNKNOWN MESSAGE", message);
}
}
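The rewritten handler above splits message handling by frame type: text frames are parsed by locating the `\r\n\r\n` header separator and dispatching on the `Path` header, while binary frames are treated as length-prefixed, reading the first two bytes as a big-endian header length, requiring `Path: audio` with an `audio/mpeg` Content-Type, and pushing the remaining bytes as audio. The `offsetCompensation` / `lastDurationOffset` state appears to keep word-boundary offsets monotonic across turns, padding each `turn.end` by 8,750,000 units (0.875 s if these are the usual 100 ns ticks). A standalone sketch of that binary framing follows; `parseBinaryFrame` is a hypothetical stand-in, and the exact byte offsets used by `utils.getHeadersAndData` may differ:

```js
// Hypothetical parser mirroring the binary branch above (not the library's own helper).
function parseBinaryFrame(frame /* Buffer */) {
  const headerLength = frame.readUInt16BE(0); // big-endian length prefix
  if (headerLength > frame.length) {
    throw new Error("The header length is greater than the length of the data.");
  }
  // Assumed layout: [2-byte length][headers][audio payload]; offsets may differ
  // from what utils.getHeadersAndData actually does.
  const headerText = frame.subarray(2, 2 + headerLength).toString("utf8");
  const headers = Object.fromEntries(
    headerText.split("\r\n").filter(Boolean).map((line) => {
      const i = line.indexOf(":");
      return [line.slice(0, i), line.slice(i + 1)];
    })
  );
  if (headers["Path"] !== "audio") {
    throw new Error("Received binary message, but the path is not audio.");
  }
  return { headers, data: frame.subarray(2 + headerLength) };
}
```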

@@ -165,2 +230,6 @@ };

}
_pushData(data, requestId) {
data = typeof data == "string" ? data : JSON.stringify(data);
this._streams[requestId].push(data, 'utf8');
}
_pushAudioData(audioBuffer, requestId) {

@@ -170,3 +239,3 @@ const audioStartIndex = audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) + MsEdgeTTS.BINARY_DELIM.length;

this._streams[requestId].push(audioData);
this._log("received audio chunk, size: ", audioData?.length);
this._log("_pushAudioData: received audio chunk, size: ", audioData?.length);
}

@@ -184,6 +253,2 @@ _SSMLTemplate(input, options = {}) {

}
/**
* Fetch the list of voices available in Microsoft Edge.
* These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
*/
getVoices() {

@@ -196,11 +261,5 @@ return new Promise((resolve, reject) => {

}
/**
* Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
* Must be called at least once before text can be synthesised.
* Saved in this instance. Can be called at any time to update the metadata.
*
* @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
* @param outputFormat any {@link OUTPUT_FORMAT}
* @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
*/
setConfig(conf) {
this._arraybuffer = conf["arraybuffer"] ?? false;
}
async setMetadata(voiceName, outputFormat, voiceLocale) {
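`setConfig` is new in this version and, judging from the compiled code above, currently recognises a single `arraybuffer` flag that controls whether the WebSocket's `binaryType` is set to `"arraybuffer"`. A minimal sketch; the option name comes straight from the diff, and it is presumably called before `setMetadata` opens the connection:

```js
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";

const tts = new MsEdgeTTS();
// Opt in to arraybuffer framing before the WebSocket is created.
tts.setConfig({ arraybuffer: true });
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
```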

@@ -232,26 +291,8 @@ const oldVoice = this._voice;

}
/**
* Close the WebSocket connection.
*/
close() {
this._ws.close();
}
/**
* Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param path a valid output path, including a filename and file extension.
* @param input the input to synthesise
* @param options (optional) {@link ProsodyOptions}
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
toFile(path, input, options) {
return this._rawSSMLRequestToFile(path, this._SSMLTemplate(input, options));
}
/**
* Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param input the text to synthesise. Can include SSML elements.
* @param options (optional) {@link ProsodyOptions}
* @returns {Readable} - a `stream.Readable` with the audio data
*/
toStream(input, options) {

@@ -261,18 +302,5 @@ const { stream } = this._rawSSMLRequest(this._SSMLTemplate(input, options));

}
/**
* Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
*
* @param path a valid output path, including a filename and file extension.
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
rawToFile(path, requestSSML) {
return this._rawSSMLRequestToFile(path, requestSSML);
}
/**
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
*
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Readable} - a `stream.Readable` with the audio data
*/
rawToStream(requestSSML) {

@@ -303,3 +331,3 @@ const { stream } = this._rawSSMLRequest(requestSSML);

this._metadataCheck();
const requestId = (0, crypto_1.randomBytes)(16).toString("hex");
const requestId = (0, randombytes_1.default)(16).toString("hex");
const request = `X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
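The request id generation switches here from Node's built-in `crypto.randomBytes` to the standalone `randombytes` package (added to `package.json` below), which keeps the same call shape while also working under browser bundlers. In Node the two are interchangeable:

```js
const randomBytes = require("randombytes"); // drop-in replacement for crypto.randomBytes
const requestId = randomBytes(16).toString("hex"); // 32-character hex id, as before
```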

@@ -313,2 +341,3 @@ ` + requestSSML.trim();

destroy(error, callback) {
self._log("+_+_+_+__+_", error);
delete self._streams[requestId];

@@ -315,0 +344,0 @@ callback(error);

package.json

{
"name": "edge-tts-node",
"version": "1.3.1",
"version": "1.3.5",
"description": "An Azure Speech Service module that uses the Microsoft Edge Read Aloud API.",

@@ -40,2 +40,3 @@ "author": "Migushthe2nd <Migushthe2nd@users.noreply.github.com>",

"process": "^0.11.10",
"randombytes": "^2.1.0",
"stream-browserify": "^3.0.0",

@@ -42,0 +43,0 @@ "ws": "^8.14.1"
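The added dependencies (`randombytes`, `stream-browserify`, `process`) are the usual polyfills for running Node-flavoured code through a browser bundler, which suggests this release targets bundler compatibility. A purely illustrative webpack 5 wiring, not shipped with or required by edge-tts-node:

```js
// webpack.config.js (illustration only; not part of edge-tts-node)
const webpack = require("webpack");

module.exports = {
  resolve: {
    fallback: {
      stream: require.resolve("stream-browserify"), // shim for Node's "stream"
    },
  },
  plugins: [
    // Provide a global `process` for code that expects the Node global.
    new webpack.ProvidePlugin({ process: "process/browser" }),
  ],
};
```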

README.md

@@ -1,4 +0,5 @@

# MsEdgeTTS
[![npm version](https://badge.fury.io/js/msedge-tts.svg)](https://badge.fury.io/js/msedge-tts)
# edge-tts-node
[![npm version](https://badge.fury.io/js/edge-tts-node.svg)](https://badge.fury.io/js/edge-tts-node)
A simple Azure Speech Service module that uses the Microsoft Edge Read Aloud API.

@@ -31,15 +32,18 @@

```js
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
const tts = new MsEdgeTTS();
await tts.setMetadata("en-IE-ConnorNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
await tts.setMetadata(
"en-IE-ConnorNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const readable = tts.toStream("Hi, how are you?");
readable.on("data", (data) => {
console.log("DATA RECEIVED", data);
// raw audio file data
console.log("DATA RECEIVED", data);
// raw audio file data
});
readable.on("close", () => {
console.log("STREAM CLOSED");
console.log("STREAM CLOSED");
});

@@ -51,8 +55,11 @@ ```

```js
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
(async () => {
const tts = new MsEdgeTTS();
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
const tts = new MsEdgeTTS();
await tts.setMetadata(
"en-US-AriaNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
})();

@@ -62,9 +69,17 @@ ```

### Change voice rate, pitch and volume
```js
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
(async () => {
const tts = new MsEdgeTTS();
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?", {rate: 0.5, pitch: "+200Hz"});
const tts = new MsEdgeTTS();
await tts.setMetadata(
"en-US-AriaNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const filePath = await tts.toFile(
"./example_audio.webm",
"Hi, how are you?",
{ rate: 0.5, pitch: "+200Hz" }
);
})();

@@ -74,11 +89,18 @@ ```

### Use an alternative HTTP Agent
Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent).
```js
import {SocksProxyAgent} from 'socks-proxy-agent';
import { SocksProxyAgent } from "socks-proxy-agent";
(async () => {
const agent = new SocksProxyAgent("socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com")
const tts = new MsEdgeTTS(agent);
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
const agent = new SocksProxyAgent(
"socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com"
);
const tts = new MsEdgeTTS(agent);
await tts.setMetadata(
"en-US-AriaNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
})();

@@ -85,0 +107,0 @@ ```
