edge-tts-node
Comparing version 1.3.1 to 1.3.5
@@ -1,3 +0,1 @@
-/// <reference types="node" />
-/// <reference types="node" />
 import { OUTPUT_FORMAT } from "./OUTPUT_FORMAT";
@@ -41,3 +39,3 @@ import { Readable } from "stream";
     static OUTPUT_FORMAT: typeof OUTPUT_FORMAT;
-    private static TRUSTED_CLIENT_TOKEN;
+    static TRUSTED_CLIENT_TOKEN: string;
     private static VOICES_URL;
@@ -56,2 +54,4 @@ private static SYNTH_URL;
     private readonly _agent;
+    private _arraybuffer;
+    private state;
     private _log;
@@ -67,55 +67,13 @@ /**
     private _initClient;
+    private _pushData;
     private _pushAudioData;
     private _SSMLTemplate;
-    /**
-     * Fetch the list of voices available in Microsoft Edge.
-     * These, however, are not all: the complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
-     */
     getVoices(): Promise<Voice[]>;
-    /**
-     * Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
-     * Must be called at least once before text can be synthesised.
-     * Saved in this instance. Can be called at any time to update the metadata.
-     *
-     * @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
-     * @param outputFormat any {@link OUTPUT_FORMAT}
-     * @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
-     */
+    setConfig(conf: any): void;
     setMetadata(voiceName: string, outputFormat: OUTPUT_FORMAT, voiceLocale?: string): Promise<void>;
     private _metadataCheck;
-    /**
-     * Close the WebSocket connection.
-     */
     close(): void;
-    /**
-     * Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SSML template}.
-     *
-     * @param path a valid output path, including a filename and file extension.
-     * @param input the input to synthesise
-     * @param options (optional) {@link ProsodyOptions}
-     * @returns {Promise<string>} - a `Promise` with the full filepath
-     */
     toFile(path: string, input: string, options?: ProsodyOptions): Promise<string>;
-    /**
-     * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
-     *
-     * @param input the text to synthesise. Can include SSML elements.
-     * @param options (optional) {@link ProsodyOptions}
-     * @returns {Readable} - a `stream.Readable` with the audio data
-     */
     toStream(input: string, options?: ProsodyOptions): Readable;
-    /**
-     * Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
-     *
-     * @param path a valid output path, including a filename and file extension.
-     * @param requestSSML the SSML to send. SSML elements are required for it to work.
-     * @returns {Promise<string>} - a `Promise` with the full filepath
-     */
     rawToFile(path: string, requestSSML: string): Promise<string>;
-    /**
-     * Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
-     *
-     * @param requestSSML the SSML to send. SSML elements are required for it to work.
-     * @returns {Readable} - a `stream.Readable` with the audio data
-     */
     rawToStream(requestSSML: string): Readable;
@@ -122,0 +80,0 @@ private _rawSSMLRequestToFile;
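The typings diff above tells the 1.3.5 story in miniature: the JSDoc is stripped from the published declarations, `TRUSTED_CLIENT_TOKEN` becomes public, and a new `setConfig` method plus `_arraybuffer`/`state` fields appear. A minimal sketch of how the declared surface fits together; `arraybuffer` is the only option `setConfig` is shown to read in the compiled JavaScript that follows:

```js
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";

(async () => {
  const tts = new MsEdgeTTS();
  // New in 1.3.5: opt in to ArrayBuffer WebSocket frames before connecting.
  tts.setConfig({ arraybuffer: true });
  await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
  const voices = await tts.getVoices(); // Promise<Voice[]>, per the declaration above
  console.log(voices.length, "voices available");
})();
```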
@@ -33,6 +33,7 @@ "use strict";
 const buffer_1 = require("buffer");
 const crypto_1 = require("crypto");
+const randombytes_1 = __importDefault(require("randombytes"));
 const OUTPUT_FORMAT_1 = require("./OUTPUT_FORMAT");
 const stream_1 = require("stream");
 const fs = __importStar(require("fs"));
 const utils_1 = require("./utils");
 class ProsodyOptions {
@@ -60,3 +61,3 @@ /**
 class MsEdgeTTS {
-    static wordBoundaryEnabled = false;
+    static wordBoundaryEnabled = true;
     static OUTPUT_FORMAT = OUTPUT_FORMAT_1.OUTPUT_FORMAT;
@@ -77,4 +78,10 @@ static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
     _agent;
+    _arraybuffer = false;
+    state = {
+        offsetCompensation: 0,
+        lastDurationOffset: 0
+    };
     _log(...o) {
         if (this._enableLogger) {
+            o.unshift('edgetts:');
             console.log(...o);
@@ -103,3 +110,3 @@ }
         this._ws.send(message, () => {
-            this._log("<- sent message: ", message);
+            //this._log("<- sent message: ", message);
         });
@@ -111,3 +118,4 @@ }
             : new isomorphic_ws_1.default(MsEdgeTTS.SYNTH_URL, { agent: this._agent });
-        this._ws.binaryType = "arraybuffer";
+        if (this._arraybuffer)
+            this._ws.binaryType = "arraybuffer";
         return new Promise((resolve, reject) => {
@@ -133,20 +141,77 @@ this._ws.onopen = () => {
         this._ws.onmessage = (m) => {
-            const buffer = buffer_1.Buffer.from(m.data);
-            const message = buffer.toString();
-            const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
-            if (message.includes("Path:turn.start")) {
-                // start of turn, ignore
+            this._log("type:::::::: ", typeof m.data);
+            let mdata = m.data;
+            if (typeof mdata === 'string') {
+                const encodedData = buffer_1.Buffer.from(mdata, 'utf8');
+                const message = mdata;
+                const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
+                let [headers, data] = (0, utils_1.getHeadersAndData)(encodedData, encodedData.indexOf("\r\n\r\n"));
+                const path = headers['Path'];
+                if (path === "audio.metadata") {
+                    let parsedMetadata = (0, utils_1.parseMetadata)(data, this.state["offsetCompensation"]);
+                    this._pushData(parsedMetadata, requestId);
+                    // Update the last duration offset, used for the next SSML request
+                    this.state["lastDurationOffset"] = parsedMetadata["offset"] + parsedMetadata["duration"];
+                }
+                else if (path === "turn.end") {
+                    this.state["offsetCompensation"] = this.state["lastDurationOffset"];
+                    this.state["offsetCompensation"] += 8750000;
+                }
+                else if (path !== "response" && path !== "turn.start") {
+                    // The path is neither "response" nor "turn.start"
+                    throw new Error("Unknown path received"); // unknown response type
+                }
             }
-            else if (message.includes("Path:turn.end")) {
-                // end of turn, close stream
-                this._streams[requestId].push(null);
+            else if (buffer_1.Buffer.isBuffer(mdata)) {
+                const message = mdata.toString();
+                const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
+                const headerLength = mdata.readUInt16BE(0);
+                if (headerLength > mdata.length) {
+                    throw new Error("The header length is greater than the length of the data.");
+                }
+                // Parse the headers and data from the binary message.
+                let [headers, data] = (0, utils_1.getHeadersAndData)(mdata, headerLength);
+                if (headers['Path'] !== 'audio') {
+                    throw new Error("Received binary message, but the path is not audio.");
+                }
+                const contentType = headers['Content-Type'];
+                if (contentType !== 'audio/mpeg' && contentType !== undefined) {
+                    throw new Error("Received binary message, but with an unexpected Content-Type.");
+                }
+                // We only allow no Content-Type if there is no data.
+                if (contentType === undefined) {
+                    if (data.length === 0) {
+                        return;
+                    }
+                    // If the data is not empty, then we need to raise an exception.
+                    throw new Error("Received binary message with no Content-Type, but with data.");
+                }
+                // If the data is empty now, then we need to raise an exception.
+                if (data.length === 0) {
+                    throw new Error("Received binary message, but it is missing the audio data.");
+                }
+                this._pushData({ type: "audio", data: data }, requestId);
             }
-            else if (message.includes("Path:response")) {
-                // context response, ignore
-            }
-            else if (message.includes("Path:audio") && m.data instanceof ArrayBuffer) {
-                this._pushAudioData(buffer, requestId);
-            }
             else {
-                this._log("UNKNOWN MESSAGE", message);
+                mdata = buffer_1.Buffer.isBuffer(mdata) ? mdata : mdata['data'];
+                const buffer = buffer_1.Buffer.from(mdata);
+                const message = buffer.toString();
+                const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
+                this._log(message.includes("Path:audio"), buffer_1.Buffer.isBuffer(mdata), mdata instanceof ArrayBuffer);
+                if (message.includes("Path:turn.start")) {
+                    // start of turn, ignore
+                }
+                else if (message.includes("Path:turn.end")) {
+                    // end of turn, close stream
+                    this._streams[requestId].push(null);
+                }
+                else if (message.includes("Path:response")) {
+                    // context response, ignore
+                }
+                else if (message.includes("Path:audio") && buffer_1.Buffer.isBuffer(mdata)) {
+                    this._pushAudioData(buffer, requestId);
+                }
+                else {
+                    //this._log("UNKNOWN MESSAGE", message);
+                }
             }
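The rewritten handler distinguishes three frame shapes: text frames (headers terminated by a blank line, then a JSON body such as word-boundary metadata, which matters now that `wordBoundaryEnabled` defaults to `true`), binary `Buffer` frames (a two-byte big-endian header-length prefix, then headers, then raw audio), and a legacy fallback. The `8750000` added to `offsetCompensation` after each `turn.end` is in the 100-nanosecond ticks Azure uses for offsets, i.e. a 0.875 s gap between turns. Below is a simplified sketch of that framing; `getHeadersAndData` is the module's own helper and isn't shown in this diff, so the split points here are an assumption based on how it is called above:

```js
// Hedged sketch of the frame layout the handler parses; not the module's exact helper.
const { Buffer } = require("buffer");

function parseFrame(raw) {
  let headerStart = 0, headerEnd, body;
  if (typeof raw === "string") {
    raw = Buffer.from(raw, "utf8");
    headerEnd = raw.indexOf("\r\n\r\n");   // text frames: a blank line ends the headers
    body = raw.subarray(headerEnd + 4);    // JSON body (e.g. audio.metadata payload)
  } else {
    headerEnd = 2 + raw.readUInt16BE(0);   // binary frames: 2-byte big-endian length prefix
    headerStart = 2;
    body = raw.subarray(headerEnd);        // raw audio bytes (Content-Type: audio/mpeg)
  }
  const headers = {};
  for (const line of raw.subarray(headerStart, headerEnd).toString("utf8").split("\r\n")) {
    const colon = line.indexOf(":");
    if (colon > 0) headers[line.slice(0, colon)] = line.slice(colon + 1);
  }
  return { headers, body }; // headers.Path: turn.start | audio.metadata | audio | turn.end
}
```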
@@ -165,2 +230,6 @@ };
     }
+    _pushData(data, requestId) {
+        data = typeof data == "string" ? data : JSON.stringify(data);
+        this._streams[requestId].push(data, 'utf8');
+    }
     _pushAudioData(audioBuffer, requestId) {
@@ -170,3 +239,3 @@ const audioStartIndex = audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) + MsEdgeTTS.BINARY_DELIM.length;
         this._streams[requestId].push(audioData);
-        this._log("received audio chunk, size: ", audioData?.length);
+        this._log("_pushAudioData: received audio chunk, size: ", audioData?.length);
     }
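The legacy `_pushAudioData` path, still used by the fallback branch above, does not use the binary length prefix; it scans for `MsEdgeTTS.BINARY_DELIM` and pushes everything after it. A sketch under the assumption that the delimiter is the `Path:audio\r\n` header terminator; the constant's actual value is not shown in this diff:

```js
// Assumption: BINARY_DELIM marks the end of the headers in a binary audio frame.
const BINARY_DELIM = "Path:audio\r\n"; // hypothetical value, not confirmed by this diff

function extractAudio(buffer) {
  const start = buffer.indexOf(BINARY_DELIM) + BINARY_DELIM.length;
  return buffer.subarray(start); // everything after the delimiter is audio payload
}
```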
@@ -184,6 +253,2 @@ _SSMLTemplate(input, options = {}) {
     }
-    /**
-     * Fetch the list of voices available in Microsoft Edge.
-     * These, however, are not all: the complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
-     */
     getVoices() {
@@ -196,11 +261,5 @@ return new Promise((resolve, reject) => {
     }
-    /**
-     * Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
-     * Must be called at least once before text can be synthesised.
-     * Saved in this instance. Can be called at any time to update the metadata.
-     *
-     * @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
-     * @param outputFormat any {@link OUTPUT_FORMAT}
-     * @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
-     */
+    setConfig(conf) {
+        this._arraybuffer = conf["arraybuffer"] ?? false;
+    }
     async setMetadata(voiceName, outputFormat, voiceLocale) {
@@ -232,26 +291,8 @@ const oldVoice = this._voice;
     }
-    /**
-     * Close the WebSocket connection.
-     */
     close() {
         this._ws.close();
     }
-    /**
-     * Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SSML template}.
-     *
-     * @param path a valid output path, including a filename and file extension.
-     * @param input the input to synthesise
-     * @param options (optional) {@link ProsodyOptions}
-     * @returns {Promise<string>} - a `Promise` with the full filepath
-     */
     toFile(path, input, options) {
         return this._rawSSMLRequestToFile(path, this._SSMLTemplate(input, options));
     }
-    /**
-     * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
-     *
-     * @param input the text to synthesise. Can include SSML elements.
-     * @param options (optional) {@link ProsodyOptions}
-     * @returns {Readable} - a `stream.Readable` with the audio data
-     */
     toStream(input, options) {
@@ -261,18 +302,5 @@ const { stream } = this._rawSSMLRequest(this._SSMLTemplate(input, options));
     }
-    /**
-     * Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
-     *
-     * @param path a valid output path, including a filename and file extension.
-     * @param requestSSML the SSML to send. SSML elements are required for it to work.
-     * @returns {Promise<string>} - a `Promise` with the full filepath
-     */
     rawToFile(path, requestSSML) {
         return this._rawSSMLRequestToFile(path, requestSSML);
     }
-    /**
-     * Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
-     *
-     * @param requestSSML the SSML to send. SSML elements are required for it to work.
-     * @returns {Readable} - a `stream.Readable` with the audio data
-     */
     rawToStream(requestSSML) {
@@ -303,3 +331,3 @@ const { stream } = this._rawSSMLRequest(requestSSML);
         this._metadataCheck();
-        const requestId = (0, crypto_1.randomBytes)(16).toString("hex");
+        const requestId = (0, randombytes_1.default)(16).toString("hex");
         const request = `X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
@@ -313,2 +341,3 @@ ` + requestSSML.trim();
             destroy(error, callback) {
+                self._log("+_+_+_+__+_", error);
                 delete self._streams[requestId];
@@ -315,0 +344,0 @@ callback(error);
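The other functional change in the compiled JavaScript is the request-id source: `crypto.randomBytes` is swapped for the `randombytes` package (added to `dependencies` below), which delegates to `crypto` under Node but also works in browser bundles alongside the existing `stream-browserify` and `process` shims. Both forms yield a 32-character hex id:

```js
const randombytes = require("randombytes");

// Equivalent to require("crypto").randomBytes(16).toString("hex") under Node,
// but bundler-friendly in the browser.
const requestId = randombytes(16).toString("hex");
console.log(requestId); // 32 hex characters
```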
 {
     "name": "edge-tts-node",
-    "version": "1.3.1",
+    "version": "1.3.5",
     "description": "An Azure Speech Service module that uses the Microsoft Edge Read Aloud API.",
@@ -40,2 +40,3 @@ "author": "Migushthe2nd <Migushthe2nd@users.noreply.github.com>",
     "process": "^0.11.10",
+    "randombytes": "^2.1.0",
     "stream-browserify": "^3.0.0",
@@ -42,0 +43,0 @@ "ws": "^8.14.1"
@@ -1,4 +0,5 @@
-# MsEdgeTTS
-[![npm version](https://badge.fury.io/js/msedge-tts.svg)](https://badge.fury.io/js/msedge-tts)
+# edge-tts-node
+[![npm version](https://badge.fury.io/js/edge-tts-node.svg)](https://badge.fury.io/js/edge-tts-node)
 A simple Azure Speech Service module that uses the Microsoft Edge Read Aloud API.
@@ -31,15 +32,18 @@
 ```js
-import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
+import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
 const tts = new MsEdgeTTS();
-await tts.setMetadata("en-IE-ConnorNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
+await tts.setMetadata(
+  "en-IE-ConnorNeural",
+  OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
+);
 const readable = tts.toStream("Hi, how are you?");
 readable.on("data", (data) => {
-    console.log("DATA RECEIVED", data);
-    // raw audio file data
+  console.log("DATA RECEIVED", data);
+  // raw audio file data
 });
 readable.on("close", () => {
-    console.log("STREAM CLOSED");
+  console.log("STREAM CLOSED");
 });
@@ -51,8 +55,11 @@ ```
 ```js
-import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
+import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
 (async () => {
-    const tts = new MsEdgeTTS();
-    await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
-    const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
+  const tts = new MsEdgeTTS();
+  await tts.setMetadata(
+    "en-US-AriaNeural",
+    OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
+  );
+  const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
 })();
@@ -62,9 +69,17 @@ ```
 ### Change voice rate, pitch and volume
 ```js
-import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
+import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
 (async () => {
-    const tts = new MsEdgeTTS();
-    await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
-    const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?", {rate: 0.5, pitch: "+200Hz"});
+  const tts = new MsEdgeTTS();
+  await tts.setMetadata(
+    "en-US-AriaNeural",
+    OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
+  );
+  const filePath = await tts.toFile(
+    "./example_audio.webm",
+    "Hi, how are you?",
+    { rate: 0.5, pitch: "+200Hz" }
+  );
 })();
@@ -74,11 +89,18 @@ ```
 ### Use an alternative HTTP Agent
 Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent).
 ```js
-import {SocksProxyAgent} from 'socks-proxy-agent';
+import { SocksProxyAgent } from "socks-proxy-agent";
 (async () => {
-    const agent = new SocksProxyAgent("socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com")
-    const tts = new MsEdgeTTS(agent);
-    await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
-    const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
+  const agent = new SocksProxyAgent(
+    "socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com"
+  );
+  const tts = new MsEdgeTTS(agent);
+  await tts.setMetadata(
+    "en-US-AriaNeural",
+    OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
+  );
+  const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
 })();
@@ -85,0 +107,0 @@ ```
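All of the README examples go through the module's built-in SSML template. The typings at the top of this diff also expose `rawToFile`/`rawToStream`, which send your SSML as-is. A hedged sketch, not part of the package's README; the `<speak>`/`<voice>` structure follows the standard W3C synthesis namespace, and the exact markup the service accepts may differ:

```js
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";

(async () => {
  const tts = new MsEdgeTTS();
  await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
  // rawToStream skips the SSML template, so the request must be a complete SSML document.
  const ssml = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US">
  <voice name="en-US-AriaNeural">Hi, how are you?</voice>
</speak>`;
  const readable = tts.rawToStream(ssml);
  readable.on("data", (chunk) => console.log("CHUNK", chunk.length));
})();
```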
+ Added randombytes@^2.1.0