New Case Study:See how Anthropic automated 95% of dependency reviews with Socket.Learn More
Socket
Sign inDemoInstall
Socket

edge-tts-node

Package Overview
Dependencies
Maintainers
1
Versions
34
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

edge-tts-node - npm Package Compare versions

Comparing version 1.0.1 to 1.0.5

dist/utils.d.ts

48

dist/MsEdgeTTS.d.ts

@@ -55,2 +55,4 @@ /// <reference types="node" />

private readonly _agent;
private _arraybuffer;
private state;
private _log;

@@ -66,55 +68,13 @@ /**

private _initClient;
private _pushData;
private _pushAudioData;
private _SSMLTemplate;
/**
* Fetch the list of voices available in Microsoft Edge.
* These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
*/
getVoices(): Promise<Voice[]>;
/**
* Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
* Must be called at least once before text can be synthesised.
* Saved in this instance. Can be called at any time to update the metadata.
*
* @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
* @param outputFormat any {@link OUTPUT_FORMAT}
* @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
*/
setConfig(conf: any): void;
setMetadata(voiceName: string, outputFormat: OUTPUT_FORMAT, voiceLocale?: string): Promise<void>;
private _metadataCheck;
/**
* Close the WebSocket connection.
*/
close(): void;
/**
* Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param path a valid output path, including a filename and file extension.
* @param input the input to synthesise
* @param options (optional) {@link ProsodyOptions}
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
toFile(path: string, input: string, options?: ProsodyOptions): Promise<string>;
/**
* Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param input the text to synthesise. Can include SSML elements.
* @param options (optional) {@link ProsodyOptions}
* @returns {Readable} - a `stream.Readable` with the audio data
*/
toStream(input: string, options?: ProsodyOptions): Readable;
/**
* Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
*
* @param path a valid output path, including a filename and file extension.
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
rawToFile(path: string, requestSSML: string): Promise<string>;
/**
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
*
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Readable} - a `stream.Readable` with the audio data
*/
rawToStream(requestSSML: string): Readable;

@@ -121,0 +81,0 @@ private _rawSSMLRequestToFile;

@@ -37,2 +37,3 @@ "use strict";

const fs = __importStar(require("fs"));
const utils_1 = require("./utils");
class ProsodyOptions {

@@ -60,3 +61,3 @@ /**

class MsEdgeTTS {
static wordBoundaryEnabled = false;
static wordBoundaryEnabled = true;
static OUTPUT_FORMAT = OUTPUT_FORMAT_1.OUTPUT_FORMAT;

@@ -77,2 +78,7 @@ static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";

_agent;
_arraybuffer;
state = {
offsetCompensation: 0,
lastDurationOffset: 0
};
_log(...o) {

@@ -110,3 +116,4 @@ if (this._enableLogger) {

: new isomorphic_ws_1.default(MsEdgeTTS.SYNTH_URL, { agent: this._agent });
this._ws.binaryType = "arraybuffer";
if (this._arraybuffer)
this._ws.binaryType = "arraybuffer";
return new Promise((resolve, reject) => {

@@ -132,20 +139,43 @@ this._ws.onopen = () => {

this._ws.onmessage = (m) => {
const buffer = buffer_1.Buffer.from(m.data);
const message = buffer.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
if (message.includes("Path:turn.start")) {
// start of turn, ignore
if (typeof m.data === 'string') {
const encodedData = buffer_1.Buffer.from(m.data, 'utf8');
const message = encodedData.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
let [headers, data] = (0, utils_1.getHeadersAndData)(encodedData, encodedData.indexOf("\r\n\r\n"));
const path = headers['Path'];
if (path === "audio.metadata") {
let parsedMetadata = (0, utils_1.parseMetadata)(data, this.state["offsetCompensation"]);
this._pushData(parsedMetadata, requestId);
// Update the last duration offset, to be used for the next SSML request
this.state["lastDurationOffset"] = parsedMetadata["offset"] + parsedMetadata["duration"];
}
else if (path === "turn.end") {
this.state["offsetCompensation"] = this.state["lastDurationOffset"];
this.state["offsetCompensation"] += 8750000;
}
else if (path !== "response" && path !== "turn.start") {
// If the path is neither "response" nor "turn.start"
throw new Error("Unknown path received"); // 抛出未知响应错误
}
}
else if (message.includes("Path:turn.end")) {
// end of turn, close stream
this._streams[requestId].push(null);
}
else if (message.includes("Path:response")) {
// context response, ignore
}
else if (message.includes("Path:audio") && m.data instanceof ArrayBuffer) {
this._pushAudioData(buffer, requestId);
}
else {
this._log("UNKNOWN MESSAGE", message);
const buffer = buffer_1.Buffer.from(m.data);
const message = buffer.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)[1];
if (message.includes("Path:turn.start")) {
// start of turn, ignore
}
else if (message.includes("Path:turn.end")) {
// end of turn, close stream
this._streams[requestId].push(null);
}
else if (message.includes("Path:response")) {
// context response, ignore
}
else if (message.includes("Path:audio") && m.data instanceof ArrayBuffer) {
this._pushAudioData(buffer, requestId);
}
else {
this._log("UNKNOWN MESSAGE", message);
}
}

@@ -164,2 +194,5 @@ };

}
_pushData(data, requestId) {
this._streams[requestId].push(data);
}
_pushAudioData(audioBuffer, requestId) {

@@ -182,6 +215,2 @@ const audioStartIndex = audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) + MsEdgeTTS.BINARY_DELIM.length;

}
/**
* Fetch the list of voices available in Microsoft Edge.
* These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
*/
getVoices() {

@@ -194,11 +223,5 @@ return new Promise((resolve, reject) => {

}
/**
* Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
* Must be called at least once before text can be synthesised.
* Saved in this instance. Can be called at any time to update the metadata.
*
* @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
* @param outputFormat any {@link OUTPUT_FORMAT}
* @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
*/
setConfig(conf) {
this._arraybuffer = conf["arraybuffer"] ?? false;
}
async setMetadata(voiceName, outputFormat, voiceLocale) {

@@ -230,26 +253,8 @@ const oldVoice = this._voice;

}
/**
* Close the WebSocket connection.
*/
close() {
this._ws.close();
}
/**
* Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param path a valid output path, including a filename and file extension.
* @param input the input to synthesise
* @param options (optional) {@link ProsodyOptions}
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
toFile(path, input, options) {
return this._rawSSMLRequestToFile(path, this._SSMLTemplate(input, options));
}
/**
* Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param input the text to synthesise. Can include SSML elements.
* @param options (optional) {@link ProsodyOptions}
* @returns {Readable} - a `stream.Readable` with the audio data
*/
toStream(input, options) {

@@ -259,18 +264,5 @@ const { stream } = this._rawSSMLRequest(this._SSMLTemplate(input, options));

}
/**
* Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
*
* @param path a valid output path, including a filename and file extension.
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Promise<string>} - a `Promise` with the full filepath
*/
rawToFile(path, requestSSML) {
return this._rawSSMLRequestToFile(path, requestSSML);
}
/**
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
*
* @param requestSSML the SSML to send. SSML elements required in order to work.
* @returns {Readable} - a `stream.Readable` with the audio data
*/
rawToStream(requestSSML) {

@@ -277,0 +269,0 @@ const { stream } = this._rawSSMLRequest(requestSSML);

{
"name": "edge-tts-node",
"version": "1.0.1",
"version": "1.0.5",
"description": "An Azure Speech Service module that uses the Microsoft Edge Read Aloud API.",

@@ -5,0 +5,0 @@ "author": "Migushthe2nd <Migushthe2nd@users.noreply.github.com>",

@@ -1,4 +0,5 @@

# MsEdgeTTS
[![npm version](https://badge.fury.io/js/msedge-tts.svg)](https://badge.fury.io/js/msedge-tts)
# edge-tts-node
[![npm version](https://badge.fury.io/js/edge-tts-node.svg)](https://badge.fury.io/js/edge-tts-node)
A simple Azure Speech Service module that uses the Microsoft Edge Read Aloud API.

@@ -31,15 +32,18 @@

```js
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
const tts = new MsEdgeTTS();
await tts.setMetadata("en-IE-ConnorNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
await tts.setMetadata(
"en-IE-ConnorNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const readable = tts.toStream("Hi, how are you?");
readable.on("data", (data) => {
console.log("DATA RECEIVED", data);
// raw audio file data
console.log("DATA RECEIVED", data);
// raw audio file data
});
readable.on("close", () => {
console.log("STREAM CLOSED");
console.log("STREAM CLOSED");
});

@@ -51,8 +55,11 @@ ```

```js
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
(async () => {
const tts = new MsEdgeTTS();
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
const tts = new MsEdgeTTS();
await tts.setMetadata(
"en-US-AriaNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
})();

@@ -62,9 +69,17 @@ ```

### Change voice rate, pitch and volume
```js
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
import { MsEdgeTTS, OUTPUT_FORMAT } from "edge-tts-node";
(async () => {
const tts = new MsEdgeTTS();
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?", {rate: 0.5, pitch: "+200Hz"});
const tts = new MsEdgeTTS();
await tts.setMetadata(
"en-US-AriaNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const filePath = await tts.toFile(
"./example_audio.webm",
"Hi, how are you?",
{ rate: 0.5, pitch: "+200Hz" }
);
})();

@@ -74,11 +89,18 @@ ```

### Use an alternative HTTP Agent
Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent).
```js
import {SocksProxyAgent} from 'socks-proxy-agent';
import { SocksProxyAgent } from "socks-proxy-agent";
(async () => {
const agent = new SocksProxyAgent("socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com")
const tts = new MsEdgeTTS(agent);
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
const agent = new SocksProxyAgent(
"socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com"
);
const tts = new MsEdgeTTS(agent);
await tts.setMetadata(
"en-US-AriaNeural",
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
);
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
})();

@@ -85,0 +107,0 @@ ```

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc