@livekit/agents
Comparing version 0.4.3 to 0.4.4
# @livekit/agents

## 0.4.4

### Patch Changes

- add ChunkedStream, openai.TTS - [#155](https://github.com/livekit/agents-js/pull/155) ([@nbsp](https://github.com/nbsp))
- feat(stt): implement StreamAdapter - [#156](https://github.com/livekit/agents-js/pull/156) ([@nbsp](https://github.com/nbsp))
- export VPAEvent not as type - [#161](https://github.com/livekit/agents-js/pull/161) ([@nbsp](https://github.com/nbsp))
- add tts.StreamAdapter - [#156](https://github.com/livekit/agents-js/pull/156) ([@nbsp](https://github.com/nbsp))

## 0.4.3

@@ -4,0 +16,0 @@
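Taken together, the first and last entries introduce a non-streaming synthesis path: `openai.TTS.synthesize()` returns the new `ChunkedStream`, and `tts.StreamAdapter` turns such a TTS back into a streaming one. A minimal consumption sketch — the plugin package name and the no-argument `TTS()` constructor are assumptions not confirmed by this diff, while the `ChunkedStream` iteration and `collect()` calls come from the type declarations further down:

```ts
import { TTS } from '@livekit/agents-plugin-openai'; // assumed package name
import type { AudioFrame } from '@livekit/rtc-node';

const tts = new TTS(); // assumed: default options; the real constructor may require configuration

// Iterate the ChunkedStream as synthesized packets arrive...
for await (const audio of tts.synthesize('Hello from agents 0.4.4')) {
  handleFrame(audio.frame);
}

// ...or merge the whole response into a single AudioFrame with collect().
const merged: AudioFrame = await tts.synthesize('Hello again').collect();

// Placeholder sink for the example; substitute an AudioSource, file writer, etc.
function handleFrame(frame: AudioFrame) {
  void frame;
}
```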
@@ -110,4 +110,5 @@ import { log } from '../log.js';
     for await (const audio of ttsStream) {
-      if (cancelled || audio === SynthesizeStream.END_OF_STREAM)
+      if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {
         break;
+      }
       handle.queue.put(audio.frame);
@@ -128,3 +129,2 @@ }
     const readGeneratedAudio = async () => {
-      let started = false;
       for await (const audio of ttsStream) {
@@ -134,11 +134,5 @@ if (cancelled)
         if (audio === SynthesizeStream.END_OF_STREAM) {
-          if (started) {
-            break;
-          }
-          else {
-            continue;
-          }
+          break;
         }
         handle.queue.put(audio.frame);
-        started = true;
       }
@@ -145,0 +139,0 @@ handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
@@ -107,4 +107,5 @@ import EventEmitter from 'node:events';
     for await (const frame of handle.playoutSource) {
-      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL)
+      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
         break;
+      }
       if (firstFrame) {
@@ -111,0 +112,0 @@ this.#logger
@@ -1,2 +0,2 @@
-export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPAEvent, type VPACallbacks, type AgentCallContext, type AgentTranscriptionOptions, type VPAOptions, VoicePipelineAgent, } from './pipeline_agent.js';
+export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPACallbacks, type AgentCallContext, type AgentTranscriptionOptions, type VPAOptions, VPAEvent, VoicePipelineAgent, } from './pipeline_agent.js';
 //# sourceMappingURL=index.d.ts.map
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { VoicePipelineAgent, } from './pipeline_agent.js';
+export { VPAEvent, VoicePipelineAgent, } from './pipeline_agent.js';
 //# sourceMappingURL=index.js.map
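The pair of changes above is the "export VPAEvent not as type" item from the changelog: `VPAEvent` is now a runtime value rather than a type-only export, so it survives compilation and can be referenced in ordinary code. A small sketch, assuming the package root re-exports the voice pipeline module's symbols (the enum's member names are not shown in this diff, so none are assumed):

```ts
import { VPAEvent } from '@livekit/agents'; // assumed: re-exported from the package root

// With the type-only export in 0.4.3 this import was erased at compile time;
// as of 0.4.4 the enum object exists at runtime, e.g. to enumerate the events
// a VoicePipelineAgent can emit.
console.log(Object.values(VPAEvent));
```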
@@ -6,3 +6,3 @@ import type { RemoteParticipant, Room } from '@livekit/rtc-node';
 import { ChatContext, ChatMessage } from '../llm/index.js';
-import type { STT } from '../stt/index.js';
+import { type STT } from '../stt/index.js';
 import type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';
@@ -9,0 +9,0 @@ import type { TTS } from '../tts/index.js';
@@ -7,3 +7,5 @@ var _a;
 import { log } from '../log.js';
+import { StreamAdapter as STTStreamAdapter } from '../stt/index.js';
 import { SentenceTokenizer as BasicSentenceTokenizer, WordTokenizer as BasicWordTokenizer, hyphenateWord, } from '../tokenize/basic/index.js';
+import { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';
 import { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
@@ -121,2 +123,8 @@ import { AgentOutput } from './agent_output.js';
         this.#opts = { ...defaultVPAOptions, ...opts };
+        if (!stt.capabilities.streaming) {
+            stt = new STTStreamAdapter(stt, vad);
+        }
+        if (!tts.capabilities.streaming) {
+            tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());
+        }
         this.#vad = vad;
@@ -394,2 +402,3 @@ this.#stt = stt;
             this.#transcribedText = this.#transcribedText.slice(userQuestion.length);
+            handle.markUserCommitted();
         };
@@ -417,3 +426,3 @@ // wait for the playHandle to finish and check every 1s if user question should be committed
         if (isUsingTools && !interrupted) {
-            if (!userQuestion || handle.userCommitted) {
+            if (!userQuestion || !handle.userCommitted) {
                 throw new Error('user speech should have been committed before using tools');
@@ -590,3 +599,3 @@ }
             log()
-                .child({ speechId, elapsed: Math.round(Date.now() * 1000 - startTime) / 1000 })
+                .child({ speechId, elapsed: Math.round(Date.now() - startTime) })
                 .debug('received first LLM token');
@@ -593,0 +602,0 @@ }
 export { type SpeechEvent, type SpeechData, type STTCapabilities, SpeechEventType, STT, SpeechStream, } from './stt.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.d.ts.map
@@ -5,2 +5,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 export { SpeechEventType, STT, SpeechStream, } from './stt.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.js.map
 import type { AudioFrame } from '@livekit/rtc-node';
+import type { AudioBuffer } from '../utils.js';
 import { AsyncIterableQueue } from '../utils.js';
@@ -61,2 +62,4 @@ /** Indicates start/middle/end of speech */
     get capabilities(): STTCapabilities;
+    /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */
+    abstract recognize(frame: AudioBuffer): Promise<SpeechEvent>;
     /**
@@ -63,0 +66,0 @@ * Returns a {@link SpeechStream} that can be used to push audio frames and receive
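The new abstract `recognize()` is the batch counterpart to the streaming interface, and the `StreamAdapter` exported above bridges the two: it takes a VAD alongside the STT, presumably to segment audio before handing it to `recognize()`, which is what `VoicePipelineAgent` does below when `stt.capabilities.streaming` is false. A sketch of applying the adapter by hand; the namespace-style import from the package root and the Silero VAD loader are assumptions, and `batchStt` stands in for any recognize-only STT:

```ts
import { stt } from '@livekit/agents'; // assumed: the root package re-exports its stt module
import { VAD } from '@livekit/agents-plugin-silero'; // assumed plugin and loader API

// Hypothetical recognize-only STT instance (capabilities.streaming === false).
declare const batchStt: stt.STT;

const vad = await VAD.load(); // assumed loader; use however you normally obtain a VAD

// Mirrors what VoicePipelineAgent now does internally for non-streaming STTs.
const streamingStt = batchStt.capabilities.streaming
  ? batchStt
  : new stt.StreamAdapter(batchStt, vad);
```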
@@ -25,2 +25,3 @@ import { AsyncIterableQueue } from '../utils.js';
     pushText(text: string): void;
+    flush(): void;
     close(): void;
@@ -27,0 +28,0 @@ next(): Promise<IteratorResult<TokenData>>;
@@ -33,3 +33,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
         const tokens = this.#func(this.#inBuf);
-        if (tokens.length === 0)
+        if (tokens.length <= 1)
             break;
@@ -112,2 +112,5 @@ if (this.#outBuf)
     }
+    flush() {
+        this.#stream.flush();
+    }
     close() {
@@ -114,0 +117,0 @@ super.close();
@@ -1,2 +0,3 @@
-export { type SynthesizedAudio, type TTSCapabilities, TTS, SynthesizeStream } from './tts.js';
+export { type SynthesizedAudio, type TTSCapabilities, TTS, SynthesizeStream, ChunkedStream, } from './tts.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.d.ts.map
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { TTS, SynthesizeStream } from './tts.js';
+export { TTS, SynthesizeStream, ChunkedStream, } from './tts.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.js.map
@@ -41,2 +41,6 @@ import type { AudioFrame } from '@livekit/rtc-node';
     /**
+     * Receives text and returns synthesis in the form of a {@link ChunkedStream}
+     */
+    abstract synthesize(text: string): ChunkedStream;
+    /**
      * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data
@@ -77,2 +81,26 @@ */
 }
+/**
+ * An instance of a text-to-speech response, as an asynchronous iterable iterator.
+ *
+ * @example Looping through frames
+ * ```ts
+ * for await (const event of stream) {
+ *   await source.captureFrame(event.frame);
+ * }
+ * ```
+ *
+ * @remarks
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
+ * exports its own child ChunkedStream class, which inherits this class's methods.
+ */
+export declare abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {
+    protected queue: AsyncIterableQueue<SynthesizedAudio>;
+    protected closed: boolean;
+    /** Collect every frame into one in a single call */
+    collect(): Promise<AudioFrame>;
+    next(): Promise<IteratorResult<SynthesizedAudio>>;
+    /** Close both the input and output of the TTS stream */
+    close(): void;
+    [Symbol.asyncIterator](): ChunkedStream;
+}
 //# sourceMappingURL=tts.d.ts.map
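As the remark says, providers subclass `ChunkedStream`: the base class owns the protected `queue` and the iterator plumbing, so a plugin only has to push `SynthesizedAudio` packets into `this.queue` and close it when the provider response ends. A bare-bones sketch of that shape; the provider fetch helper, the constructor signature, and any `SynthesizedAudio` fields beyond `frame` are assumptions, not taken from this diff:

```ts
import { tts } from '@livekit/agents'; // assumed: the root package re-exports its tts module
import type { AudioFrame } from '@livekit/rtc-node';

// Hypothetical provider call standing in for an HTTP or WebSocket synthesis request.
declare function fetchAudioFrames(text: string): AsyncIterable<AudioFrame>;

class MyProviderChunkedStream extends tts.ChunkedStream {
  constructor(text: string) {
    super();
    void this.#run(text); // fire-and-forget; consumers read via the inherited iterator
  }

  async #run(text: string) {
    for await (const frame of fetchAudioFrames(text)) {
      // SynthesizedAudio is assumed to carry at least the frame; the packet's
      // full shape is not visible in this diff, hence the cast.
      this.queue.put({ frame } as tts.SynthesizedAudio);
    }
    this.queue.close();
  }
}
```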
@@ -1,2 +0,2 @@
-import { AsyncIterableQueue } from '../utils.js';
+import { AsyncIterableQueue, mergeFrames } from '../utils.js';
 /**
@@ -94,2 +94,39 @@ * An instance of a text-to-speech adapter.
 }
+/**
+ * An instance of a text-to-speech response, as an asynchronous iterable iterator.
+ *
+ * @example Looping through frames
+ * ```ts
+ * for await (const event of stream) {
+ *   await source.captureFrame(event.frame);
+ * }
+ * ```
+ *
+ * @remarks
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
+ * exports its own child ChunkedStream class, which inherits this class's methods.
+ */
+export class ChunkedStream {
+    queue = new AsyncIterableQueue();
+    closed = false;
+    /** Collect every frame into one in a single call */
+    async collect() {
+        const frames = [];
+        for await (const event of this) {
+            frames.push(event.frame);
+        }
+        return mergeFrames(frames);
+    }
+    next() {
+        return this.queue.next();
+    }
+    /** Close both the input and output of the TTS stream */
+    close() {
+        this.queue.close();
+        this.closed = true;
+    }
+    [Symbol.asyncIterator]() {
+        return this;
+    }
+}
 //# sourceMappingURL=tts.js.map
 {
   "name": "@livekit/agents",
-  "version": "0.4.3",
+  "version": "0.4.4",
   "description": "LiveKit Agents - Node.js",
@@ -5,0 +5,0 @@ "main": "dist/index.js",
@@ -137,3 +137,5 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
     for await (const audio of ttsStream) {
-      if (cancelled || audio === SynthesizeStream.END_OF_STREAM) break;
+      if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {
+        break;
+      }
       handle.queue.put(audio.frame);
@@ -160,14 +162,8 @@ }
     const readGeneratedAudio = async () => {
-      let started = false;
       for await (const audio of ttsStream) {
         if (cancelled) break;
         if (audio === SynthesizeStream.END_OF_STREAM) {
-          if (started) {
-            break;
-          } else {
-            continue;
-          }
+          break;
         }
         handle.queue.put(audio.frame);
-        started = true;
       }
@@ -174,0 +170,0 @@ handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
@@ -151,3 +151,5 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
     for await (const frame of handle.playoutSource) {
-      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) break;
+      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
+        break;
+      }
       if (firstFrame) {
@@ -154,0 +156,0 @@ this.#logger
@@ -9,3 +9,2 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
   type BeforeLLMCallback,
-  type VPAEvent,
   type VPACallbacks,
@@ -15,3 +14,4 @@ type AgentCallContext,
   type VPAOptions,
+  VPAEvent,
   VoicePipelineAgent,
 } from './pipeline_agent.js';
@@ -23,3 +23,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 import { log } from '../log.js';
-import type { STT } from '../stt/index.js';
+import { type STT, StreamAdapter as STTStreamAdapter } from '../stt/index.js';
 import {
@@ -32,2 +32,3 @@ SentenceTokenizer as BasicSentenceTokenizer,
 import type { TTS } from '../tts/index.js';
+import { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';
 import { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
@@ -257,2 +258,10 @@ import type { VAD, VADEvent } from '../vad.js';
+    if (!stt.capabilities.streaming) {
+      stt = new STTStreamAdapter(stt, vad);
+    }
+    if (!tts.capabilities.streaming) {
+      tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());
+    }
     this.#vad = vad;
@@ -599,2 +608,3 @@ this.#stt = stt;
       this.#transcribedText = this.#transcribedText.slice(userQuestion.length);
+      handle.markUserCommitted();
     };
@@ -625,3 +635,3 @@
     if (isUsingTools && !interrupted) {
-      if (!userQuestion || handle.userCommitted) {
+      if (!userQuestion || !handle.userCommitted) {
        throw new Error('user speech should have been committed before using tools');
@@ -831,3 +841,3 @@ }
      log()
-        .child({ speechId, elapsed: Math.round(Date.now() * 1000 - startTime) / 1000 })
+        .child({ speechId, elapsed: Math.round(Date.now() - startTime) })
        .debug('received first LLM token');
@@ -834,0 +844,0 @@ }
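The constructor hunk above is the user-visible effect of the two StreamAdapters: callers can hand `VoicePipelineAgent` a non-streaming STT or TTS (such as the new `openai.TTS`) and it gets wrapped automatically. The same wrapping can be done by hand wherever a streaming interface is needed, using the constructors shown in this hunk; the namespace-style imports from the package root are assumed:

```ts
import { tts, tokenize } from '@livekit/agents'; // assumed root re-exports

// Hypothetical chunked-only TTS instance (capabilities.streaming === false),
// e.g. the openai.TTS added in this release.
declare const chunkedTts: tts.TTS;

const streamingTts = chunkedTts.capabilities.streaming
  ? chunkedTts
  : // tokenize.basic.SentenceTokenizer path is assumed from the internal imports
    new tts.StreamAdapter(chunkedTts, new tokenize.basic.SentenceTokenizer());
```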
@@ -13,1 +13,2 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 } from './stt.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
@@ -5,2 +5,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 import type { AudioFrame } from '@livekit/rtc-node';
+import type { AudioBuffer } from '../utils.js';
 import { AsyncIterableQueue } from '../utils.js';
@@ -77,2 +78,5 @@
+  /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */
+  abstract recognize(frame: AudioBuffer): Promise<SpeechEvent>;
   /**
@@ -79,0 +83,0 @@ * Returns a {@link SpeechStream} that can be used to push audio frames and receive
@@ -42,3 +42,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
       const tokens = this.#func(this.#inBuf);
-      if (tokens.length === 0) break;
+      if (tokens.length <= 1) break;
@@ -134,2 +134,6 @@ if (this.#outBuf) this.#outBuf += ' ';
+  flush() {
+    this.#stream.flush();
+  }
   close() {
@@ -136,0 +140,0 @@ super.close();
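These two changes work as a pair: the buffered token stream now holds back the final, possibly incomplete token (`tokens.length <= 1`) instead of emitting it eagerly, and the new `flush()` pushes whatever is still buffered downstream — which is how `tts.StreamAdapter` can get the trailing sentence synthesized once no more text is coming. A sketch of the push/flush pattern; obtaining the stream via a `stream()` method on `SentenceTokenizer` is an assumption, as is the exact shape of the emitted `TokenData`:

```ts
import { tokenize } from '@livekit/agents'; // assumed root re-export

const sentences = new tokenize.basic.SentenceTokenizer().stream(); // stream() is assumed

// Incremental text (for example, LLM deltas) is pushed as it arrives.
sentences.pushText('Hello there. How are');
sentences.pushText(' you today?');

// Without flush(), the last (possibly unfinished) sentence stays buffered.
sentences.flush();

console.log(await sentences.next()); // roughly: "Hello there."
console.log(await sentences.next()); // roughly: "How are you today?"
sentences.close();
```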
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { type SynthesizedAudio, type TTSCapabilities, TTS, SynthesizeStream } from './tts.js';
+export {
+  type SynthesizedAudio,
+  type TTSCapabilities,
+  TTS,
+  SynthesizeStream,
+  ChunkedStream,
+} from './tts.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
@@ -5,3 +5,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 import type { AudioFrame } from '@livekit/rtc-node';
-import { AsyncIterableQueue } from '../utils.js';
+import { AsyncIterableQueue, mergeFrames } from '../utils.js';
@@ -65,2 +65,7 @@ /** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */
   /**
+   * Receives text and returns synthesis in the form of a {@link ChunkedStream}
+   */
+  abstract synthesize(text: string): ChunkedStream;
+  /**
    * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data
@@ -144,1 +149,43 @@ */
 }
+/**
+ * An instance of a text-to-speech response, as an asynchronous iterable iterator.
+ *
+ * @example Looping through frames
+ * ```ts
+ * for await (const event of stream) {
+ *   await source.captureFrame(event.frame);
+ * }
+ * ```
+ *
+ * @remarks
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
+ * exports its own child ChunkedStream class, which inherits this class's methods.
+ */
+export abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {
+  protected queue = new AsyncIterableQueue<SynthesizedAudio>();
+  protected closed = false;
+  /** Collect every frame into one in a single call */
+  async collect(): Promise<AudioFrame> {
+    const frames = [];
+    for await (const event of this) {
+      frames.push(event.frame);
+    }
+    return mergeFrames(frames);
+  }
+  next(): Promise<IteratorResult<SynthesizedAudio>> {
+    return this.queue.next();
+  }
+  /** Close both the input and output of the TTS stream */
+  close() {
+    this.queue.close();
+    this.closed = true;
+  }
+  [Symbol.asyncIterator](): ChunkedStream {
+    return this;
+  }
+}