@livekit/agents
Comparing version 0.4.3 to 0.4.4
# @livekit/agents

## 0.4.4

### Patch Changes

- add ChunkedStream, openai.TTS - [#155](https://github.com/livekit/agents-js/pull/155) ([@nbsp](https://github.com/nbsp))
- feat(stt): implement StreamAdapter - [#156](https://github.com/livekit/agents-js/pull/156) ([@nbsp](https://github.com/nbsp))
- export VPAEvent not as type - [#161](https://github.com/livekit/agents-js/pull/161) ([@nbsp](https://github.com/nbsp))
- add tts.StreamAdapter - [#156](https://github.com/livekit/agents-js/pull/156) ([@nbsp](https://github.com/nbsp))

## 0.4.3

@@ -4,0 +16,0 @@
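Taken together, the first and last entries introduce a non-streaming synthesis path: `openai.TTS.synthesize()` returns the new `ChunkedStream`, and `tts.StreamAdapter` turns such a TTS back into a streaming one. A minimal consumption sketch — the plugin package name and the no-argument `TTS()` constructor are assumptions not confirmed by this diff, while the `ChunkedStream` iteration and `collect()` calls come from the type declarations further down:

```ts
import { TTS } from '@livekit/agents-plugin-openai'; // assumed package name
import type { AudioFrame } from '@livekit/rtc-node';

const tts = new TTS(); // assumed: default options; the real constructor may require configuration

// Iterate the ChunkedStream as synthesized packets arrive...
for await (const audio of tts.synthesize('Hello from agents 0.4.4')) {
  handleFrame(audio.frame);
}

// ...or merge the whole response into a single AudioFrame with collect().
const merged: AudioFrame = await tts.synthesize('Hello again').collect();

// Placeholder sink for the example; substitute an AudioSource, file writer, etc.
function handleFrame(frame: AudioFrame) {
  void frame;
}
```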
@@ -110,4 +110,5 @@ import { log } from '../log.js';
     for await (const audio of ttsStream) {
-      if (cancelled || audio === SynthesizeStream.END_OF_STREAM)
+      if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {
         break;
+      }
       handle.queue.put(audio.frame);
@@ -128,3 +129,2 @@ }
     const readGeneratedAudio = async () => {
-      let started = false;
       for await (const audio of ttsStream) {
@@ -134,11 +134,5 @@ if (cancelled)
         if (audio === SynthesizeStream.END_OF_STREAM) {
-          if (started) {
-            break;
-          }
-          else {
-            continue;
-          }
+          break;
         }
         handle.queue.put(audio.frame);
-        started = true;
       }
@@ -145,0 +139,0 @@ handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
@@ -107,4 +107,5 @@ import EventEmitter from 'node:events';
     for await (const frame of handle.playoutSource) {
-      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL)
+      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
         break;
+      }
       if (firstFrame) {
@@ -111,0 +112,0 @@ this.#logger
@@ -1,2 +0,2 @@
-export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPAEvent, type VPACallbacks, type AgentCallContext, type AgentTranscriptionOptions, type VPAOptions, VoicePipelineAgent, } from './pipeline_agent.js';
+export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPACallbacks, type AgentCallContext, type AgentTranscriptionOptions, type VPAOptions, VPAEvent, VoicePipelineAgent, } from './pipeline_agent.js';
 //# sourceMappingURL=index.d.ts.map
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { VoicePipelineAgent, } from './pipeline_agent.js';
+export { VPAEvent, VoicePipelineAgent, } from './pipeline_agent.js';
 //# sourceMappingURL=index.js.map
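The pair of changes above is the "export VPAEvent not as type" item from the changelog: `VPAEvent` is now a runtime value rather than a type-only export, so it survives compilation and can be referenced in ordinary code. A small sketch, assuming the package root re-exports the voice pipeline module's symbols (the enum's member names are not shown in this diff, so none are assumed):

```ts
import { VPAEvent } from '@livekit/agents'; // assumed: re-exported from the package root

// With the type-only export in 0.4.3 this import was erased at compile time;
// as of 0.4.4 the enum object exists at runtime, e.g. to enumerate the events
// a VoicePipelineAgent can emit.
console.log(Object.values(VPAEvent));
```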
@@ -6,3 +6,3 @@ import type { RemoteParticipant, Room } from '@livekit/rtc-node';
 import { ChatContext, ChatMessage } from '../llm/index.js';
-import type { STT } from '../stt/index.js';
+import { type STT } from '../stt/index.js';
 import type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';
@@ -9,0 +9,0 @@ import type { TTS } from '../tts/index.js';
@@ -7,3 +7,5 @@ var _a;
 import { log } from '../log.js';
+import { StreamAdapter as STTStreamAdapter } from '../stt/index.js';
 import { SentenceTokenizer as BasicSentenceTokenizer, WordTokenizer as BasicWordTokenizer, hyphenateWord, } from '../tokenize/basic/index.js';
+import { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';
 import { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
@@ -121,2 +123,8 @@ import { AgentOutput } from './agent_output.js';
         this.#opts = { ...defaultVPAOptions, ...opts };
+        if (!stt.capabilities.streaming) {
+            stt = new STTStreamAdapter(stt, vad);
+        }
+        if (!tts.capabilities.streaming) {
+            tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());
+        }
         this.#vad = vad;
@@ -394,2 +402,3 @@ this.#stt = stt;
             this.#transcribedText = this.#transcribedText.slice(userQuestion.length);
+            handle.markUserCommitted();
         };
@@ -417,3 +426,3 @@ // wait for the playHandle to finish and check every 1s if user question should be committed
         if (isUsingTools && !interrupted) {
-            if (!userQuestion || handle.userCommitted) {
+            if (!userQuestion || !handle.userCommitted) {
                 throw new Error('user speech should have been committed before using tools');
@@ -590,3 +599,3 @@ }
             log()
-                .child({ speechId, elapsed: Math.round(Date.now() * 1000 - startTime) / 1000 })
+                .child({ speechId, elapsed: Math.round(Date.now() - startTime) })
                 .debug('received first LLM token');
@@ -593,0 +602,0 @@ }
 export { type SpeechEvent, type SpeechData, type STTCapabilities, SpeechEventType, STT, SpeechStream, } from './stt.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.d.ts.map
@@ -5,2 +5,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 export { SpeechEventType, STT, SpeechStream, } from './stt.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.js.map
 import type { AudioFrame } from '@livekit/rtc-node';
+import type { AudioBuffer } from '../utils.js';
 import { AsyncIterableQueue } from '../utils.js';
@@ -61,2 +62,4 @@ /** Indicates start/middle/end of speech */
     get capabilities(): STTCapabilities;
+    /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */
+    abstract recognize(frame: AudioBuffer): Promise<SpeechEvent>;
     /**
@@ -63,0 +66,0 @@ * Returns a {@link SpeechStream} that can be used to push audio frames and receive
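The new abstract `recognize()` is the batch counterpart to the streaming interface, and the `StreamAdapter` exported above bridges the two: it takes a VAD alongside the STT, presumably to segment audio before handing it to `recognize()`, which is what `VoicePipelineAgent` does below when `stt.capabilities.streaming` is false. A sketch of applying the adapter by hand; the namespace-style import from the package root and the Silero VAD loader are assumptions, and `batchStt` stands in for any recognize-only STT:

```ts
import { stt } from '@livekit/agents'; // assumed: the root package re-exports its stt module
import { VAD } from '@livekit/agents-plugin-silero'; // assumed plugin and loader API

// Hypothetical recognize-only STT instance (capabilities.streaming === false).
declare const batchStt: stt.STT;

const vad = await VAD.load(); // assumed loader; use however you normally obtain a VAD

// Mirrors what VoicePipelineAgent now does internally for non-streaming STTs.
const streamingStt = batchStt.capabilities.streaming
  ? batchStt
  : new stt.StreamAdapter(batchStt, vad);
```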
@@ -25,2 +25,3 @@ import { AsyncIterableQueue } from '../utils.js';
     pushText(text: string): void;
+    flush(): void;
     close(): void;
@@ -27,0 +28,0 @@ next(): Promise<IteratorResult<TokenData>>;
@@ -33,3 +33,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
         const tokens = this.#func(this.#inBuf);
-        if (tokens.length === 0)
+        if (tokens.length <= 1)
             break;
@@ -112,2 +112,5 @@ if (this.#outBuf)
     }
+    flush() {
+        this.#stream.flush();
+    }
     close() {
@@ -114,0 +117,0 @@ super.close();
@@ -1,2 +0,3 @@
-export { type SynthesizedAudio, type TTSCapabilities, TTS, SynthesizeStream } from './tts.js';
+export { type SynthesizedAudio, type TTSCapabilities, TTS, SynthesizeStream, ChunkedStream, } from './tts.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.d.ts.map
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { TTS, SynthesizeStream } from './tts.js';
+export { TTS, SynthesizeStream, ChunkedStream, } from './tts.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
 //# sourceMappingURL=index.js.map
@@ -41,2 +41,6 @@ import type { AudioFrame } from '@livekit/rtc-node';
     /**
+     * Receives text and returns synthesis in the form of a {@link ChunkedStream}
+     */
+    abstract synthesize(text: string): ChunkedStream;
+    /**
      * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data
@@ -77,2 +81,26 @@ */
 }
+/**
+ * An instance of a text-to-speech response, as an asynchronous iterable iterator.
+ *
+ * @example Looping through frames
+ * ```ts
+ * for await (const event of stream) {
+ *   await source.captureFrame(event.frame);
+ * }
+ * ```
+ *
+ * @remarks
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
+ * exports its own child ChunkedStream class, which inherits this class's methods.
+ */
+export declare abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {
+    protected queue: AsyncIterableQueue<SynthesizedAudio>;
+    protected closed: boolean;
+    /** Collect every frame into one in a single call */
+    collect(): Promise<AudioFrame>;
+    next(): Promise<IteratorResult<SynthesizedAudio>>;
+    /** Close both the input and output of the TTS stream */
+    close(): void;
+    [Symbol.asyncIterator](): ChunkedStream;
+}
 //# sourceMappingURL=tts.d.ts.map
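As the remark says, providers subclass `ChunkedStream`: the base class owns the protected `queue` and the iterator plumbing, so a plugin only has to push `SynthesizedAudio` packets into `this.queue` and close it when the provider response ends. A bare-bones sketch of that shape; the provider fetch helper, the constructor signature, and any `SynthesizedAudio` fields beyond `frame` are assumptions, not taken from this diff:

```ts
import { tts } from '@livekit/agents'; // assumed: the root package re-exports its tts module
import type { AudioFrame } from '@livekit/rtc-node';

// Hypothetical provider call standing in for an HTTP or WebSocket synthesis request.
declare function fetchAudioFrames(text: string): AsyncIterable<AudioFrame>;

class MyProviderChunkedStream extends tts.ChunkedStream {
  constructor(text: string) {
    super();
    void this.#run(text); // fire-and-forget; consumers read via the inherited iterator
  }

  async #run(text: string) {
    for await (const frame of fetchAudioFrames(text)) {
      // SynthesizedAudio is assumed to carry at least the frame; the packet's
      // full shape is not visible in this diff, hence the cast.
      this.queue.put({ frame } as tts.SynthesizedAudio);
    }
    this.queue.close();
  }
}
```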
@@ -1,2 +0,2 @@
-import { AsyncIterableQueue } from '../utils.js';
+import { AsyncIterableQueue, mergeFrames } from '../utils.js';
 /**
@@ -94,2 +94,39 @@ * An instance of a text-to-speech adapter.
 }
+/**
+ * An instance of a text-to-speech response, as an asynchronous iterable iterator.
+ *
+ * @example Looping through frames
+ * ```ts
+ * for await (const event of stream) {
+ *   await source.captureFrame(event.frame);
+ * }
+ * ```
+ *
+ * @remarks
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
+ * exports its own child ChunkedStream class, which inherits this class's methods.
+ */
+export class ChunkedStream {
+    queue = new AsyncIterableQueue();
+    closed = false;
+    /** Collect every frame into one in a single call */
+    async collect() {
+        const frames = [];
+        for await (const event of this) {
+            frames.push(event.frame);
+        }
+        return mergeFrames(frames);
+    }
+    next() {
+        return this.queue.next();
+    }
+    /** Close both the input and output of the TTS stream */
+    close() {
+        this.queue.close();
+        this.closed = true;
+    }
+    [Symbol.asyncIterator]() {
+        return this;
+    }
+}
 //# sourceMappingURL=tts.js.map
 {
   "name": "@livekit/agents",
-  "version": "0.4.3",
+  "version": "0.4.4",
   "description": "LiveKit Agents - Node.js",
@@ -5,0 +5,0 @@ "main": "dist/index.js",
@@ -137,3 +137,5 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
     for await (const audio of ttsStream) {
-      if (cancelled || audio === SynthesizeStream.END_OF_STREAM) break;
+      if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {
+        break;
+      }
       handle.queue.put(audio.frame);
@@ -160,14 +162,8 @@ }
     const readGeneratedAudio = async () => {
-      let started = false;
       for await (const audio of ttsStream) {
         if (cancelled) break;
         if (audio === SynthesizeStream.END_OF_STREAM) {
-          if (started) {
-            break;
-          } else {
-            continue;
-          }
+          break;
         }
         handle.queue.put(audio.frame);
-        started = true;
       }
@@ -174,0 +170,0 @@ handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
@@ -151,3 +151,5 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
     for await (const frame of handle.playoutSource) {
-      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) break;
+      if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
+        break;
+      }
       if (firstFrame) {
@@ -154,0 +156,0 @@ this.#logger
@@ -9,3 +9,2 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
   type BeforeLLMCallback,
-  type VPAEvent,
   type VPACallbacks,
@@ -15,3 +14,4 @@ type AgentCallContext,
   type VPAOptions,
+  VPAEvent,
   VoicePipelineAgent,
 } from './pipeline_agent.js';
@@ -23,3 +23,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 import { log } from '../log.js';
-import type { STT } from '../stt/index.js';
+import { type STT, StreamAdapter as STTStreamAdapter } from '../stt/index.js';
 import {
@@ -32,2 +32,3 @@ SentenceTokenizer as BasicSentenceTokenizer,
 import type { TTS } from '../tts/index.js';
+import { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';
 import { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
@@ -257,2 +258,10 @@ import type { VAD, VADEvent } from '../vad.js';
+    if (!stt.capabilities.streaming) {
+      stt = new STTStreamAdapter(stt, vad);
+    }
+    if (!tts.capabilities.streaming) {
+      tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());
+    }
     this.#vad = vad;
@@ -599,2 +608,3 @@ this.#stt = stt;
       this.#transcribedText = this.#transcribedText.slice(userQuestion.length);
+      handle.markUserCommitted();
     };
@@ -625,3 +635,3 @@
     if (isUsingTools && !interrupted) {
-      if (!userQuestion || handle.userCommitted) {
+      if (!userQuestion || !handle.userCommitted) {
        throw new Error('user speech should have been committed before using tools');
@@ -831,3 +841,3 @@ }
      log()
-        .child({ speechId, elapsed: Math.round(Date.now() * 1000 - startTime) / 1000 })
+        .child({ speechId, elapsed: Math.round(Date.now() - startTime) })
        .debug('received first LLM token');
@@ -834,0 +844,0 @@ }
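The constructor hunk above is the user-visible effect of the two StreamAdapters: callers can hand `VoicePipelineAgent` a non-streaming STT or TTS (such as the new `openai.TTS`) and it gets wrapped automatically. The same wrapping can be done by hand wherever a streaming interface is needed, using the constructors shown in this hunk; the namespace-style imports from the package root are assumed:

```ts
import { tts, tokenize } from '@livekit/agents'; // assumed root re-exports

// Hypothetical chunked-only TTS instance (capabilities.streaming === false),
// e.g. the openai.TTS added in this release.
declare const chunkedTts: tts.TTS;

const streamingTts = chunkedTts.capabilities.streaming
  ? chunkedTts
  : // tokenize.basic.SentenceTokenizer path is assumed from the internal imports
    new tts.StreamAdapter(chunkedTts, new tokenize.basic.SentenceTokenizer());
```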
@@ -13,1 +13,2 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 } from './stt.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
@@ -5,2 +5,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 import type { AudioFrame } from '@livekit/rtc-node';
+import type { AudioBuffer } from '../utils.js';
 import { AsyncIterableQueue } from '../utils.js';
@@ -77,2 +78,5 @@
+  /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */
+  abstract recognize(frame: AudioBuffer): Promise<SpeechEvent>;
   /**
@@ -79,0 +83,0 @@ * Returns a {@link SpeechStream} that can be used to push audio frames and receive
@@ -42,3 +42,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
       const tokens = this.#func(this.#inBuf);
-      if (tokens.length === 0) break;
+      if (tokens.length <= 1) break;
@@ -134,2 +134,6 @@ if (this.#outBuf) this.#outBuf += ' ';
+  flush() {
+    this.#stream.flush();
+  }
   close() {
@@ -136,0 +140,0 @@ super.close();
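These two changes work as a pair: the buffered token stream now holds back the final, possibly incomplete token (`tokens.length <= 1`) instead of emitting it eagerly, and the new `flush()` pushes whatever is still buffered downstream — which is how `tts.StreamAdapter` can get the trailing sentence synthesized once no more text is coming. A sketch of the push/flush pattern; obtaining the stream via a `stream()` method on `SentenceTokenizer` is an assumption, as is the exact shape of the emitted `TokenData`:

```ts
import { tokenize } from '@livekit/agents'; // assumed root re-export

const sentences = new tokenize.basic.SentenceTokenizer().stream(); // stream() is assumed

// Incremental text (for example, LLM deltas) is pushed as it arrives.
sentences.pushText('Hello there. How are');
sentences.pushText(' you today?');

// Without flush(), the last (possibly unfinished) sentence stays buffered.
sentences.flush();

console.log(await sentences.next()); // roughly: "Hello there."
console.log(await sentences.next()); // roughly: "How are you today?"
sentences.close();
```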
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { type SynthesizedAudio, type TTSCapabilities, TTS, SynthesizeStream } from './tts.js';
+export {
+  type SynthesizedAudio,
+  type TTSCapabilities,
+  TTS,
+  SynthesizeStream,
+  ChunkedStream,
+} from './tts.js';
+export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
@@ -5,3 +5,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 import type { AudioFrame } from '@livekit/rtc-node';
-import { AsyncIterableQueue } from '../utils.js';
+import { AsyncIterableQueue, mergeFrames } from '../utils.js';
@@ -65,2 +65,7 @@ /** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */
   /**
+   * Receives text and returns synthesis in the form of a {@link ChunkedStream}
+   */
+  abstract synthesize(text: string): ChunkedStream;
+  /**
    * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data
@@ -144,1 +149,43 @@ */
 }
+/**
+ * An instance of a text-to-speech response, as an asynchronous iterable iterator.
+ *
+ * @example Looping through frames
+ * ```ts
+ * for await (const event of stream) {
+ *   await source.captureFrame(event.frame);
+ * }
+ * ```
+ *
+ * @remarks
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
+ * exports its own child ChunkedStream class, which inherits this class's methods.
+ */
+export abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {
+  protected queue = new AsyncIterableQueue<SynthesizedAudio>();
+  protected closed = false;
+  /** Collect every frame into one in a single call */
+  async collect(): Promise<AudioFrame> {
+    const frames = [];
+    for await (const event of this) {
+      frames.push(event.frame);
+    }
+    return mergeFrames(frames);
+  }
+  next(): Promise<IteratorResult<SynthesizedAudio>> {
+    return this.queue.next();
+  }
+  /** Close both the input and output of the TTS stream */
+  close() {
+    this.queue.close();
+    this.closed = true;
+  }
+  [Symbol.asyncIterator](): ChunkedStream {
+    return this;
+  }
+}