@livekit/agents-plugin-openai
Comparing version 0.7.2 with 0.7.3
@@ -193,4 +193,5 @@ import { llm } from '@livekit/agents';
     #private;
-    constructor(client: OpenAI, chatCtx: llm.ChatContext, fncCtx: llm.FunctionContext | undefined, opts: LLMOptions, parallelToolCalls?: boolean, temperature?: number, n?: number);
+    label: string;
+    constructor(llm: LLM, client: OpenAI, chatCtx: llm.ChatContext, fncCtx: llm.FunctionContext | undefined, opts: LLMOptions, parallelToolCalls?: boolean, temperature?: number, n?: number);
 }
 //# sourceMappingURL=llm.d.ts.map
@@ -247,2 +247,3 @@ import { llm, log } from "@livekit/agents";
     return new LLMStream(
+      this,
       this.#client,
@@ -265,4 +266,5 @@ chatCtx,
   #id = randomUUID();
-  constructor(client, chatCtx, fncCtx, opts, parallelToolCalls, temperature, n) {
-    super(chatCtx, fncCtx);
+  label = "openai.LLMStream";
+  constructor(llm2, client, chatCtx, fncCtx, opts, parallelToolCalls, temperature, n) {
+    super(llm2, chatCtx, fncCtx);
     this.#client = client;
@@ -269,0 +271,0 @@ this.#run(opts, n, parallelToolCalls, temperature);
@@ -28,2 +28,4 @@ import { AsyncIterableQueue, Future, llm, multimodal } from '@livekit/agents';
     doneFut: Future;
+    createdTimestamp: number;
+    firstTokenTimestamp?: number;
 }
@@ -30,0 +32,0 @@ export interface RealtimeOutput {
@@ -8,2 +8,3 @@ import {
   mergeFrames,
+  metrics,
   multimodal
@@ -689,3 +690,4 @@ } from "@livekit/agents";
       output: [],
-      doneFut
+      doneFut,
+      createdTimestamp: Date.now()
     };
@@ -704,3 +706,61 @@ this.#pendingResponses[newResponse.id] = newResponse;
     response.doneFut.resolve();
+    let metricsError;
+    let cancelled = false;
+    switch (response.status) {
+      case "failed": {
+        if (response.statusDetails.type !== "failed") break;
+        const err = response.statusDetails.error;
+        metricsError = new metrics.MultimodalLLMError({
+          type: response.statusDetails.type,
+          code: err == null ? void 0 : err.code,
+          message: err == null ? void 0 : err.message
+        });
+        this.#logger.child({ code: err == null ? void 0 : err.code, error: err == null ? void 0 : err.message }).error("response generation failed");
+        break;
+      }
+      case "incomplete": {
+        if (response.statusDetails.type !== "incomplete") break;
+        const reason = response.statusDetails.reason;
+        metricsError = new metrics.MultimodalLLMError({
+          type: response.statusDetails.type,
+          reason
+        });
+        this.#logger.child({ reason }).error("response generation incomplete");
+        break;
+      }
+      case "cancelled": {
+        cancelled = true;
+        break;
+      }
+    }
     this.emit("response_done", response);
+    let ttft;
+    if (response.firstTokenTimestamp) {
+      ttft = response.firstTokenTimestamp - response.createdTimestamp;
+    }
+    const duration = Date.now() - response.createdTimestamp;
+    const usage = response.usage;
+    const metric = {
+      timestamp: response.createdTimestamp,
+      requestId: response.id,
+      ttft,
+      duration,
+      cancelled,
+      label: this.constructor.name,
+      completionTokens: (usage == null ? void 0 : usage.output_tokens) || 0,
+      promptTokens: (usage == null ? void 0 : usage.input_tokens) || 0,
+      totalTokens: (usage == null ? void 0 : usage.total_tokens) || 0,
+      tokensPerSecond: ((usage == null ? void 0 : usage.output_tokens) || 0) / duration * 1e3,
+      error: metricsError,
+      inputTokenDetails: {
+        cachedTokens: (usage == null ? void 0 : usage.input_token_details.cached_tokens) || 0,
+        textTokens: (usage == null ? void 0 : usage.input_token_details.text_tokens) || 0,
+        audioTokens: (usage == null ? void 0 : usage.input_token_details.audio_tokens) || 0
+      },
+      outputTokenDetails: {
+        textTokens: (usage == null ? void 0 : usage.output_token_details.text_tokens) || 0,
+        audioTokens: (usage == null ? void 0 : usage.output_token_details.audio_tokens) || 0
+      }
+    };
+    this.emit("metrics_collected", metric);
   }
@@ -804,2 +864,3 @@ #handleResponseOutputItemAdded(event) {
     output == null ? void 0 : output.content.push(newContent);
+    response.firstTokenTimestamp = Date.now();
     this.emit("response_content_added", newContent);
@@ -806,0 +867,0 @@ }
@@ -14,2 +14,3 @@ import { type AudioBuffer, stt } from '@livekit/agents';
     #private;
+    label: string;
     /**
@@ -38,3 +39,3 @@ * Create a new instance of OpenAI STT.
     }>): STT;
-    recognize(buffer: AudioBuffer, language?: string): Promise<stt.SpeechEvent>;
+    _recognize(buffer: AudioBuffer, language?: string): Promise<stt.SpeechEvent>;
     /** This method throws an error; streaming is unsupported on OpenAI STT. */
@@ -41,0 +42,0 @@ stream(): stt.SpeechStream;
@@ -12,2 +12,3 @@ import { mergeFrames, stt } from "@livekit/agents";
   #client;
+  label = "openai.STT";
   /**
@@ -76,3 +77,3 @@ * Create a new instance of OpenAI STT.
   }
-  async recognize(buffer, language) {
+  async _recognize(buffer, language) {
     const config = this.#sanitizeOptions(language);
@@ -79,0 +80,0 @@ buffer = mergeFrames(buffer);
@@ -14,2 +14,3 @@ import { tts } from '@livekit/agents';
     #private;
+    label: string;
     /**
@@ -33,4 +34,5 @@ * Create a new instance of OpenAI TTS.
     #private;
-    constructor(stream: Promise<any>);
+    label: string;
+    constructor(tts: TTS, text: string, stream: Promise<any>);
 }
 //# sourceMappingURL=tts.d.ts.map
@@ -15,2 +15,3 @@ import { AudioByteStream, tts } from "@livekit/agents";
   #client;
+  label = "openai.TTS";
   /**
@@ -39,2 +40,4 @@ * Create a new instance of OpenAI TTS.
     return new ChunkedStream(
+      this,
+      text,
       this.#client.audio.speech.create({
@@ -54,5 +57,6 @@ input: text,
 class ChunkedStream extends tts.ChunkedStream {
+  label = "openai.ChunkedStream";
   // set Promise<T> to any because OpenAI returns an annoying Response type
-  constructor(stream) {
-    super();
+  constructor(tts2, text, stream) {
+    super(text, tts2);
     this.#run(stream);
@@ -65,9 +69,14 @@ }
     const frames = audioByteStream.write(buffer);
+    let lastFrame;
+    const sendLastFrame = (segmentId, final) => {
+      if (lastFrame) {
+        this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+        lastFrame = void 0;
+      }
+    };
     for (const frame of frames) {
-      this.queue.put({
-        frame,
-        requestId,
-        segmentId: requestId
-      });
+      sendLastFrame(requestId, false);
+      lastFrame = frame;
     }
+    sendLastFrame(requestId, true);
     this.queue.close();
@@ -74,0 +83,0 @@ }
 {
   "name": "@livekit/agents-plugin-openai",
-  "version": "0.7.2",
+  "version": "0.7.3",
   "description": "OpenAI plugin for LiveKit Node Agents",
@@ -41,3 +41,3 @@ "main": "dist/index.js",
     "@livekit/rtc-node": "^0.12.1",
-    "@livekit/agents": "^0.5.2"
+    "@livekit/agents": "^0.6.0"
   },
@@ -44,0 +44,0 @@ "scripts": {
@@ -401,2 +401,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
     return new LLMStream(
+      this,
       this.#client,
@@ -420,4 +421,6 @@ chatCtx,
   #id = randomUUID();
+  label = 'openai.LLMStream';
   constructor(
+    llm: LLM,
     client: OpenAI,
@@ -431,3 +434,3 @@ chatCtx: llm.ChatContext,
   ) {
-    super(chatCtx, fncCtx);
+    super(llm, chatCtx, fncCtx);
     this.#client = client;
@@ -434,0 +437,0 @@ this.#run(opts, n, parallelToolCalls, temperature);
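Note: the common thread in the LLM hunks above is that the stream object now receives the component that created it, plus a string label, at construction time. A generic sketch of that parent-reference idea follows; the names below are illustrative only and are not part of the @livekit/agents API.

// Generic sketch of the parent-reference pattern shown in the LLMStream change above.
// None of these names belong to @livekit/agents; they only mirror the shape.
class BaseStream {
  constructor(
    readonly parent: object, // the component that created this stream
    readonly label: string, // e.g. "openai.LLMStream", useful for tagging metrics and logs
  ) {}
}

class ExampleStream extends BaseStream {
  constructor(parent: object) {
    // Passing the creator up front lets a base class attribute usage back to it.
    super(parent, 'example.Stream');
  }
}

console.log(new ExampleStream({}).label); // "example.Stream"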
@@ -11,2 +11,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
   mergeFrames,
+  metrics,
   multimodal,
@@ -44,2 +45,4 @@ } from '@livekit/agents';
   doneFut: Future;
+  createdTimestamp: number;
+  firstTokenTimestamp?: number;
 }
@@ -937,2 +940,3 @@
       doneFut: doneFut,
+      createdTimestamp: Date.now(),
     };
@@ -952,3 +956,66 @@ this.#pendingResponses[newResponse.id] = newResponse;
     response.doneFut.resolve();
+    let metricsError: Error | undefined;
+    let cancelled = false;
+    switch (response.status) {
+      case 'failed': {
+        if (response.statusDetails.type !== 'failed') break;
+        const err = response.statusDetails.error;
+        metricsError = new metrics.MultimodalLLMError({
+          type: response.statusDetails.type,
+          code: err?.code,
+          message: err?.message,
+        });
+        this.#logger
+          .child({ code: err?.code, error: err?.message })
+          .error('response generation failed');
+        break;
+      }
+      case 'incomplete': {
+        if (response.statusDetails.type !== 'incomplete') break;
+        const reason = response.statusDetails.reason;
+        metricsError = new metrics.MultimodalLLMError({
+          type: response.statusDetails.type,
+          reason,
+        });
+        this.#logger.child({ reason }).error('response generation incomplete');
+        break;
+      }
+      case 'cancelled': {
+        cancelled = true;
+        break;
+      }
+    }
     this.emit('response_done', response);
+    let ttft: number | undefined;
+    if (response.firstTokenTimestamp) {
+      ttft = response.firstTokenTimestamp - response.createdTimestamp;
+    }
+    const duration = Date.now() - response.createdTimestamp;
+    const usage = response.usage;
+    const metric: metrics.MultimodalLLMMetrics = {
+      timestamp: response.createdTimestamp,
+      requestId: response.id,
+      ttft: ttft!,
+      duration,
+      cancelled,
+      label: this.constructor.name,
+      completionTokens: usage?.output_tokens || 0,
+      promptTokens: usage?.input_tokens || 0,
+      totalTokens: usage?.total_tokens || 0,
+      tokensPerSecond: ((usage?.output_tokens || 0) / duration) * 1000,
+      error: metricsError,
+      inputTokenDetails: {
+        cachedTokens: usage?.input_token_details.cached_tokens || 0,
+        textTokens: usage?.input_token_details.text_tokens || 0,
+        audioTokens: usage?.input_token_details.audio_tokens || 0,
+      },
+      outputTokenDetails: {
+        textTokens: usage?.output_token_details.text_tokens || 0,
+        audioTokens: usage?.output_token_details.audio_tokens || 0,
+      },
+    };
+    this.emit('metrics_collected', metric);
   }
@@ -1069,2 +1136,3 @@
     output?.content.push(newContent);
+    response!.firstTokenTimestamp = Date.now();
     this.emit('response_content_added', newContent);
@@ -1071,0 +1139,0 @@ }
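The time-to-first-token and throughput figures in the hunk above are plain wall-clock arithmetic over the two timestamps added to the response bookkeeping object. A standalone sketch of the same math, with made-up numbers:

// Standalone illustration of the metric arithmetic used above (timestamps are invented).
const createdTimestamp = 1_700_000_000_000; // Date.now() when the response entry was created
const firstTokenTimestamp = 1_700_000_000_450; // stamped when the first content part arrives
const completedAt = 1_700_000_002_000; // Date.now() when the response is done

const ttft = firstTokenTimestamp - createdTimestamp; // 450 ms to first token
const duration = completedAt - createdTimestamp; // 2000 ms total
const outputTokens = 120;
const tokensPerSecond = (outputTokens / duration) * 1000; // 60 tokens per second

console.log({ ttft, duration, tokensPerSecond });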
@@ -28,2 +28,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
   #client: OpenAI;
+  label = 'openai.STT';
@@ -112,3 +113,3 @@ /**
-  async recognize(buffer: AudioBuffer, language?: string): Promise<stt.SpeechEvent> {
+  async _recognize(buffer: AudioBuffer, language?: string): Promise<stt.SpeechEvent> {
     const config = this.#sanitizeOptions(language);
@@ -115,0 +116,0 @@ buffer = mergeFrames(buffer);
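The rename from recognize to _recognize suggests the public recognize entry point now lives on the stt.STT base class in @livekit/agents, with plugins implementing the underscored hook; application code would keep calling recognize. A hedged caller-side sketch under that assumption (it also assumes STT is exported from the package root and that an API key is available via the environment):

import type { AudioBuffer } from '@livekit/agents';
import { STT } from '@livekit/agents-plugin-openai';

// `buffer` stands in for previously captured audio frames; producing them is out of scope here.
declare const buffer: AudioBuffer;

const sttInstance = new STT(); // assumes OPENAI_API_KEY is set in the environment
// Callers keep using recognize(); the plugin's _recognize() is invoked by the base
// class, which is presumably where request timing and metrics are now recorded.
const event = await sttInstance.recognize(buffer);
console.log(event.alternatives?.[0]?.text);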
@@ -5,2 +5,3 @@ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 import { AudioByteStream, tts } from '@livekit/agents';
+import type { AudioFrame } from '@livekit/rtc-node';
 import { randomUUID } from 'crypto';
@@ -32,2 +33,3 @@ import { OpenAI } from 'openai';
   #client: OpenAI;
+  label = 'openai.TTS';
@@ -63,2 +65,4 @@ /**
     return new ChunkedStream(
+      this,
+      text,
       this.#client.audio.speech.create({
@@ -80,5 +84,7 @@ input: text,
 export class ChunkedStream extends tts.ChunkedStream {
+  label = 'openai.ChunkedStream';
   // set Promise<T> to any because OpenAI returns an annoying Response type
-  constructor(stream: Promise<any>) {
-    super();
+  constructor(tts: TTS, text: string, stream: Promise<any>) {
+    super(text, tts);
     this.#run(stream);
@@ -93,11 +99,18 @@ }
+    let lastFrame: AudioFrame | undefined;
+    const sendLastFrame = (segmentId: string, final: boolean) => {
+      if (lastFrame) {
+        this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+        lastFrame = undefined;
+      }
+    };
     for (const frame of frames) {
-      this.queue.put({
-        frame,
-        requestId,
-        segmentId: requestId,
-      });
+      sendLastFrame(requestId, false);
+      lastFrame = frame;
     }
+    sendLastFrame(requestId, true);
     this.queue.close();
   }
 }
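Both TTS hunks replace the direct queue.put per frame with a one-frame holdback so that the last frame of the request can be flagged final: true. A minimal standalone sketch of that holdback pattern, using strings instead of AudioFrames:

// Generic illustration of the "hold one item back" pattern used above, so the
// last emitted item can carry final: true.
function* withFinalFlag<T>(items: Iterable<T>): Generator<{ item: T; final: boolean }> {
  let held: T | undefined;
  let hasHeld = false;
  for (const item of items) {
    if (hasHeld) yield { item: held as T, final: false };
    held = item;
    hasHeld = true;
  }
  if (hasHeld) yield { item: held as T, final: true };
}

for (const out of withFinalFlag(['frame-1', 'frame-2', 'frame-3'])) {
  console.log(out); // only "frame-3" is emitted with final: true
}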