stepfun-realtime-api - npm package version comparison

Comparing version 0.1.0 to 0.1.1

dist/api.d.ts

@@ -14,3 +14,3 @@ import { RealtimeEventHandler } from "./event-handler";

connect({ model }?: {
model: string;
model: "step-1o-audio" | "step-audio-2" | "step-audio-2-mini";
}): Promise<void>;

@@ -17,0 +17,0 @@ disconnect(): void;
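For orientation, a minimal usage sketch of the narrowed connect() signature above. The api variable (presumably an instance of the RealtimeAPI class declared in dist/api.d.ts) and its construction are assumptions; only the call shape and the default model come from this diff:

    // Connect with an explicit model from the new union type
    await api.connect({ model: "step-audio-2-mini" });
    // Or omit the argument and use the default shown in the implementation below
    await api.connect(); // equivalent to connect({ model: "step-1o-audio" })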

@@ -54,3 +54,3 @@ "use strict";

}
async connect({ model } = { model: "step-1o-audio" }) {
async connect({ model } = { "model": "step-1o-audio" }) {
if (this.isConnected()) {

@@ -57,0 +57,0 @@ console.warn("Already connected to WebSocket.");

@@ -30,3 +30,3 @@ import { ClientRealtimeEvent, ServerConversationItemCreated, ServerConversationItemDeleted, ServerConversationItemInputAudioTranscriptionCompleted, ServerConversationItemInputAudioTranscriptionDelta, ServerConversationItemTruncated, ServerInputAudioBufferCleared, ServerInputAudioBufferCommitted, ServerInputAudioBufferSpeechStarted, ServerInputAudioBufferSpeechStopped, ServerRealtimeEvent, ServerResponseAudioDelta, ServerResponseAudioDone, ServerResponseAudioTranscriptDelta, ServerResponseAudioTranscriptDone, ServerResponseContentPartAdded, ServerResponseContentPartDone, ServerResponseCreated, ServerResponseDone, ServerResponseError, ServerResponseFunctionCallArgumentsDelta, ServerResponseFunctionCallArgumentsDone, ServerResponseOutputItemAdded, ServerResponseOutputItemDone, ServerSessionCreated, ServerSessionUpdated, Session } from "./event";

getSessionID(): string;
connect(): Promise<void>;
connect(model?: "step-1o-audio" | "step-audio-2" | "step-audio-2-mini"): Promise<void>;
private registerAPIHandler;

@@ -33,0 +33,0 @@ /**

@@ -79,3 +79,6 @@ "use strict";

deleteItem(id) {
this.api.send({ type: event_1.ClientEventType.ConversationItemDelete, item_id: id });
this.api.send({
type: event_1.ClientEventType.ConversationItemDelete,
item_id: id,
});
}

@@ -98,4 +101,4 @@ async waitForSessionCreated() {

}
async connect() {
await this.api.connect();
async connect(model = "step-1o-audio") {
await this.api.connect({ model });
this.updateSession(this.session);

@@ -218,3 +221,5 @@ }

const item = this.conversation.itemMap.get(itemId);
if (item && item.formatted.audio && item.formatted.audio.length > 0) {
if (item &&
item.formatted.audio &&
item.formatted.audio.length > 0) {
// Compute this audio item's playback duration (PCM16 format, 24 kHz sample rate)

@@ -221,0 +226,0 @@ const audioBytes = item.formatted.audio.length;
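The comment above derives playback time from raw PCM16 audio at a 24 kHz sample rate. A self-contained sketch of that arithmetic (the helper name is illustrative, not part of the package):

    // PCM16 means 2 bytes per sample, so one second at 24 kHz is 48,000 bytes.
    function pcm16DurationMs(audioBytes: number, sampleRateHz = 24000): number {
      const bytesPerSample = 2;
      return (audioBytes / bytesPerSample / sampleRateHz) * 1000;
    }

    pcm16DurationMs(96_000); // 2000 ms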

@@ -37,3 +37,3 @@ # Stepfun Realtime API 阶跃星辰实时语音

// Connect to the server
await client.connect();
await client.connect("step-1o-audio"); // alternatives: step-audio-2-mini / step-audio-2
console.log("✅ 已连接到 Stepfun Realtime API");

@@ -40,0 +40,0 @@

@@ -5,3 +5,3 @@ {

"name": "stepfun-realtime-api",
"version": "0.1.0",
"version": "0.1.1",
"scripts": {

@@ -8,0 +8,0 @@ "build": "tsc && cp README.md dist/",

@@ -37,3 +37,3 @@ # Stepfun Realtime API 阶跃星辰实时语音

// Connect to the server
await client.connect();
await client.connect("step-1o-audio"); // alternatives: step-audio-2-mini / step-audio-2
console.log("✅ 已连接到 Stepfun Realtime API");

@@ -40,0 +40,0 @@

@@ -24,3 +24,3 @@ import { RealtimeEventHandler } from "./event-handler";

async connect({ model }: { model: string } = { model: "step-1o-audio" }): Promise<void> {
async connect({ model }: { model: "step-1o-audio" | "step-audio-2" | "step-audio-2-mini" } = { "model": "step-1o-audio" }): Promise<void> {
if (this.isConnected()) {

@@ -27,0 +27,0 @@ console.warn("Already connected to WebSocket.");

@@ -73,6 +73,9 @@ import { RealtimeAPI } from "./api";

};
this.api.on("server." + ServerEventType.SessionCreated, (ev: ServerRealtimeEvent<{ session: Session }>) => {
this.session.id = ev.session.id;
this.sessionCreated = true;
});
this.api.on(
"server." + ServerEventType.SessionCreated,
(ev: ServerRealtimeEvent<{ session: Session }>) => {
this.session.id = ev.session.id;
this.sessionCreated = true;
},
);
this.registerAPIHandler();

@@ -115,7 +118,15 @@ }

sendUserMessage(contents: ({ type: "text"; text: string } | { type: "input_audio"; audio: string })[]) {
sendUserMessage(
contents: (
| { type: "text"; text: string }
| { type: "input_audio"; audio: string }
)[],
) {
if (contents.length > 0) {
const event: ClientRealtimeEvent<{
item: {
content: ({ type: "text"; text: string } | { type: "input_audio"; audio: string })[];
content: (
| { type: "text"; text: string }
| { type: "input_audio"; audio: string }
)[];
type: "message";

@@ -134,3 +145,6 @@ role: "user";

deleteItem(id: string) {
this.api.send({ type: ClientEventType.ConversationItemDelete, item_id: id });
this.api.send({
type: ClientEventType.ConversationItemDelete,
item_id: id,
});
}

@@ -142,5 +156,4 @@

}
const event: ServerRealtimeEvent<{ session: Session }> = await this.api.waitForNext(
"server." + ServerEventType.SessionCreated
);
const event: ServerRealtimeEvent<{ session: Session }> =
await this.api.waitForNext("server." + ServerEventType.SessionCreated);
return event.session;

@@ -160,4 +173,4 @@ }

async connect() {
await this.api.connect();
async connect(model: "step-1o-audio" | "step-audio-2" | "step-audio-2-mini" = "step-1o-audio") {
await this.api.connect({ model });
this.updateSession(this.session);

@@ -186,18 +199,24 @@ }

// todo
this.api.on("server." + ServerEventType.InputAudioBufferSpeechStopped, (event: ServerEventType) =>
this.conversation.processEvent(event, this.audioBuffer)
this.api.on(
"server." + ServerEventType.InputAudioBufferSpeechStopped,
(event: ServerEventType) =>
this.conversation.processEvent(event, this.audioBuffer),
);
this.api.on("server." + ServerEventType.InputAudioBufferSpeechStarted, (event: ServerEventType) =>
this.api.dispatch("interrupted", {})
this.api.on(
"server." + ServerEventType.InputAudioBufferSpeechStarted,
(event: ServerEventType) => this.api.dispatch("interrupted", {}),
);
this.api.on("server." + ServerEventType.ConversationItemCreated, (event: ServerEventType) => {
const ret = this.conversation.processEvent(event);
if (ret && ret.item) {
this.api.dispatch("conversation.item.appended", event);
if (ret.item.status === "completed") {
this.api.dispatch("conversation.item.completed", event);
this.api.on(
"server." + ServerEventType.ConversationItemCreated,
(event: ServerEventType) => {
const ret = this.conversation.processEvent(event);
if (ret && ret.item) {
this.api.dispatch("conversation.item.appended", event);
if (ret.item.status === "completed") {
this.api.dispatch("conversation.item.completed", event);
}
}
}
});
},
);
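The handler block above forwards ConversationItemCreated into the local conversation.item.appended / conversation.item.completed events. A sketch of consuming them from application code, assuming a client instance; ev.item follows the payload type used by the on() overloads further down:

    client.on(LocalEventType.ConversationItemAppended, (ev) => {
      console.log("item appended:", ev.item);
    });
    client.on(LocalEventType.ConversationItemCompleted, (ev) => {
      console.log("item completed:", ev.item);
    });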

@@ -210,53 +229,71 @@ handleAndDispatch(ServerEventType.ConversationItemDeleted);

handleAndDispatch(ServerEventType.ResponseAudioTranscriptDone);
handleAndDispatch(ServerEventType.ConversationItemInputAudioTranscriptionDelta);
handleAndDispatch(ServerEventType.ConversationItemInputAudioTranscriptionCompleted);
handleAndDispatch(
ServerEventType.ConversationItemInputAudioTranscriptionDelta,
);
handleAndDispatch(
ServerEventType.ConversationItemInputAudioTranscriptionCompleted,
);
handleAndDispatch(ServerEventType.ResponseFunctionCallArgumentsDelta);
handleAndDispatch(ServerEventType.ResponseFunctionCallArgumentsDone);
this.api.on("server." + ServerEventType.ResponseOutputItemDone, (event: ServerEventType) => {
const ret = this.conversation.processEvent(event);
if (ret && ret.item) {
if (ret.item.status === "completed") {
this.api.dispatch(LocalEventType.ConversationItemCompleted, event);
this.api.on(
"server." + ServerEventType.ResponseOutputItemDone,
(event: ServerEventType) => {
const ret = this.conversation.processEvent(event);
if (ret && ret.item) {
if (ret.item.status === "completed") {
this.api.dispatch(LocalEventType.ConversationItemCompleted, event);
}
}
}
});
},
);
// Handle audio playback completed events
this.api.on("server." + ServerEventType.ResponseCreated, (event: ServerResponseCreated) => {
// Mark every response that is still playing as interrupted
for (const [responseId, state] of this.audioPlaybackState.entries()) {
if (!state.isResponseDone) {
state.isInterrupted = true;
this.api.on(
"server." + ServerEventType.ResponseCreated,
(event: ServerResponseCreated) => {
// Mark every response that is still playing as interrupted
for (const [responseId, state] of this.audioPlaybackState.entries()) {
if (!state.isResponseDone) {
state.isInterrupted = true;
}
}
}
// Initialize the audio playback state for the new response
this.audioPlaybackState.set(event.response.id, {
responseId: event.response.id,
isResponseDone: false,
isInterrupted: false,
});
});
// Initialize the audio playback state for the new response
this.audioPlaybackState.set(event.response.id, {
responseId: event.response.id,
isResponseDone: false,
isInterrupted: false,
});
},
);
this.api.on("server." + ServerEventType.ResponseDone, (event: ServerResponseDone) => {
// Mark the response as done
const playbackState = this.audioPlaybackState.get(event.response.id);
if (playbackState) {
playbackState.isResponseDone = true;
this.checkAndEmitAudioPlaybackCompleted(event.response.id);
}
});
this.api.on(
"server." + ServerEventType.ResponseDone,
(event: ServerResponseDone) => {
// Mark the response as done
const playbackState = this.audioPlaybackState.get(event.response.id);
if (playbackState) {
playbackState.isResponseDone = true;
this.checkAndEmitAudioPlaybackCompleted(event.response.id);
}
},
);
this.api.on("server." + ServerEventType.ResponseAudioDelta, (event: ServerResponseAudioDelta) => {
const playbackState = this.audioPlaybackState.get(event.response_id);
if (playbackState) {
// Record the time of the first audio delta
if (!playbackState.firstDeltaTime) {
playbackState.firstDeltaTime = Date.now();
this.api.on(
"server." + ServerEventType.ResponseAudioDelta,
(event: ServerResponseAudioDelta) => {
const playbackState = this.audioPlaybackState.get(event.response_id);
if (playbackState) {
// Record the time of the first audio delta
if (!playbackState.firstDeltaTime) {
playbackState.firstDeltaTime = Date.now();
}
}
}
});
},
);
this.api.on("server." + ServerEventType.ResponseDone, (event: ServerResponseDone) => {
this.api.on(
"server." + ServerEventType.ResponseDone,
(event: ServerResponseDone) => {
// 检查是否有音频播放状态

@@ -268,6 +305,10 @@ const playbackState = this.audioPlaybackState.get(event.response.id);

// Compute the playback duration (time elapsed since the first audio delta + a buffer)
const playbackDuration = Date.now() - playbackState.firstDeltaTime + 50; // add a 50 ms buffer
const playbackDuration =
Date.now() - playbackState.firstDeltaTime + 50; // add a 50 ms buffer
playbackState.playbackTimeoutId = setTimeout(() => {
this.checkAndEmitAudioPlaybackCompleted(event.response.id, playbackDuration);
this.checkAndEmitAudioPlaybackCompleted(
event.response.id,
playbackDuration,
);
}, playbackDuration);
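The hunk above approximates when audio playback finishes: it takes the wall-clock time elapsed since the first audio delta, pads it with 50 ms, and schedules a re-check after that delay. A reduced sketch of the pattern (names are illustrative, not part of the package):

    function scheduleCompletionCheck(
      firstDeltaTime: number,
      check: () => void,
    ): ReturnType<typeof setTimeout> {
      const playbackDuration = Date.now() - firstDeltaTime + 50; // 50 ms buffer
      return setTimeout(check, playbackDuration);
    }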

@@ -279,3 +320,4 @@ } else {

}
});
},
);
}

@@ -287,3 +329,6 @@

*/
private checkAndEmitAudioPlaybackCompleted(responseId: string, playbackDurationMs?: number) {
private checkAndEmitAudioPlaybackCompleted(
responseId: string,
playbackDurationMs?: number,
) {
const playbackState = this.audioPlaybackState.get(responseId);

@@ -300,3 +345,7 @@ if (playbackState && playbackState.isResponseDone) {

const item = this.conversation.itemMap.get(itemId);
if (item && item.formatted.audio && item.formatted.audio.length > 0) {
if (
item &&
item.formatted.audio &&
item.formatted.audio.length > 0
) {
// Compute this audio item's playback duration (PCM16 format, 24 kHz sample rate)

@@ -363,15 +412,27 @@ const audioBytes = item.formatted.audio.length;

on(event: "server.*", callback: (event: ServerRealtimeEvent<any>) => void): void;
on(event: "client.*", callback: (event: ClientRealtimeEvent<any>) => void): void;
on(
event: "server.*",
callback: (event: ServerRealtimeEvent<any>) => void,
): void;
on(
event: "client.*",
callback: (event: ClientRealtimeEvent<any>) => void,
): void;
on(
event: LocalEventType.ConversationItemAppended,
callback: (event: ServerRealtimeEvent<{ item: ServerItemType<`realtime.item`> }>) => void
callback: (
event: ServerRealtimeEvent<{ item: ServerItemType<`realtime.item`> }>,
) => void,
): void;
on(
event: LocalEventType.ConversationItemCompleted,
callback: (event: ServerRealtimeEvent<{ item: ServerItemType<`realtime.item`> }>) => void
callback: (
event: ServerRealtimeEvent<{ item: ServerItemType<`realtime.item`> }>,
) => void,
): void;
on(
event: LocalEventType.ConversationUpdated,
callback: (event: ServerRealtimeEvent<{ item: ServerItemType<`realtime.item`> }>) => void
callback: (
event: ServerRealtimeEvent<{ item: ServerItemType<`realtime.item`> }>,
) => void,
): void;

@@ -386,55 +447,115 @@ on(

is_interrupted: boolean;
}) => void
}) => void,
): void;
on(event: ServerEventType.SessionCreated, callback: (event: ServerSessionCreated) => void): void;
on(event: ServerEventType.SessionUpdated, callback: (event: ServerSessionUpdated) => void): void;
on(event: ServerEventType.ConversationItemCreated, callback: (event: ServerConversationItemCreated) => void): void;
on(
event: ServerEventType.SessionCreated,
callback: (event: ServerSessionCreated) => void,
): void;
on(
event: ServerEventType.SessionUpdated,
callback: (event: ServerSessionUpdated) => void,
): void;
on(
event: ServerEventType.ConversationItemCreated,
callback: (event: ServerConversationItemCreated) => void,
): void;
on(
event: ServerEventType.ConversationItemInputAudioTranscriptionCompleted,
callback: (event: ServerConversationItemInputAudioTranscriptionCompleted) => void
callback: (
event: ServerConversationItemInputAudioTranscriptionCompleted,
) => void,
): void;
on(
event: ServerEventType.ConversationItemInputAudioTranscriptionDelta,
callback: (event: ServerConversationItemInputAudioTranscriptionDelta) => void
callback: (
event: ServerConversationItemInputAudioTranscriptionDelta,
) => void,
): void;
on(
event: ServerEventType.InputAudioBufferSpeechStarted,
callback: (event: ServerInputAudioBufferSpeechStarted) => void
callback: (event: ServerInputAudioBufferSpeechStarted) => void,
): void;
on(
event: ServerEventType.InputAudioBufferSpeechStopped,
callback: (event: ServerInputAudioBufferSpeechStopped, inputBuffer: Buffer) => void
callback: (
event: ServerInputAudioBufferSpeechStopped,
inputBuffer: Buffer,
) => void,
): void;
on(
event: ServerEventType.InputAudioBufferCommitted,
callback: (event: ServerInputAudioBufferCommitted) => void
callback: (event: ServerInputAudioBufferCommitted) => void,
): void;
on(event: ServerEventType.InputAudioBufferCleared, callback: (event: ServerInputAudioBufferCleared) => void): void;
on(event: ServerEventType.ConversationItemDeleted, callback: (event: ServerConversationItemDeleted) => void): void;
on(
event: ServerEventType.InputAudioBufferCleared,
callback: (event: ServerInputAudioBufferCleared) => void,
): void;
on(
event: ServerEventType.ConversationItemDeleted,
callback: (event: ServerConversationItemDeleted) => void,
): void;
on(
event: ServerEventType.ConversationItemTruncated,
callback: (event: ServerConversationItemTruncated) => void
callback: (event: ServerConversationItemTruncated) => void,
): void;
on(event: ServerEventType.ResponseAudioDelta, callback: (event: ServerResponseAudioDelta) => void): void;
on(event: ServerEventType.ResponseAudioDone, callback: (event: ServerResponseAudioDone) => void): void;
on(event: ServerEventType.ResponseContentPartAdded, callback: (event: ServerResponseContentPartAdded) => void): void;
on(event: ServerEventType.ResponseContentPartDone, callback: (event: ServerResponseContentPartDone) => void): void;
on(event: ServerEventType.ResponseFunctionCallArgumentsDone, callback: (event: ServerResponseFunctionCallArgumentsDone) => void): void;
on(event: ServerEventType.ResponseFunctionCallArgumentsDelta, callback: (event: ServerResponseFunctionCallArgumentsDelta) => void): void;
on(
event: ServerEventType.ResponseAudioDelta,
callback: (event: ServerResponseAudioDelta) => void,
): void;
on(
event: ServerEventType.ResponseAudioDone,
callback: (event: ServerResponseAudioDone) => void,
): void;
on(
event: ServerEventType.ResponseContentPartAdded,
callback: (event: ServerResponseContentPartAdded) => void,
): void;
on(
event: ServerEventType.ResponseContentPartDone,
callback: (event: ServerResponseContentPartDone) => void,
): void;
on(
event: ServerEventType.ResponseFunctionCallArgumentsDone,
callback: (event: ServerResponseFunctionCallArgumentsDone) => void,
): void;
on(
event: ServerEventType.ResponseFunctionCallArgumentsDelta,
callback: (event: ServerResponseFunctionCallArgumentsDelta) => void,
): void;
on(
event: ServerEventType.ResponseAudioTranscriptDelta,
callback: (event: ServerResponseAudioTranscriptDelta) => void
callback: (event: ServerResponseAudioTranscriptDelta) => void,
): void;
on(
event: ServerEventType.ResponseAudioTranscriptDone,
callback: (event: ServerResponseAudioTranscriptDone) => void
callback: (event: ServerResponseAudioTranscriptDone) => void,
): void;
on(event: ServerEventType.ResponseOutputItemAdded, callback: (event: ServerResponseOutputItemAdded) => void): void;
on(event: ServerEventType.ResponseOutputItemDone, callback: (event: ServerResponseOutputItemDone) => void): void;
on(event: ServerEventType.ResponseCreated, callback: (event: ServerResponseCreated) => void): void;
on(event: ServerEventType.ResponseDone, callback: (event: ServerResponseDone) => void): void;
on(event: ServerEventType.Error, callback: (event: ServerResponseError) => void): void;
on(
event: ServerEventType | LocalEventType | ClientEventType | "server.*" | "client.*",
callback: (event: any, extra?: any) => void
event: ServerEventType.ResponseOutputItemAdded,
callback: (event: ServerResponseOutputItemAdded) => void,
): void;
on(
event: ServerEventType.ResponseOutputItemDone,
callback: (event: ServerResponseOutputItemDone) => void,
): void;
on(
event: ServerEventType.ResponseCreated,
callback: (event: ServerResponseCreated) => void,
): void;
on(
event: ServerEventType.ResponseDone,
callback: (event: ServerResponseDone) => void,
): void;
on(
event: ServerEventType.Error,
callback: (event: ServerResponseError) => void,
): void;
on(
event:
| ServerEventType
| LocalEventType
| ClientEventType
| "server.*"
| "client.*",
callback: (event: any, extra?: any) => void,
): void {

@@ -452,3 +573,11 @@ if (Object.values(ServerEventType).includes(event as any)) {

off(event: LocalEventType | ServerEventType | ClientEventType | "server.*" | "client.*", callback: Function): void {
off(
event:
| LocalEventType
| ServerEventType
| ClientEventType
| "server.*"
| "client.*",
callback: Function,
): void {
if (Object.values(ServerEventType).includes(event as any)) {

@@ -455,0 +584,0 @@ this.api.off("server." + event, callback);
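To close, a hedged example of the typed on()/off() overloads reformatted above; the client instance and the handler body are illustrative:

    const onAudioDelta = (ev: ServerResponseAudioDelta) => {
      // queue the audio chunk belonging to ev.response_id for playback
    };
    client.on(ServerEventType.ResponseAudioDelta, onAudioDelta);
    // ...later, detach the handler again
    client.off(ServerEventType.ResponseAudioDelta, onAudioDelta);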