@clawmate/clawmate
Advanced tools
// DashScope REST API root that normalizeBaseUrl falls back to when the caller supplies no base URL.
const DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com/api/v1";
/**
 * Narrows an arbitrary value to a usable string.
 *
 * @param value Anything; only strings are considered.
 * @returns The trimmed string when it has non-whitespace content, otherwise `null`.
 */
function toOptionalString(value) {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : null;
}
/**
 * Produces a base URL without trailing slashes.
 *
 * @param baseUrl Caller-supplied base URL; any falsy value selects DEFAULT_BASE_URL.
 * @returns The stringified URL with every trailing "/" removed.
 */
function normalizeBaseUrl(baseUrl) {
  const source = baseUrl || DEFAULT_BASE_URL;
  return String(source).replace(/\/+$/, "");
}
/**
 * Builds the voice-customization endpoint URL by appending the fixed
 * customization path to the normalized base URL.
 *
 * @param baseUrl API root (trailing slashes are tolerated).
 * @returns Absolute endpoint URL.
 */
function buildCloneCustomizationUrl(baseUrl) {
  const root = normalizeBaseUrl(baseUrl);
  return root + "/services/audio/tts/customization";
}
/**
 * Derives the voice `prefix` sent with a create_voice request.
 *
 * The value is trimmed, lower-cased, stripped of everything except a-z and
 * 0-9, and cut to at most 9 characters.
 *
 * @param value Requested speaker name; may be missing.
 * @returns The sanitized prefix, or "clawmate" when nothing usable remains.
 */
function normalizeClonePrefix(value) {
  const lowered = String(value || "").trim().toLowerCase();
  const alphanumeric = lowered.replace(/[^a-z0-9]+/g, "");
  const prefix = alphanumeric.slice(0, 9);
  return prefix === "" ? "clawmate" : prefix;
}
| async function parseJsonBody(response) { | ||
| const requestId = | ||
| response.headers.get("x-dashscope-request-id") ?? | ||
| response.headers.get("x-request-id") ?? | ||
| null; | ||
| let rawText = ""; | ||
| let body = null; | ||
| try { | ||
| rawText = await response.text(); | ||
| body = rawText ? JSON.parse(rawText) : null; | ||
| } catch (error) { | ||
| const err = new Error("TTS provider 响应解析失败"); | ||
| err.code = "TTS_RESPONSE_PARSE_ERROR"; | ||
| err.requestId = requestId; | ||
| err.details = { | ||
| cause: error instanceof Error ? error.message : String(error), | ||
| responseText: rawText, | ||
| }; | ||
| throw err; | ||
| } | ||
| if (!response.ok) { | ||
| const err = new Error(toOptionalString(body?.message) ?? `TTS provider 请求失败: HTTP ${response.status}`); | ||
| err.code = toOptionalString(body?.code) ?? "TTS_PROVIDER_HTTP_ERROR"; | ||
| err.requestId = requestId; | ||
| err.details = body; | ||
| throw err; | ||
| } | ||
| return { requestId, body, rawText }; | ||
| } | ||
/**
 * Submits a `create_voice` request to the voice-customization endpoint.
 *
 * @param options Reads `baseUrl`, `apiKey`, `targetModel`, `speaker`,
 *   `promptAudioUrl` and the optional `fetchImpl` (defaults to global `fetch`).
 * @returns `{ requestId, modelId, taskId, status, raw }`, where each id/status
 *   is the first non-blank string found under `output` and then `data`, or `null`.
 * @throws Whatever `parseJsonBody` throws for unreadable or non-OK responses.
 */
async function createAliyunCloneVoiceModel(options) {
  const doFetch = options.fetchImpl ?? fetch;
  const endpoint = buildCloneCustomizationUrl(options.baseUrl);
  const payload = {
    model: "voice-enrollment",
    input: {
      action: "create_voice",
      target_model: options.targetModel,
      prefix: normalizeClonePrefix(options.speaker),
      url: options.promptAudioUrl,
    },
  };
  const response = await doFetch(endpoint, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${options.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });
  const parsed = await parseJsonBody(response);
  const output = parsed.body?.output;
  const data = parsed.body?.data;
  // The provider may report the id under several keys; the first non-blank one wins.
  const modelId =
    toOptionalString(output?.voice_id) ??
    toOptionalString(output?.model_id) ??
    toOptionalString(data?.model_id) ??
    toOptionalString(data?.id);
  const taskId = toOptionalString(output?.task_id) ?? toOptionalString(data?.task_id);
  const status = toOptionalString(output?.status) ?? toOptionalString(data?.status);
  return {
    requestId: parsed.requestId,
    modelId,
    taskId,
    status,
    raw: parsed.body,
  };
}
/**
 * Repeatedly issues `query_voice` requests until the cloned voice is ready,
 * has failed, or the attempt budget is exhausted.
 *
 * @param options Reads `statusUrl` (used as the API base URL; the
 *   customization path is appended), `apiKey`, `taskId` (sent as `voice_id`),
 *   and optional `maxAttempts` (default 60), `pollIntervalMs` (default 3000)
 *   and `fetchImpl` (default global `fetch`).
 * @returns `{ requestId, modelId, taskId, status, raw }` once the status is
 *   "OK", "SUCCEEDED" or "SUCCESS", or a model id is present.
 * @throws Error with code "TTS_CLONE_MODEL_CREATE_FAILED" when the status is
 *   "FAILED" or "UNDEPLOYED".
 * @throws Error with code "TTS_CLONE_MODEL_TIMEOUT" when every attempt ends
 *   without a terminal status.
 */
async function pollAliyunCloneVoiceModel(options) {
  const fetchImpl = options.fetchImpl ?? fetch;
  const maxAttempts = options.maxAttempts ?? 60;
  const pollIntervalMs = options.pollIntervalMs ?? 3000;
  // The request is identical on every attempt, so build it once.
  const endpoint = buildCloneCustomizationUrl(options.statusUrl);
  const requestInit = {
    method: "POST",
    headers: {
      Authorization: `Bearer ${options.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "voice-enrollment",
      input: {
        action: "query_voice",
        voice_id: options.taskId,
      },
    }),
  };
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    const response = await fetchImpl(endpoint, requestInit);
    const { requestId, body } = await parseJsonBody(response);
    const status = toOptionalString(body?.output?.status) ?? toOptionalString(body?.data?.status);
    const modelId =
      toOptionalString(body?.output?.voice_id) ??
      toOptionalString(body?.output?.model_id) ??
      toOptionalString(body?.data?.model_id) ??
      toOptionalString(body?.data?.id);
    // Check failure first: a failed response that still echoes an id must not
    // be mistaken for success.
    if (status === "FAILED" || status === "UNDEPLOYED") {
      throw Object.assign(new Error("复刻语音模型创建失败"), {
        code: "TTS_CLONE_MODEL_CREATE_FAILED",
        requestId,
        details: body,
      });
    }
    if (status === "OK" || status === "SUCCEEDED" || status === "SUCCESS" || modelId) {
      return {
        requestId,
        modelId,
        taskId: options.taskId,
        status,
        raw: body,
      };
    }
    // Only wait when another attempt will follow; sleeping before the timeout is wasted time.
    if (attempt < maxAttempts - 1) {
      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    }
  }
  throw Object.assign(new Error("等待复刻语音模型完成超时"), {
    code: "TTS_CLONE_MODEL_TIMEOUT",
    details: {
      taskId: options.taskId,
      maxAttempts,
      pollIntervalMs,
    },
  });
}
| module.exports = { | ||
| createAliyunCloneVoiceModel, | ||
| pollAliyunCloneVoiceModel, | ||
| }; |
| import os from "node:os"; | ||
| import path from "node:path"; | ||
/**
 * Locates the OpenClaw home directory.
 *
 * @returns The trimmed `OPENCLAW_HOME` environment variable when it is set to
 *   a non-blank value, otherwise `<user home>/.openclaw`.
 */
function resolveOpenClawHome(): string {
  const override = process.env.OPENCLAW_HOME?.trim();
  if (override) {
    return override;
  }
  return path.join(os.homedir(), ".openclaw");
}
/**
 * Computes the per-day directory for generated images.
 *
 * @param now Timestamp selecting the day folder; defaults to the current time.
 * @returns `<OpenClaw home>/media/clawmate-generated/<YYYY-MM-DD>`. The day is
 *   taken from `toISOString()`, i.e. it is the UTC calendar day.
 */
export function resolveGeneratedImageDir(now = new Date()): string {
  const isoDay = now.toISOString().substring(0, 10);
  const segments = ["media", "clawmate-generated", isoDay];
  return path.join(resolveOpenClawHome(), ...segments);
}
/**
 * Computes the per-day directory for generated voice clips.
 *
 * @param now Timestamp selecting the day folder; defaults to the current time.
 * @returns `<OpenClaw home>/media/clawmate-voice/<YYYY-MM-DD>`. The day is
 *   taken from `toISOString()`, i.e. it is the UTC calendar day.
 */
export function resolveGeneratedAudioDir(now = new Date()): string {
  const isoDay = now.toISOString().substring(0, 10);
  const segments = ["media", "clawmate-voice", isoDay];
  return path.join(resolveOpenClawHome(), ...segments);
}
/**
 * Resolves the location of `SOUL.md`.
 *
 * @param workspaceDir Optional workspace directory. Surrounding whitespace is
 *   ignored, so the value that passes the blank check is the one that is joined.
 * @returns `<workspaceDir>/SOUL.md` when a non-blank directory is given,
 *   otherwise `<OpenClaw home>/workspace/SOUL.md`.
 */
export function resolveSoulMdPath(workspaceDir?: string): string {
  const trimmedDir = typeof workspaceDir === "string" ? workspaceDir.trim() : "";
  if (trimmedDir) {
    return path.join(trimmedDir, "SOUL.md");
  }
  return path.join(resolveOpenClawHome(), "workspace", "SOUL.md");
}
| import { toOptionalString } from "./shared"; | ||
/** Config fragment that may carry an API key under a camelCase or snake_case name. */
interface ConfigWithApiKey {
  apiKey?: string;
  api_key?: string;
}
/** Config fragment that may carry a base URL under a camelCase or snake_case name. */
interface ConfigWithBaseUrl {
  baseUrl?: string;
  base_url?: string;
}
/**
 * Resolves the DashScope API key.
 *
 * Candidates are tried in order: `config.apiKey`, `config.api_key`, then the
 * `DASHSCOPE_API_KEY` environment variable. Each candidate is validated on its
 * own, so an empty or blank config value no longer blocks the fallback to the
 * next source (`??` alone does not skip `""`).
 *
 * @param config Config fragment that may hold the key.
 * @returns The trimmed key, or `null` when no source provides a usable value.
 */
export function resolveDashScopeApiKey(config: ConfigWithApiKey): string | null {
  const candidates = [config.apiKey, config.api_key, process.env.DASHSCOPE_API_KEY];
  for (const candidate of candidates) {
    const resolved = toOptionalString(candidate)?.trim();
    if (resolved) {
      return resolved;
    }
  }
  return null;
}
/**
 * Resolves the fal API key.
 *
 * Candidates are tried in order: `config.apiKey`, `config.api_key`, then the
 * `FAL_KEY` environment variable. Each candidate is validated on its own, so
 * an empty or blank config value no longer blocks the fallback to the next
 * source (`??` alone does not skip `""`).
 *
 * @param config Config fragment that may hold the key.
 * @returns The trimmed key, or `null` when no source provides a usable value.
 */
export function resolveFalApiKey(config: ConfigWithApiKey): string | null {
  const candidates = [config.apiKey, config.api_key, process.env.FAL_KEY];
  for (const candidate of candidates) {
    const resolved = toOptionalString(candidate)?.trim();
    if (resolved) {
      return resolved;
    }
  }
  return null;
}
/**
 * Resolves the OpenAI API key.
 *
 * Candidates are tried in order: `config.apiKey`, `config.api_key`, then the
 * `OPENAI_API_KEY` environment variable. Each candidate is validated on its
 * own, so an empty or blank config value no longer blocks the fallback to the
 * next source (`??` alone does not skip `""`).
 *
 * @param config Config fragment that may hold the key.
 * @returns The trimmed key, or `null` when no source provides a usable value.
 */
export function resolveOpenAiApiKey(config: ConfigWithApiKey): string | null {
  const candidates = [config.apiKey, config.api_key, process.env.OPENAI_API_KEY];
  for (const candidate of candidates) {
    const resolved = toOptionalString(candidate)?.trim();
    if (resolved) {
      return resolved;
    }
  }
  return null;
}
/**
 * Resolves the OpenAI-compatible base URL.
 *
 * Candidates are tried in order: `config.baseUrl`, `config.base_url`, then the
 * `OPENAI_BASE_URL` environment variable. Each candidate is validated on its
 * own, so an empty or blank config value no longer blocks the fallback to the
 * next source (`??` alone does not skip `""`).
 *
 * @param config Config fragment that may hold the base URL.
 * @returns The trimmed URL, or `null` when no source provides a usable value.
 */
export function resolveOpenAiBaseUrl(config: ConfigWithBaseUrl): string | null {
  const candidates = [config.baseUrl, config.base_url, process.env.OPENAI_BASE_URL];
  for (const candidate of candidates) {
    const resolved = toOptionalString(candidate)?.trim();
    if (resolved) {
      return resolved;
    }
  }
  return null;
}
| import crypto from "node:crypto"; | ||
| import { ClawMateError } from "../errors"; | ||
| import { createLogger } from "../logger"; | ||
/** Inputs for `createAliyunCloneVoiceModel`. */
export interface CreateAliyunCloneVoiceModelOptions {
  /** Sent as the `Authorization: Bearer …` header. */
  apiKey: string;
  /** API root; the voice-customization path is appended to it. */
  baseUrl: string;
  /** Sent as `input.target_model`. */
  targetModel: string;
  /** Sanitized into the request's `prefix` (lowercase a-z/0-9, at most 9 chars, default "clawmate"). */
  speaker?: string;
  /** Sent as `input.url`. */
  promptAudioUrl: string;
  /** NOTE(review): declared but not read by `createAliyunCloneVoiceModel` in this file. */
  promptText?: string;
  /** Replacement for the global `fetch`. */
  fetchImpl?: typeof fetch;
}
/** Normalized outcome of a create or poll call; every id/status is `null` when the provider did not supply it. */
export interface CreateAliyunCloneVoiceModelResult {
  /** Value of the `x-dashscope-request-id` or `x-request-id` response header. */
  requestId: string | null;
  /** First non-blank of `output.voice_id`, `output.model_id`, `data.model_id`, `data.id`. */
  modelId: string | null;
  taskId: string | null;
  status: string | null;
  /** Parsed response body, unmodified. */
  raw: unknown;
}
/** Inputs for `pollAliyunCloneVoiceModel`. */
export interface PollAliyunCloneVoiceModelOptions {
  apiKey: string;
  /** Despite the name, used as an API base URL: the voice-customization path is appended to it. */
  statusUrl: string;
  /** Sent as `input.voice_id` in the `query_voice` request. */
  taskId: string;
  /** Delay between attempts in milliseconds; defaults to 3000. */
  pollIntervalMs?: number;
  /** Maximum number of requests; defaults to 60. */
  maxAttempts?: number;
  fetchImpl?: typeof fetch;
}
/** Inputs for `generateAliyunCloneTts`. */
export interface GenerateAliyunCloneTtsOptions {
  /** Text to synthesize; sent as `input.text` of the `continue-task` message. */
  text: string;
  apiKey: string;
  /** HTTP(S) API root; converted to a WebSocket URL before connecting. */
  baseUrl: string;
  /** Sent as `payload.model`. */
  model: string;
  /** Sent as `parameters.voice`. */
  modelId: string;
  /** Only used for the returned `voice` label and for logging. */
  speaker?: string;
  /** NOTE(review): declared but not read by `generateAliyunCloneTts` in this file. */
  fetchImpl?: typeof fetch;
}
/** Result of `generateAliyunCloneTts`. */
export interface GenerateAliyunCloneTtsResult {
  /** `data:audio/mpeg;base64,…` URL built from the received binary frames. */
  audioUrl: string;
  /** `header.task_id` of the most recent text event, if any. */
  requestId: string | null;
  model: string;
  /** Trimmed `speaker` when non-empty, otherwise `modelId`. */
  voice: string;
}
/** Loosely typed provider HTTP body; fields are `unknown` and narrowed with `toOptionalString` before use. */
interface CloneApiBody {
  output?: {
    audio?: {
      url?: unknown;
    };
    voice_id?: unknown;
    model_id?: unknown;
    task_id?: unknown;
    status?: unknown;
  };
  data?: {
    id?: unknown;
    task_id?: unknown;
    status?: unknown;
    model_id?: unknown;
  };
  code?: unknown;
  message?: unknown;
}
/** Shape of a text frame received on the synthesis WebSocket; only the header is inspected. */
interface CloneWsEventEnvelope {
  header?: {
    event?: unknown;
    task_id?: unknown;
  };
}
// Logger used by the WebSocket synthesis flow.
const logger = createLogger("clawmate-tts");
/**
 * Narrows an unknown value to a usable string.
 *
 * @param value Anything; only strings are considered.
 * @returns The trimmed string when it has non-whitespace content, otherwise `null`.
 */
function toOptionalString(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : null;
}
/**
 * Removes every trailing "/" from a base URL.
 *
 * @param baseUrl URL to normalize.
 * @returns The URL without trailing slashes (an all-slash input yields "").
 */
function normalizeBaseUrl(baseUrl: string): string {
  const SLASH = 47; // char code of "/"
  let end = baseUrl.length;
  while (end > 0 && baseUrl.charCodeAt(end - 1) === SLASH) {
    end -= 1;
  }
  return baseUrl.slice(0, end);
}
/**
 * Builds the multimodal-generation endpoint URL by appending the fixed
 * generation path to the normalized base URL.
 *
 * @param baseUrl API root (trailing slashes are tolerated).
 * @returns Absolute endpoint URL.
 */
function buildGenerationUrl(baseUrl: string): string {
  const root = normalizeBaseUrl(baseUrl);
  return root + "/services/aigc/multimodal-generation/generation";
}
/**
 * Builds the voice-customization endpoint URL by appending the fixed
 * customization path to the normalized base URL.
 *
 * @param baseUrl API root (trailing slashes are tolerated).
 * @returns Absolute endpoint URL.
 */
function buildCloneCustomizationUrl(baseUrl: string): string {
  const root = normalizeBaseUrl(baseUrl);
  return root + "/services/audio/tts/customization";
}
/**
 * Derives the WebSocket inference URL from an HTTP(S) API base URL.
 *
 * `https://` becomes `wss://` and `http://` becomes `ws://`; a trailing
 * `/api/v1` is rewritten to `/api-ws/v1/inference`. URLs with any other
 * scheme are returned normalized but otherwise unchanged.
 *
 * @param baseUrl API root (trailing slashes are tolerated).
 * @returns The WebSocket URL to connect to.
 */
function buildWebsocketUrl(baseUrl: string): string {
  const root = normalizeBaseUrl(baseUrl);
  const schemeSwaps: ReadonlyArray<readonly [string, string]> = [
    ["https://", "wss://"],
    ["http://", "ws://"],
  ];
  for (const [httpScheme, wsScheme] of schemeSwaps) {
    if (root.startsWith(httpScheme)) {
      const swapped = wsScheme + root.slice(httpScheme.length);
      return swapped.replace(/\/api\/v1$/, "/api-ws/v1/inference");
    }
  }
  return root;
}
/**
 * Derives the voice `prefix` sent with a create_voice request.
 *
 * The value is trimmed, lower-cased, stripped of everything except a-z and
 * 0-9, and cut to at most 9 characters.
 *
 * @param value Requested speaker name; may be undefined.
 * @returns The sanitized prefix, or "clawmate" when nothing usable remains.
 */
function normalizeClonePrefix(value: string | undefined): string {
  const lowered = (value ?? "").trim().toLowerCase();
  const alphanumeric = lowered.replace(/[^a-z0-9]+/g, "");
  const prefix = alphanumeric.slice(0, 9);
  return prefix === "" ? "clawmate" : prefix;
}
/**
 * Reads a provider HTTP response as JSON and converts failures into
 * `ClawMateError`s.
 *
 * @param response Fetch `Response` returned by the provider.
 * @returns `{ requestId, body, rawText }`; `body` is `null` for an empty payload.
 * @throws ClawMateError "TTS_RESPONSE_PARSE_ERROR" (marked transient) when the
 *   body cannot be read or is not valid JSON. `details.status` carries the
 *   HTTP status so a non-JSON 4xx/5xx page can be told apart from a malformed 200.
 * @throws ClawMateError with the provider's `code` (or
 *   "TTS_PROVIDER_HTTP_ERROR") when the status is not OK; transient only for
 *   status >= 500.
 */
async function parseJsonBody(response: Response): Promise<{ requestId: string | null; body: CloneApiBody | null; rawText: string }> {
  // Prefer the DashScope-specific header, then the generic one.
  const requestId =
    response.headers.get("x-dashscope-request-id") ??
    response.headers.get("x-request-id") ??
    null;
  let rawText = "";
  let body: CloneApiBody | null = null;
  try {
    rawText = await response.text();
    body = rawText ? (JSON.parse(rawText) as CloneApiBody) : null;
  } catch (error) {
    // NOTE(review): this is marked transient even for 4xx responses, unlike the
    // HTTP-error branch below — confirm that is intended.
    throw new ClawMateError("TTS provider 响应解析失败", {
      code: "TTS_RESPONSE_PARSE_ERROR",
      transient: true,
      requestId,
      details: {
        cause: error instanceof Error ? error.message : String(error),
        responseText: rawText,
        status: response.status,
      },
    });
  }
  if (!response.ok) {
    const message =
      toOptionalString(body?.message) ?? `TTS provider 请求失败: HTTP ${response.status}`;
    throw new ClawMateError(message, {
      code: toOptionalString(body?.code) ?? "TTS_PROVIDER_HTTP_ERROR",
      transient: response.status >= 500,
      requestId,
      details: body,
    });
  }
  return { requestId, body, rawText };
}
/**
 * Submits a `create_voice` request to the voice-customization endpoint.
 *
 * @param options Endpoint, credentials and voice parameters; `fetchImpl`
 *   replaces the global `fetch` when provided.
 * @returns Normalized ids and status, where each is the first non-blank string
 *   found under `output` and then `data`, or `null`; `raw` is the parsed body.
 * @throws ClawMateError from `parseJsonBody` for unreadable or non-OK responses.
 */
export async function createAliyunCloneVoiceModel(
  options: CreateAliyunCloneVoiceModelOptions,
): Promise<CreateAliyunCloneVoiceModelResult> {
  const doFetch = options.fetchImpl ?? fetch;
  const endpoint = buildCloneCustomizationUrl(options.baseUrl);
  const payload = {
    model: "voice-enrollment",
    input: {
      action: "create_voice",
      target_model: options.targetModel,
      prefix: normalizeClonePrefix(options.speaker),
      url: options.promptAudioUrl,
    },
  };
  const response = await doFetch(endpoint, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${options.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });
  const parsed = await parseJsonBody(response);
  const output = parsed.body?.output;
  const data = parsed.body?.data;
  // The provider may report the id under several keys; the first non-blank one wins.
  const modelId =
    toOptionalString(output?.voice_id) ??
    toOptionalString(output?.model_id) ??
    toOptionalString(data?.model_id) ??
    toOptionalString(data?.id);
  const taskId = toOptionalString(output?.task_id) ?? toOptionalString(data?.task_id);
  const status = toOptionalString(output?.status) ?? toOptionalString(data?.status);
  return {
    requestId: parsed.requestId,
    modelId,
    taskId,
    status,
    raw: parsed.body,
  };
}
/**
 * Repeatedly issues `query_voice` requests until the cloned voice is ready,
 * has failed, or the attempt budget is exhausted.
 *
 * @param options `statusUrl` is used as the API base URL (the customization
 *   path is appended); `taskId` is sent as `voice_id`; `maxAttempts` defaults
 *   to 60 and `pollIntervalMs` to 3000.
 * @returns The normalized result once the status is "OK", "SUCCEEDED" or
 *   "SUCCESS", or a model id is present.
 * @throws ClawMateError "TTS_CLONE_MODEL_CREATE_FAILED" when the status is
 *   "FAILED" or "UNDEPLOYED".
 * @throws ClawMateError "TTS_CLONE_MODEL_TIMEOUT" (transient) when every
 *   attempt ends without a terminal status.
 */
export async function pollAliyunCloneVoiceModel(
  options: PollAliyunCloneVoiceModelOptions,
): Promise<CreateAliyunCloneVoiceModelResult> {
  const fetchImpl = options.fetchImpl ?? fetch;
  const maxAttempts = options.maxAttempts ?? 60;
  const pollIntervalMs = options.pollIntervalMs ?? 3000;
  // The request is identical on every attempt, so build it once.
  const endpoint = buildCloneCustomizationUrl(options.statusUrl);
  const requestInit = {
    method: "POST",
    headers: {
      Authorization: `Bearer ${options.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "voice-enrollment",
      input: {
        action: "query_voice",
        voice_id: options.taskId,
      },
    }),
  };
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    const response = await fetchImpl(endpoint, requestInit);
    const { requestId, body } = await parseJsonBody(response);
    const status = toOptionalString(body?.output?.status) ?? toOptionalString(body?.data?.status);
    const modelId =
      toOptionalString(body?.output?.voice_id) ??
      toOptionalString(body?.output?.model_id) ??
      toOptionalString(body?.data?.model_id) ??
      toOptionalString(body?.data?.id);
    // Check failure first: a failed response that still echoes an id must not
    // be mistaken for success.
    if (status === "FAILED" || status === "UNDEPLOYED") {
      throw new ClawMateError("复刻语音模型创建失败", {
        code: "TTS_CLONE_MODEL_CREATE_FAILED",
        requestId,
        details: body,
      });
    }
    if (status === "OK" || status === "SUCCEEDED" || status === "SUCCESS" || modelId) {
      return {
        requestId,
        modelId,
        taskId: options.taskId,
        status,
        raw: body,
      };
    }
    // Only wait when another attempt will follow; sleeping before the timeout is wasted time.
    if (attempt < maxAttempts - 1) {
      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    }
  }
  throw new ClawMateError("等待复刻语音模型完成超时", {
    code: "TTS_CLONE_MODEL_TIMEOUT",
    transient: true,
    details: {
      taskId: options.taskId,
      maxAttempts,
      pollIntervalMs,
    },
  });
}
/**
 * Finds a WebSocket constructor to use.
 *
 * The `ws` package is tried first (its `WebSocket` export, then its default
 * export); if it cannot be imported or exposes neither, the global
 * `WebSocket` is used when one exists.
 *
 * @returns A WebSocket constructor.
 * @throws ClawMateError "TTS_WEBSOCKET_UNAVAILABLE" when neither source is available.
 */
async function resolveWebSocketConstructor(): Promise<typeof WebSocket> {
  let fromWsPackage: unknown;
  try {
    const wsModule = await import("ws");
    fromWsPackage = wsModule.WebSocket ?? wsModule.default;
  } catch {
    // `ws` is not installed or failed to load; fall back to the global below.
    fromWsPackage = undefined;
  }
  if (fromWsPackage) {
    return fromWsPackage as typeof WebSocket;
  }
  if (typeof WebSocket === "undefined") {
    throw new ClawMateError("当前环境缺少 WebSocket 支持,请安装 ws 依赖", {
      code: "TTS_WEBSOCKET_UNAVAILABLE",
    });
  }
  return WebSocket;
}
/**
 * Joins audio chunks and encodes them as an `audio/mpeg` base64 data URL.
 *
 * `Buffer.concat` accepts `Uint8Array` items directly, so the chunks are
 * copied once into the result instead of once per chunk and then again.
 *
 * @param chunks Audio byte chunks in playback order; may be empty.
 * @returns `data:audio/mpeg;base64,<payload>` (empty payload for no chunks).
 */
function encodeAudioAsDataUrl(chunks: Uint8Array[]): string {
  const base64 = Buffer.concat(chunks).toString("base64");
  return `data:audio/mpeg;base64,${base64}`;
}
/**
 * Synthesizes speech with a cloned voice over the provider's WebSocket API.
 *
 * Flow as implemented here:
 *  1. on open, send `run-task` with the synthesis parameters;
 *  2. on `task-started`, send `continue-task` carrying the text, then `finish-task`;
 *  3. collect binary frames as audio chunks;
 *  4. on `task-finished`, resolve with the chunks encoded as a data URL.
 *
 * The promise settles exactly once; the socket is closed after settling.
 *
 * @param options Text, credentials, model and voice id.
 * @returns Data-URL audio plus the request id, model and voice label.
 * @throws ClawMateError "TTS_RESPONSE_PARSE_ERROR" for a non-JSON text frame,
 *   "TTS_AUDIO_URL_MISSING" when the task finishes without audio,
 *   "TTS_PROVIDER_HTTP_ERROR" on `task-failed`,
 *   "TTS_AUDIO_DATA_INVALID" for an unsupported frame type,
 *   "TTS_WEBSOCKET_ERROR" on socket/send errors and
 *   "TTS_WEBSOCKET_CLOSED" when the socket closes before completion.
 */
export async function generateAliyunCloneTts(
  options: GenerateAliyunCloneTtsOptions,
): Promise<GenerateAliyunCloneTtsResult> {
  const WebSocketCtor = await resolveWebSocketConstructor();
  const websocketUrl = buildWebsocketUrl(options.baseUrl);
  // The task id is a client-generated UUID with the dashes removed.
  const taskId = crypto.randomUUID().replace(/-/g, "");
  const audioChunks: Uint8Array[] = [];
  return await new Promise<GenerateAliyunCloneTtsResult>((resolve, reject) => {
    let settled = false;
    let started = false;
    let requestId: string | null = null;
    // NOTE(review): the third (options) argument presumably targets the `ws`
    // package; a global WebSocket may ignore it, dropping the Authorization
    // header — confirm.
    const socket = new (WebSocketCtor as unknown as {
      new (url: string, protocols?: string | string[], options?: { headers?: Record<string, string> }): WebSocket;
    })(websocketUrl, undefined, {
      headers: {
        Authorization: `Bearer ${options.apiKey}`,
      },
    });
    // Settles the promise at most once, then closes the socket.
    const finish = (handler: () => void) => {
      if (settled) return;
      settled = true;
      handler();
      try {
        socket.close();
      } catch {
        // ignore close errors
      }
    };
    const fail = (error: ClawMateError) => {
      finish(() => reject(error));
    };
    // A throwing send would otherwise escape the event handler and leave the
    // promise pending forever, so it is converted into a rejection.
    const sendJson = (payload: Record<string, unknown>) => {
      if (settled) return;
      try {
        socket.send(JSON.stringify(payload));
      } catch (error) {
        logger.error("复刻语音 WebSocket 连接失败", {
          requestId,
          taskId,
          event: error,
          model: options.model,
          modelId: options.modelId,
          started,
        });
        fail(
          new ClawMateError(started ? "复刻语音合成连接失败" : "复刻语音合成启动失败", {
            code: "TTS_WEBSOCKET_ERROR",
            requestId,
            details: { cause: error instanceof Error ? error.message : String(error) },
          }),
        );
      }
    };
    socket.binaryType = "arraybuffer";
    socket.onopen = () => {
      sendJson({
        header: {
          action: "run-task",
          task_id: taskId,
          streaming: "duplex",
        },
        payload: {
          model: options.model,
          task_group: "audio",
          task: "tts",
          function: "SpeechSynthesizer",
          input: {},
          parameters: {
            voice: options.modelId,
            volume: 50,
            text_type: "PlainText",
            sample_rate: 22050,
            rate: 1,
            format: "mp3",
            pitch: 1,
            seed: 0,
            type: 0,
            enable_ssml: true,
          },
        },
      });
    };
    socket.onmessage = (event) => {
      // Frames arriving after the promise settled carry nothing we still need.
      if (settled) return;
      if (typeof event.data === "string") {
        let envelope: CloneWsEventEnvelope | null = null;
        try {
          envelope = JSON.parse(event.data) as CloneWsEventEnvelope;
        } catch {
          logger.error("复刻语音 WebSocket 文本事件解析失败", {
            responseText: event.data,
            taskId,
          });
          fail(
            new ClawMateError("TTS provider 响应解析失败", {
              code: "TTS_RESPONSE_PARSE_ERROR",
              requestId,
              details: { responseText: event.data },
            }),
          );
          return;
        }
        const eventName = toOptionalString(envelope?.header?.event);
        // Remember the most recent task id reported by the server.
        requestId = toOptionalString(envelope?.header?.task_id) ?? requestId;
        logger.info("复刻语音 WebSocket 事件", {
          event: eventName,
          requestId,
          envelope,
        });
        if (eventName === "task-started") {
          started = true;
          sendJson({
            header: {
              action: "continue-task",
              task_id: taskId,
              streaming: "duplex",
            },
            payload: {
              model: options.model,
              task_group: "audio",
              task: "tts",
              function: "SpeechSynthesizer",
              input: {
                text: options.text,
              },
            },
          });
          sendJson({
            header: {
              action: "finish-task",
              task_id: taskId,
              streaming: "duplex",
            },
            payload: {
              input: {},
            },
          });
          return;
        }
        if (eventName === "task-finished") {
          if (!audioChunks.length) {
            fail(
              new ClawMateError("TTS provider 响应中缺少 audio url", {
                code: "TTS_AUDIO_URL_MISSING",
                requestId,
              }),
            );
            return;
          }
          finish(() =>
            resolve({
              audioUrl: encodeAudioAsDataUrl(audioChunks),
              requestId,
              model: options.model,
              voice: options.speaker?.trim() || options.modelId,
            }),
          );
          return;
        }
        if (eventName === "task-failed") {
          logger.error("复刻语音 WebSocket 任务失败", {
            requestId,
            envelope,
            model: options.model,
            modelId: options.modelId,
            speaker: options.speaker,
          });
          fail(
            new ClawMateError("复刻语音合成失败", {
              code: "TTS_PROVIDER_HTTP_ERROR",
              requestId,
              details: envelope,
            }),
          );
        }
        // Any other event name is logged above and otherwise ignored.
        return;
      }
      if (event.data instanceof ArrayBuffer) {
        audioChunks.push(new Uint8Array(event.data));
        return;
      }
      if (ArrayBuffer.isView(event.data)) {
        // Typed-array frames (e.g. a Node Buffer) are copied because the
        // underlying memory may be shared or reused by the transport.
        const view = event.data as ArrayBufferView;
        audioChunks.push(new Uint8Array(view.buffer, view.byteOffset, view.byteLength).slice());
        return;
      }
      logger.error("复刻语音 WebSocket 返回了不支持的音频数据格式", {
        requestId,
        dataType: typeof event.data,
      });
      fail(
        new ClawMateError("TTS provider 返回了不支持的音频数据格式", {
          code: "TTS_AUDIO_DATA_INVALID",
          requestId,
          details: { dataType: typeof event.data },
        }),
      );
    };
    socket.onerror = (event) => {
      logger.error("复刻语音 WebSocket 连接失败", {
        requestId,
        taskId,
        event,
        model: options.model,
        modelId: options.modelId,
        started,
      });
      fail(
        new ClawMateError(started ? "复刻语音合成连接失败" : "复刻语音合成启动失败", {
          code: "TTS_WEBSOCKET_ERROR",
          requestId,
        }),
      );
    };
    socket.onclose = (event) => {
      // A close after settling is the one triggered by `finish` itself.
      if (settled) {
        return;
      }
      logger.error("复刻语音 WebSocket 连接已关闭", {
        requestId,
        taskId,
        code: event.code,
        reason: event.reason,
        wasClean: event.wasClean,
        started,
      });
      fail(
        new ClawMateError("复刻语音合成连接已关闭", {
          code: "TTS_WEBSOCKET_CLOSED",
          requestId,
          details: {
            code: event.code,
            reason: event.reason,
            wasClean: event.wasClean,
            started,
          },
        }),
      );
    };
  });
}
| import path from "node:path"; | ||
/** Inputs for `transcodeAudioWithFfmpeg`. */
export interface TranscodeAudioOptions {
  /** Path of the existing audio file. */
  inputPath: string;
  /** Requested container/codec. */
  outputFormat: "wav" | "ogg" | "opus";
}
/** Outcome of `transcodeAudioWithFfmpeg`. */
export interface TranscodeAudioResult {
  /** Path of the file the caller should use. */
  outputPath: string;
  /** Whether a new file was produced. */
  transcoded: boolean;
}
/**
 * Computes the target path for a requested output format.
 *
 * @param inputPath Path of the source audio file.
 * @param outputFormat Requested format.
 * @returns `inputPath` unchanged for "wav"; otherwise the same directory and
 *   base name with a `.opus` or `.ogg` extension.
 */
function buildOutputPath(inputPath: string, outputFormat: "wav" | "ogg" | "opus"): string {
  const { dir, name } = path.parse(inputPath);
  switch (outputFormat) {
    case "wav":
      return inputPath;
    case "opus":
      return path.join(dir, `${name}.opus`);
    default:
      return path.join(dir, `${name}.ogg`);
  }
}
/**
 * Entry point for audio transcoding.
 *
 * NOTE(review): as written, no transcoding is performed — every branch
 * returns the input path with `transcoded: false`, and no ffmpeg invocation
 * is present in this function. Confirm whether the real conversion is still
 * to be implemented.
 *
 * @param options Source path and requested output format.
 * @returns `{ outputPath: options.inputPath, transcoded: false }`.
 */
export async function transcodeAudioWithFfmpeg(options: TranscodeAudioOptions): Promise<TranscodeAudioResult> {
  const passthrough: TranscodeAudioResult = {
    outputPath: options.inputPath,
    transcoded: false,
  };
  // WAV is returned as-is.
  if (options.outputFormat === "wav") {
    return passthrough;
  }
  const outputPath = buildOutputPath(options.inputPath, options.outputFormat);
  const inputExt = path.extname(options.inputPath).toLowerCase();
  const outputExt = path.extname(outputPath).toLowerCase();
  // The file already carries the requested extension.
  if (inputExt === outputExt) {
    return passthrough;
  }
  // Different extension requested: still handed back untouched.
  return passthrough;
}
+3
-4
| { | ||
| "name": "@clawmate/clawmate", | ||
| "version": "0.3.0", | ||
| "version": "0.3.1", | ||
| "description": "One-click installer for the ClawMate OpenClaw companion plugin", | ||
@@ -16,3 +16,2 @@ "license": "MIT", | ||
| "bin", | ||
| "packages/clawmate-companion/bin", | ||
| "packages/clawmate-companion/config", | ||
@@ -32,6 +31,6 @@ "packages/clawmate-companion/index.ts", | ||
| "clawmate:plugin:check": "node packages/clawmate-companion/scripts/check-manifest-id.mjs", | ||
| "clawmate:test": "node --import tsx --test packages/clawmate-companion/src/plugin.test.ts packages/clawmate-companion/src/plugin.tts.test.ts packages/clawmate-companion/src/cli.test.cjs packages/clawmate-companion/src/core/providers/openai-compatible.test.ts packages/clawmate-companion/src/core/providers/gemini.test.ts", | ||
| "clawmate:probe:openai": "cd packages/clawmate-companion && node --import tsx skills/clawmate-companion/scripts/probe-openai-edits.ts", | ||
| "clawmate:probe:tts": "cd packages/clawmate-companion && node --import tsx scripts/probe-qwen-tts.ts", | ||
| "clawmate:setup": "node packages/clawmate-companion/bin/cli.cjs", | ||
| "clawmate:probe:tts:clone": "cd packages/clawmate-companion && node --import tsx scripts/probe-aliyun-clone-tts.ts", | ||
| "clawmate:setup": "node bin/clawmate.cjs", | ||
| "release": "node scripts/release.mjs", | ||
@@ -38,0 +37,0 @@ "release:patch": "node scripts/release.mjs patch --publish", |
@@ -22,8 +22,23 @@ { | ||
| "enabled": false, | ||
| "model": "qwen3-tts-flash", | ||
| "voice": "Chelsie", | ||
| "languageType": "Chinese", | ||
| "apiKey": "YOUR_DASHSCOPE_API_KEY", | ||
| "baseUrl": "https://dashscope.aliyuncs.com/api/v1", | ||
| "degradeMessage": "语音暂时发送失败,我先打字陪你。" | ||
| "provider": "aliyun-official", | ||
| "outputFormat": "wav", | ||
| "degradeMessage": "语音暂时发送失败,我先打字陪你。", | ||
| "official": { | ||
| "model": "qwen3-tts-flash", | ||
| "voice": "Chelsie", | ||
| "languageType": "Chinese", | ||
| "apiKey": "YOUR_DASHSCOPE_API_KEY", | ||
| "baseUrl": "https://dashscope.aliyuncs.com/api/v1" | ||
| }, | ||
| "clone": { | ||
| "apiKey": "YOUR_DASHSCOPE_API_KEY", | ||
| "baseUrl": "https://dashscope.aliyuncs.com/api/v1", | ||
| "targetModel": "cosyvoice-v1", | ||
| "modelId": "", | ||
| "synthesisModel": "cosyvoice-clone-v1", | ||
| "speaker": "", | ||
| "promptAudioUrl": "", | ||
| "promptText": "", | ||
| "statusUrl": "https://dashscope.aliyuncs.com/api/v1" | ||
| } | ||
| }, | ||
@@ -30,0 +45,0 @@ "providers": { |
@@ -5,3 +5,3 @@ { | ||
| "description": "角色化自拍生成插件(Tool + Skill)", | ||
| "version": "0.3.0", | ||
| "version": "0.3.1", | ||
| "skills": [ | ||
@@ -94,2 +94,74 @@ "./skills" | ||
| }, | ||
| "provider": { | ||
| "type": "string", | ||
| "enum": [ | ||
| "aliyun-official", | ||
| "aliyun-clone" | ||
| ] | ||
| }, | ||
| "outputFormat": { | ||
| "type": "string", | ||
| "enum": [ | ||
| "wav", | ||
| "ogg", | ||
| "opus" | ||
| ] | ||
| }, | ||
| "degradeMessage": { | ||
| "type": "string" | ||
| }, | ||
| "official": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "properties": { | ||
| "model": { | ||
| "type": "string" | ||
| }, | ||
| "voice": { | ||
| "type": "string" | ||
| }, | ||
| "languageType": { | ||
| "type": "string" | ||
| }, | ||
| "apiKey": { | ||
| "type": "string" | ||
| }, | ||
| "baseUrl": { | ||
| "type": "string" | ||
| } | ||
| } | ||
| }, | ||
| "clone": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "properties": { | ||
| "apiKey": { | ||
| "type": "string" | ||
| }, | ||
| "baseUrl": { | ||
| "type": "string" | ||
| }, | ||
| "targetModel": { | ||
| "type": "string" | ||
| }, | ||
| "modelId": { | ||
| "type": "string" | ||
| }, | ||
| "synthesisModel": { | ||
| "type": "string" | ||
| }, | ||
| "speaker": { | ||
| "type": "string" | ||
| }, | ||
| "promptAudioUrl": { | ||
| "type": "string" | ||
| }, | ||
| "promptText": { | ||
| "type": "string" | ||
| }, | ||
| "statusUrl": { | ||
| "type": "string" | ||
| } | ||
| } | ||
| }, | ||
| "model": { | ||
@@ -109,5 +181,2 @@ "type": "string" | ||
| "type": "string" | ||
| }, | ||
| "degradeMessage": { | ||
| "type": "string" | ||
| } | ||
@@ -205,2 +274,74 @@ } | ||
| }, | ||
| "provider": { | ||
| "type": "string", | ||
| "enum": [ | ||
| "aliyun-official", | ||
| "aliyun-clone" | ||
| ] | ||
| }, | ||
| "outputFormat": { | ||
| "type": "string", | ||
| "enum": [ | ||
| "wav", | ||
| "ogg", | ||
| "opus" | ||
| ] | ||
| }, | ||
| "degradeMessage": { | ||
| "type": "string" | ||
| }, | ||
| "official": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "properties": { | ||
| "model": { | ||
| "type": "string" | ||
| }, | ||
| "voice": { | ||
| "type": "string" | ||
| }, | ||
| "languageType": { | ||
| "type": "string" | ||
| }, | ||
| "apiKey": { | ||
| "type": "string" | ||
| }, | ||
| "baseUrl": { | ||
| "type": "string" | ||
| } | ||
| } | ||
| }, | ||
| "clone": { | ||
| "type": "object", | ||
| "additionalProperties": false, | ||
| "properties": { | ||
| "apiKey": { | ||
| "type": "string" | ||
| }, | ||
| "baseUrl": { | ||
| "type": "string" | ||
| }, | ||
| "targetModel": { | ||
| "type": "string" | ||
| }, | ||
| "modelId": { | ||
| "type": "string" | ||
| }, | ||
| "synthesisModel": { | ||
| "type": "string" | ||
| }, | ||
| "speaker": { | ||
| "type": "string" | ||
| }, | ||
| "promptAudioUrl": { | ||
| "type": "string" | ||
| }, | ||
| "promptText": { | ||
| "type": "string" | ||
| }, | ||
| "statusUrl": { | ||
| "type": "string" | ||
| } | ||
| } | ||
| }, | ||
| "model": { | ||
@@ -220,5 +361,2 @@ "type": "string" | ||
| "type": "string" | ||
| }, | ||
| "degradeMessage": { | ||
| "type": "string" | ||
| } | ||
@@ -225,0 +363,0 @@ } |
| { | ||
| "name": "@clawmate/clawmate-companion", | ||
| "version": "0.3.0", | ||
| "version": "0.3.1", | ||
| "private": true, | ||
| "type": "module", | ||
| "description": "ClawMate companion plugin for OpenClaw", | ||
| "bin": { | ||
| "clawmate": "./bin/cli.cjs" | ||
| }, | ||
| "openclaw": { | ||
@@ -16,3 +13,2 @@ "extensions": [ | ||
| "scripts": { | ||
| "setup": "node ./bin/cli.cjs", | ||
| "check:manifest-id": "node ./scripts/check-manifest-id.mjs" | ||
@@ -22,4 +18,5 @@ }, | ||
| "@google/genai": "^1.44.0", | ||
| "openai": "^6.22.0" | ||
| "openai": "^6.22.0", | ||
| "ws": "^8.18.0" | ||
| } | ||
| } |
@@ -24,3 +24,3 @@ --- | ||
| - Celebration | ||
| - Missing the user / playful affection / light撒娇 style | ||
| - Missing the user / playful affection / intimate interaction | ||
@@ -41,3 +41,3 @@ ## When Not to Use | ||
| 1. Decide whether this reply should be voice-only. | ||
| 1. Decide whether this reply should use voice. | ||
| 2. If not, reply normally in text and do not call the tool. | ||
@@ -68,15 +68,11 @@ 3. If yes, write a short spoken version and call: | ||
| 1. Do not send the same content as visible text | ||
| 2. Output a separate line: | ||
| 2. If `delivery.delivered` is `true`, the plugin has already sent the voice message directly, so do not send it again | ||
| 3. If `delivery.delivered` is not `true`, the tool returns a local audio path in `audioPath` | ||
| 4. You must immediately use that path to send the voice message to the user | ||
| 5. If `nextAction = use_audio_path_to_send_voice` is present, follow it literally and do not just repeat the path | ||
| ```text | ||
| MEDIA: <audioPath> | ||
| ``` | ||
| Use `mediaLine` directly when available. | ||
| ### On failure | ||
| 1. Use the returned `message` to continue naturally in text | ||
| 2. Do not fabricate audio paths | ||
| 3. Do not output `MEDIA:` on failure | ||
| 2. Do not fabricate audio paths or delivery syntax | ||
@@ -88,2 +84,2 @@ ## Prohibited Actions | ||
| - Do not invent local file paths | ||
| - Do not overuse voice when the moment is clearly better served by text | ||
| - Do not turn every reply into voice |
@@ -66,16 +66,13 @@ --- | ||
| 1. 不要再发送同内容可见文字 | ||
| 2. 单独输出一行: | ||
| 2. 如果工具结果里的 `delivery.delivered` 为 `true`,表示插件已直接把语音发出,此时不要再补发一次 | ||
| 3. 如果工具结果里的 `delivery.delivered` 不为 `true`,工具结果会返回本地音频路径 `audioPath` | ||
| 4. 此时必须立刻使用这个路径对应的音频,把语音发给用户 | ||
| 5. 若结果里出现 `nextAction = use_audio_path_to_send_voice`,按该指令执行,不要只复述路径 | ||
| ```text | ||
| MEDIA: <audioPath> | ||
| ``` | ||
| 如果工具结果里有 `mediaLine`,直接使用它。 | ||
| ### 失败时 | ||
| 1. 使用返回的 `message` 自然降级成文字回复 | ||
| 2. 不要伪造音频路径 | ||
| 3. 失败时不要输出 `MEDIA:` | ||
| 2. 不要伪造音频路径或发送协议 | ||
| ## 禁止行为 | ||
@@ -82,0 +79,0 @@ |
@@ -61,7 +61,6 @@ --- | ||
| "promptGuide": { | ||
| "style": "photorealistic | anime", | ||
| "requiredFields": ["scene", "action", "expression", "outfit", "lighting", "camera", "..."], | ||
| "requiredFields": ["scene", "action", "expression", "outfit", "lighting", "camera", "realism"], | ||
| "rules": ["single scene only", "..."], | ||
| "wordRange": "50-80 english words", | ||
| "example": "<style-specific example prompt>" | ||
| "example": "Photorealistic direct selfie, ..." | ||
| } | ||
@@ -79,3 +78,2 @@ } | ||
| - Every `modeGuide.requirements` item must have corresponding description in prompt, cannot be omitted | ||
| - **Follow the style indicated by `promptGuide.style`** — use style-appropriate vocabulary and visual descriptions (e.g., "natural skin texture" for photorealistic, "consistent 2D anime look" for anime) | ||
@@ -103,3 +101,2 @@ **Prompt generation requirements**: | ||
| // Step 2 (call after generating prompt based on returned package) | ||
| // Example for photorealistic style: | ||
| clawmate_generate_selfie({ | ||
@@ -109,7 +106,2 @@ prompt: "Photorealistic direct selfie, studying at a university library desk in the afternoon, open laptop and coffee cup in background, wearing comfortable hoodie, soft window light with warm ambient fill, focused but relaxed expression, medium close-up framing, natural skin texture, candid daily-life photo style", | ||
| }) | ||
| // Example for anime style: | ||
| clawmate_generate_selfie({ | ||
| prompt: "Anime-style direct selfie, studying at a university library desk in the afternoon, open laptop and coffee cup in background, wearing comfortable hoodie, soft window light with warm ambient fill, focused but relaxed expression, medium close-up framing, consistent 2D anime look matching reference image style", | ||
| mode: "direct" | ||
| }) | ||
| ``` | ||
@@ -121,8 +113,8 @@ | ||
| 1. First give a natural text reply (e.g., `"Here you go~"`) | ||
| 2. Must output a separate line: `MEDIA: <imageUrl>` (directly use `mediaLine` or `imageUrl`) | ||
| 3. Don't modify image path, don't replace `MEDIA:` line with Markdown image syntax | ||
| 2. The tool returns a local image path in `imagePath` | ||
| 3. Use the image referenced by that path and send it to the user | ||
| **On failure** (`ok: false`): | ||
| 1. Use the `message` in the return to continue conversation | ||
| 2. Don't fabricate image URLs, don't output `MEDIA:` line | ||
| 2. Don't fabricate image URLs, file paths, or delivery syntax | ||
| 3. Naturally transition to other topics | ||
@@ -137,8 +129,1 @@ | ||
| - **Prohibit omitting mode**: Both tools must pass mode parameter | ||
| ## Important Notes | ||
| 1. On success, must output `MEDIA: <path>`, this is OpenClaw's image recognition format | ||
| 2. Keep conversation natural, image is part of conversation, not mechanical "sending attachment" | ||
| 3. On failure, gracefully degrade to text, don't interrupt conversation | ||
| 4. Always maintain consistent style as specified by `promptGuide.style`, single scene, consistent lighting |
@@ -109,8 +109,8 @@ --- | ||
| 1. 先给一句自然文本回复(如 `"来啦~"`) | ||
| 2. 必须单独输出一行:`MEDIA: <imageUrl>`(直接用 `mediaLine` 或 `imageUrl`) | ||
| 3. 不要改图片路径,不要用 Markdown 图片语法替代 `MEDIA:` 行 | ||
| 2. 工具结果会返回本地图片路径 `imagePath` | ||
| 3. 使用这个路径对应的图片,把图片发给用户 | ||
| **失败时**(`ok: false`): | ||
| 1. 使用返回中的 `message` 继续对话 | ||
| 2. 不要杜撰图片 URL,不要输出 `MEDIA:` 行 | ||
| 2. 不要杜撰图片 URL、文件路径或发送协议 | ||
| 3. 自然过渡到其他话题 | ||
@@ -126,7 +126,2 @@ | ||
| ## 注意事项 | ||
| 1. 成功时必须输出 `MEDIA: <path>`,这是 OpenClaw 的图片识别格式 | ||
| 2. 保持对话自然,图片是对话的一部分,不是机械"发送附件" | ||
| 3. 失败时优雅降级为文本,不要中断对话 | ||
| 4. 始终保持写实、生活化、单场景、一致光线 |
@@ -157,7 +157,2 @@ import fs from "node:fs/promises"; | ||
| export async function readReferenceImageBase64(referencePath: string): Promise<string> { | ||
| const data = await fs.readFile(referencePath); | ||
| return data.toString("base64"); | ||
| } | ||
| export async function readReferenceImagesBase64(referencePaths: string[]): Promise<string[]> { | ||
@@ -164,0 +159,0 @@ if (!Array.isArray(referencePaths) || referencePaths.length === 0) { |
@@ -94,25 +94,21 @@ import fs from "node:fs/promises"; | ||
/**
 * Normalizes a configured base URL.
 *
 * Surrounding whitespace and any trailing slashes are removed. The fallback is
 * returned when the value is not a string, is blank, or is reduced to an empty
 * string by the normalization (for example a value made only of slashes), so
 * callers never receive an empty base URL.
 *
 * @param value - Raw configuration value of unknown type.
 * @param fallback - Base URL to use when `value` yields nothing usable.
 * @returns The normalized base URL, or `fallback`.
 */
function normalizeBaseUrl(value: unknown, fallback: string): string {
  if (typeof value !== "string") {
    return fallback;
  }
  // Strip trailing slashes after trimming so "https://host/api/ " and
  // "https://host/api" normalize to the same value.
  const normalized = value.trim().replace(/\/+$/, "");
  // A slash-only input collapses to "", which is not a usable base URL.
  return normalized ? normalized : fallback;
}
| function normalizeTts(value: unknown): TtsConfig { | ||
| const source = asObject(value); | ||
| const normalizedBaseUrl = | ||
| typeof source.baseUrl === "string" && source.baseUrl.trim() | ||
| ? source.baseUrl.trim().replace(/\/+$/, "") | ||
| : "https://dashscope.aliyuncs.com/api/v1"; | ||
| const official = asObject(source.official); | ||
| const clone = asObject(source.clone); | ||
| const legacyBaseUrl = normalizeBaseUrl(source.baseUrl, "https://dashscope.aliyuncs.com/api/v1"); | ||
| const provider = | ||
| source.provider === "aliyun-clone" || source.provider === "aliyun-official" | ||
| ? source.provider | ||
| : "aliyun-official"; | ||
| const outputFormat = source.outputFormat === "ogg" || source.outputFormat === "opus" ? source.outputFormat : "wav"; | ||
| return { | ||
| enabled: Boolean(source.enabled), | ||
| model: | ||
| typeof source.model === "string" && source.model.trim() | ||
| ? source.model.trim() | ||
| : "qwen3-tts-flash", | ||
| voice: | ||
| typeof source.voice === "string" && source.voice.trim() | ||
| ? source.voice.trim() | ||
| : "Chelsie", | ||
| languageType: | ||
| typeof source.languageType === "string" && source.languageType.trim() | ||
| ? source.languageType.trim() | ||
| : "Chinese", | ||
| apiKey: normalizeText(source.apiKey), | ||
| baseUrl: normalizedBaseUrl, | ||
| provider, | ||
| outputFormat, | ||
| degradeMessage: | ||
@@ -122,2 +118,47 @@ typeof source.degradeMessage === "string" && source.degradeMessage.trim() | ||
| : "语音暂时发送失败,我先打字陪你。", | ||
| official: { | ||
| model: | ||
| typeof official.model === "string" && official.model.trim() | ||
| ? official.model.trim() | ||
| : typeof source.model === "string" && source.model.trim() | ||
| ? source.model.trim() | ||
| : "qwen3-tts-flash", | ||
| voice: | ||
| typeof official.voice === "string" && official.voice.trim() | ||
| ? official.voice.trim() | ||
| : typeof source.voice === "string" && source.voice.trim() | ||
| ? source.voice.trim() | ||
| : "Chelsie", | ||
| languageType: | ||
| typeof official.languageType === "string" && official.languageType.trim() | ||
| ? official.languageType.trim() | ||
| : typeof source.languageType === "string" && source.languageType.trim() | ||
| ? source.languageType.trim() | ||
| : "Chinese", | ||
| apiKey: | ||
| typeof official.apiKey === "string" && official.apiKey.trim() | ||
| ? official.apiKey.trim() | ||
| : normalizeText(source.apiKey), | ||
| baseUrl: normalizeBaseUrl(official.baseUrl, legacyBaseUrl), | ||
| }, | ||
| clone: { | ||
| apiKey: | ||
| typeof clone.apiKey === "string" && clone.apiKey.trim() | ||
| ? clone.apiKey.trim() | ||
| : normalizeText(source.apiKey), | ||
| baseUrl: normalizeBaseUrl(clone.baseUrl, legacyBaseUrl), | ||
| targetModel: | ||
| typeof clone.targetModel === "string" && clone.targetModel.trim() ? clone.targetModel.trim() : "cosyvoice-v1", | ||
| modelId: normalizeText(clone.modelId), | ||
| synthesisModel: | ||
| typeof clone.synthesisModel === "string" && clone.synthesisModel.trim() | ||
| ? clone.synthesisModel.trim() | ||
| : typeof clone.targetModel === "string" && clone.targetModel.trim() | ||
| ? clone.targetModel.trim() | ||
| : "cosyvoice-v1", | ||
| speaker: normalizeText(clone.speaker), | ||
| promptAudioUrl: normalizeText(clone.promptAudioUrl), | ||
| promptText: normalizeText(clone.promptText), | ||
| statusUrl: normalizeBaseUrl(clone.statusUrl, legacyBaseUrl), | ||
| }, | ||
| }; | ||
@@ -124,0 +165,0 @@ } |
@@ -135,3 +135,2 @@ import path from "node:path"; | ||
| const referenceImageBase64 = referenceImageBase64List[0] ?? ""; | ||
| const referenceImageDataUrl = referenceImageDataUrls[0] ?? ""; | ||
@@ -167,3 +166,2 @@ if (referencePaths.length === 0) { | ||
| referenceImageBase64List, | ||
| referenceImageDataUrl, | ||
| referenceImageDataUrls, | ||
@@ -170,0 +168,0 @@ timeState: timeState.key, |
@@ -15,2 +15,3 @@ import { ProviderError } from "../errors"; | ||
| } from "./shared"; | ||
| import { resolveDashScopeApiKey } from "./env"; | ||
@@ -121,4 +122,3 @@ interface DashScopeAliyunProviderConfig extends ProviderConfig { | ||
| const endpoint = toOptionalString(config.endpoint)?.trim() ?? "/services/aigc/multimodal-generation/generation"; | ||
| const apiKey = | ||
| toOptionalString(config.apiKey ?? config.api_key)?.trim() ?? toOptionalString(process.env.DASHSCOPE_API_KEY)?.trim(); | ||
| const apiKey = resolveDashScopeApiKey(config); | ||
| const model = toOptionalString(config.model)?.trim(); | ||
@@ -212,8 +212,3 @@ const timeoutMs = toFiniteNumber(config.timeoutMs ?? config.timeout_ms); | ||
| } | ||
| const fallback = dedupeNonEmptyStrings( | ||
| Array.isArray(payload.referenceImageDataUrls) && payload.referenceImageDataUrls.length > 0 | ||
| ? payload.referenceImageDataUrls | ||
| : [payload.referenceImageDataUrl], | ||
| ); | ||
| return fallback; | ||
| return dedupeNonEmptyStrings(payload.referenceImageDataUrls); | ||
| } | ||
@@ -220,0 +215,0 @@ |
@@ -16,2 +16,3 @@ import { ProviderError } from "../errors"; | ||
| } from "./shared"; | ||
| import { resolveFalApiKey } from "./env"; | ||
@@ -104,3 +105,3 @@ interface FalProviderConfig extends ProviderConfig { | ||
| const endpoint = normalizeEndpoint(config); | ||
| const apiKey = toOptionalString(config.apiKey ?? config.api_key ?? process.env.FAL_KEY)?.trim(); | ||
| const apiKey = resolveFalApiKey(config); | ||
| const timeoutMs = toFiniteNumber(config.timeoutMs ?? config.timeout_ms); | ||
@@ -186,8 +187,3 @@ const numImages = toFiniteNumber(config.numImages ?? config.num_images); | ||
| } | ||
| const fallback = dedupeNonEmptyStrings( | ||
| Array.isArray(payload.referenceImageDataUrls) && payload.referenceImageDataUrls.length > 0 | ||
| ? payload.referenceImageDataUrls | ||
| : [payload.referenceImageDataUrl], | ||
| ); | ||
| return fallback; | ||
| return dedupeNonEmptyStrings(payload.referenceImageDataUrls); | ||
| } | ||
@@ -194,0 +190,0 @@ |
@@ -85,7 +85,3 @@ import { GoogleGenAI } from "@google/genai"; | ||
| function buildReferenceParts(payload: GenerateRequest): Array<Record<string, unknown>> { | ||
| const referenceImages = dedupeNonEmptyStrings( | ||
| Array.isArray(payload.referenceImageDataUrls) && payload.referenceImageDataUrls.length > 0 | ||
| ? payload.referenceImageDataUrls | ||
| : [payload.referenceImageDataUrl], | ||
| ); | ||
| const referenceImages = dedupeNonEmptyStrings(payload.referenceImageDataUrls); | ||
@@ -92,0 +88,0 @@ return referenceImages.map((imageUrl, index) => { |
@@ -23,2 +23,3 @@ import { ProviderError } from "../errors"; | ||
| const transient = options.transient !== false; | ||
| const echoReferenceDataUrl = options.echoReferenceDataUrl !== false; | ||
@@ -51,4 +52,9 @@ let submitFailLeft = failSubmitTimes; | ||
| const imageUrl = options.echoReferenceDataUrl | ||
| ? payload.referenceImageDataUrl | ||
| const referenceImage = | ||
| payload.referenceImageDataUrls.find((value) => typeof value === "string" && value.trim()) || | ||
| payload.referencePath?.trim() || | ||
| payload.referencePaths.find((value) => typeof value === "string" && value.trim()) || | ||
| ""; | ||
| const imageUrl = echoReferenceDataUrl && referenceImage | ||
| ? referenceImage | ||
| : `mock://${name}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}/image.png`; | ||
@@ -55,0 +61,0 @@ const requestId = `${name}-req-${Math.random().toString(36).slice(2, 10)}`; |
@@ -190,7 +190,3 @@ import { ProviderError } from "../errors"; | ||
| } | ||
| return dedupeNonEmptyStrings( | ||
| Array.isArray(payload.referenceImageDataUrls) && payload.referenceImageDataUrls.length > 0 | ||
| ? payload.referenceImageDataUrls | ||
| : [payload.referenceImageDataUrl], | ||
| ); | ||
| return dedupeNonEmptyStrings(payload.referenceImageDataUrls); | ||
| } | ||
@@ -197,0 +193,0 @@ |
@@ -14,2 +14,3 @@ import OpenAI, { APIConnectionError, APIConnectionTimeoutError, APIError, toFile } from "openai"; | ||
| } from "./shared"; | ||
| import { resolveOpenAiApiKey, resolveOpenAiBaseUrl } from "./env"; | ||
@@ -42,4 +43,4 @@ interface OpenAICompatibleProviderConfig extends ProviderConfig { | ||
| const name = config.name; | ||
| const apiKey = toOptionalString(config.apiKey ?? config.api_key ?? process.env.OPENAI_API_KEY)?.trim(); | ||
| const baseURL = toOptionalString(config.baseUrl ?? config.base_url ?? process.env.OPENAI_BASE_URL)?.trim() ?? null; | ||
| const apiKey = resolveOpenAiApiKey(config); | ||
| const baseURL = resolveOpenAiBaseUrl(config); | ||
| const timeoutMs = toFiniteNumber(config.timeoutMs ?? config.timeout_ms); | ||
@@ -242,7 +243,3 @@ const model = toOptionalString(config.model)?.trim() ?? "gpt-image-1.5"; | ||
| } | ||
| return dedupeNonEmptyStrings( | ||
| Array.isArray(payload.referenceImageDataUrls) && payload.referenceImageDataUrls.length > 0 | ||
| ? payload.referenceImageDataUrls | ||
| : [payload.referenceImageDataUrl], | ||
| ); | ||
| return dedupeNonEmptyStrings(payload.referenceImageDataUrls); | ||
| } | ||
@@ -249,0 +246,0 @@ |
@@ -132,8 +132,3 @@ import { ProviderError } from "../errors"; | ||
| } | ||
| const fallback = dedupeNonEmptyStrings( | ||
| Array.isArray(payload.referenceImageDataUrls) && payload.referenceImageDataUrls.length > 0 | ||
| ? payload.referenceImageDataUrls | ||
| : [payload.referenceImageDataUrl], | ||
| ); | ||
| return fallback; | ||
| return dedupeNonEmptyStrings(payload.referenceImageDataUrls); | ||
| } | ||
@@ -140,0 +135,0 @@ |
| import { ClawMateError } from "./errors"; | ||
| import { generateAliyunCloneTts } from "./tts/aliyun-clone"; | ||
| import { generateAliyunTts } from "./tts/aliyun"; | ||
@@ -31,3 +32,4 @@ import type { ClawMateConfig, GenerateTtsResult } from "./types"; | ||
| const apiKey = config.tts.apiKey.trim(); | ||
| const apiKey = | ||
| config.tts.provider === "aliyun-clone" ? config.tts.clone.apiKey.trim() : config.tts.official.apiKey.trim(); | ||
| if (!apiKey) { | ||
@@ -42,11 +44,6 @@ return { | ||
| try { | ||
| const result = await generateAliyunTts({ | ||
| text: trimmedText, | ||
| model: config.tts.model, | ||
| voice: config.tts.voice, | ||
| languageType: config.tts.languageType, | ||
| apiKey, | ||
| baseUrl: config.tts.baseUrl, | ||
| fetchImpl, | ||
| }); | ||
| const result = | ||
| config.tts.provider === "aliyun-clone" | ||
| ? await generateCloneTts(trimmedText, config, fetchImpl) | ||
| : await generateOfficialTts(trimmedText, config, fetchImpl); | ||
@@ -75,2 +72,32 @@ return { | ||
| } | ||
/**
 * Runs speech synthesis through `generateAliyunTts` using the settings in
 * `config.tts.official`.
 *
 * @param text - Text to synthesize.
 * @param config - Plugin configuration; only `config.tts.official` is read.
 * @param fetchImpl - Optional fetch implementation forwarded to the provider call.
 * @returns Whatever `generateAliyunTts` resolves to.
 */
async function generateOfficialTts(text: string, config: ClawMateConfig, fetchImpl?: typeof fetch) {
  const { model, voice, languageType, apiKey, baseUrl } = config.tts.official;
  return generateAliyunTts({ text, model, voice, languageType, apiKey, baseUrl, fetchImpl });
}
/**
 * Runs speech synthesis through `generateAliyunCloneTts` using the settings in
 * `config.tts.clone`.
 *
 * @param text - Text to synthesize.
 * @param config - Plugin configuration; only `config.tts.clone` is read.
 * @param fetchImpl - Optional fetch implementation forwarded to the provider call.
 * @returns Whatever `generateAliyunCloneTts` resolves to.
 * @throws ClawMateError with code `TTS_CLONE_MODEL_ID_MISSING` when the
 *   configured `modelId` is blank.
 */
async function generateCloneTts(text: string, config: ClawMateConfig, fetchImpl?: typeof fetch) {
  const cloneConfig = config.tts.clone;

  // Synthesis needs an existing clone model, so reject a blank id before calling out.
  const hasModelId = cloneConfig.modelId.trim().length > 0;
  if (!hasModelId) {
    throw new ClawMateError("请先完成复刻音色模型创建并配置 modelId", {
      code: "TTS_CLONE_MODEL_ID_MISSING",
    });
  }

  return generateAliyunCloneTts({
    text,
    apiKey: cloneConfig.apiKey,
    baseUrl: cloneConfig.baseUrl,
    // The provider's `model` field is the synthesis model, not the clone target model.
    model: cloneConfig.synthesisModel,
    modelId: cloneConfig.modelId,
    speaker: cloneConfig.speaker,
    fetchImpl,
  });
}
| } |
@@ -52,4 +52,7 @@ export interface TimeStateDefinition { | ||
| export interface TtsConfig { | ||
| enabled: boolean; | ||
| export type TtsProviderType = "aliyun-official" | "aliyun-clone"; | ||
| export type TtsOutputFormat = "wav" | "ogg" | "opus"; | ||
| export interface OfficialTtsConfig { | ||
| model: string; | ||
@@ -60,3 +63,23 @@ voice: string; | ||
| baseUrl: string; | ||
| } | ||
| export interface CloneTtsConfig { | ||
| apiKey: string; | ||
| baseUrl: string; | ||
| targetModel: string; | ||
| modelId: string; | ||
| synthesisModel: string; | ||
| speaker: string; | ||
| promptAudioUrl: string; | ||
| promptText: string; | ||
| statusUrl: string; | ||
| } | ||
| export interface TtsConfig { | ||
| enabled: boolean; | ||
| provider: TtsProviderType; | ||
| outputFormat: TtsOutputFormat; | ||
| degradeMessage: string; | ||
| official: OfficialTtsConfig; | ||
| clone: CloneTtsConfig; | ||
| } | ||
@@ -93,3 +116,2 @@ | ||
| referenceImageBase64List: string[]; | ||
| referenceImageDataUrl: string; | ||
| referenceImageDataUrls: string[]; | ||
@@ -96,0 +118,0 @@ timeState: string; |
@@ -9,2 +9,4 @@ import path from "node:path"; | ||
| import { generateTts } from "./core/tts"; | ||
| import { transcodeAudioWithFfmpeg } from "./core/tts/ffmpeg"; | ||
| import { resolveGeneratedAudioDir, resolveGeneratedImageDir, resolveSoulMdPath } from "./core/openclaw-paths"; | ||
| import { loadCharacterAssets, listCharacters } from "./core/characters"; | ||
@@ -39,2 +41,4 @@ import { createCharacter } from "./core/character-creator"; | ||
| enabled?: boolean; | ||
| provider?: string; | ||
| outputFormat?: string; | ||
| model?: string; | ||
@@ -46,2 +50,20 @@ voice?: string; | ||
| degradeMessage?: string; | ||
| official?: { | ||
| model?: string; | ||
| voice?: string; | ||
| languageType?: string; | ||
| apiKey?: string; | ||
| baseUrl?: string; | ||
| }; | ||
| clone?: { | ||
| apiKey?: string; | ||
| baseUrl?: string; | ||
| targetModel?: string; | ||
| modelId?: string; | ||
| synthesisModel?: string; | ||
| speaker?: string; | ||
| promptAudioUrl?: string; | ||
| promptText?: string; | ||
| statusUrl?: string; | ||
| }; | ||
| }; | ||
@@ -107,2 +129,14 @@ } | ||
| interface DirectVoiceMessagePayload { | ||
| agentId?: string; | ||
| sessionId?: string; | ||
| channelId?: string; | ||
| agentAccountId?: string; | ||
| requesterSenderId?: string; | ||
| senderIsOwner?: boolean; | ||
| audioPath: string; | ||
| mimeType: string; | ||
| text?: string; | ||
| } | ||
| interface OpenClawPluginApiLike { | ||
@@ -121,2 +155,4 @@ resolvePath: (input: string) => string; | ||
| registerTool: (tool: OpenClawPluginToolLike | OpenClawPluginToolFactoryLike) => void; | ||
| sendVoiceMessage?: (payload: DirectVoiceMessagePayload) => Promise<unknown> | unknown; | ||
| sendAudioMessage?: (payload: DirectVoiceMessagePayload) => Promise<unknown> | unknown; | ||
| } | ||
@@ -207,2 +243,22 @@ | ||
/**
 * Maps an audio file path to a MIME type based on its extension.
 *
 * The extension is compared case-insensitively. Any extension that is not
 * listed, including a missing one, yields `audio/wav`.
 *
 * @param audioPath - Path or file name of the audio file.
 * @returns The MIME type string for the file's extension.
 */
function detectAudioMimeFromPath(audioPath: string): string {
  switch (path.extname(audioPath).toLowerCase()) {
    case ".ogg":
    case ".opus":
      // Both extensions are reported with the Ogg container type.
      return "audio/ogg";
    case ".mp3":
      return "audio/mpeg";
    case ".m4a":
      return "audio/mp4";
    case ".aac":
      return "audio/aac";
    case ".flac":
      return "audio/flac";
    default:
      return "audio/wav";
  }
}
| function normalizeRawBase64(text: string): string { | ||
@@ -224,22 +280,2 @@ return text.replace(/\s+/g, "").replace(/^[("'\s]+|[)"'\s]+$/g, ""); | ||
| function resolveGeneratedImageDir(now = new Date()): string { | ||
| const openClawHome = process.env.OPENCLAW_HOME?.trim() || path.join(os.homedir(), ".openclaw"); | ||
| const day = now.toISOString().slice(0, 10); | ||
| return path.join(openClawHome, "media", "clawmate-generated", day); | ||
| } | ||
| function resolveGeneratedAudioDir(now = new Date()): string { | ||
| const openClawHome = process.env.OPENCLAW_HOME?.trim() || path.join(os.homedir(), ".openclaw"); | ||
| const day = now.toISOString().slice(0, 10); | ||
| return path.join(openClawHome, "media", "clawmate-voice", day); | ||
| } | ||
| function resolveSoulMdPath(workspaceDir?: string): string { | ||
| if (typeof workspaceDir === "string" && workspaceDir.trim()) { | ||
| return path.join(workspaceDir, "SOUL.md"); | ||
| } | ||
| const openClawHome = process.env.OPENCLAW_HOME?.trim() || path.join(os.homedir(), ".openclaw"); | ||
| return path.join(openClawHome, "workspace", "SOUL.md"); | ||
| } | ||
| function buildSoulPersonaSection(characterId: string, personaText: string): string { | ||
@@ -526,3 +562,7 @@ return [ | ||
| async function persistAudioToLocal(audioRef: string, requestId: string | null): Promise<string> { | ||
| async function persistAudioToLocal( | ||
| audioRef: string, | ||
| requestId: string | null, | ||
| outputFormat: "wav" | "ogg" | "opus" = "wav", | ||
| ): Promise<string> { | ||
| const trimmed = audioRef.trim(); | ||
@@ -533,21 +573,28 @@ if (!trimmed) { | ||
| const localPath = resolveExistingLocalPath(trimmed); | ||
| if (localPath) { | ||
| await fs.access(localPath); | ||
| return localPath; | ||
| let localPath: string | null = null; | ||
| const existingLocalPath = resolveExistingLocalPath(trimmed); | ||
| if (existingLocalPath) { | ||
| await fs.access(existingLocalPath); | ||
| localPath = existingLocalPath; | ||
| } else if (AUDIO_DATA_URL_PATTERN.test(trimmed)) { | ||
| localPath = await persistDataUrlAudio(trimmed, requestId); | ||
| } else if (HTTP_URL_PATTERN.test(trimmed)) { | ||
| localPath = await persistRemoteAudio(trimmed, requestId); | ||
| } else if (isLikelyRawBase64(trimmed)) { | ||
| localPath = await persistRawBase64Audio(trimmed, requestId); | ||
| } | ||
| if (AUDIO_DATA_URL_PATTERN.test(trimmed)) { | ||
| return persistDataUrlAudio(trimmed, requestId); | ||
| if (!localPath) { | ||
| throw new Error("unsupported audio reference format"); | ||
| } | ||
| if (HTTP_URL_PATTERN.test(trimmed)) { | ||
| return persistRemoteAudio(trimmed, requestId); | ||
| try { | ||
| const transcoded = await transcodeAudioWithFfmpeg({ | ||
| inputPath: localPath, | ||
| outputFormat, | ||
| }); | ||
| return transcoded.outputPath; | ||
| } catch { | ||
| return localPath; | ||
| } | ||
| if (isLikelyRawBase64(trimmed)) { | ||
| return persistRawBase64Audio(trimmed, requestId); | ||
| } | ||
| throw new Error("unsupported audio reference format"); | ||
| } | ||
@@ -725,8 +772,23 @@ | ||
| enabled: false, | ||
| model: "qwen3-tts-flash", | ||
| voice: "Chelsie", | ||
| languageType: "Chinese", | ||
| apiKey: "", | ||
| baseUrl: "https://dashscope.aliyuncs.com/api/v1", | ||
| provider: "aliyun-official", | ||
| outputFormat: "wav", | ||
| degradeMessage: "语音暂时发送失败,我先打字陪你。", | ||
| official: { | ||
| model: "qwen3-tts-flash", | ||
| voice: "Chelsie", | ||
| languageType: "Chinese", | ||
| apiKey: "", | ||
| baseUrl: "https://dashscope.aliyuncs.com/api/v1", | ||
| }, | ||
| clone: { | ||
| apiKey: "", | ||
| baseUrl: "https://dashscope.aliyuncs.com/api/v1", | ||
| targetModel: "cosyvoice-v1", | ||
| modelId: "", | ||
| synthesisModel: "cosyvoice-clone-v1", | ||
| speaker: "", | ||
| promptAudioUrl: "", | ||
| promptText: "", | ||
| statusUrl: "https://dashscope.aliyuncs.com/api/v1", | ||
| }, | ||
| }, | ||
@@ -797,9 +859,8 @@ }; | ||
| if (result.ok) { | ||
| const imageUrl = await persistImageToLocal(result.imageUrl, result.requestId); | ||
| const imagePath = await persistImageToLocal(result.imageUrl, result.requestId); | ||
| return JSON.stringify({ | ||
| ok: true, | ||
| imageUrl, | ||
| imageMarkdown: ``, | ||
| mediaLine: `MEDIA: ${imageUrl}`, | ||
| imagePath, | ||
| imageUrl: imagePath, | ||
| provider: result.provider, | ||
@@ -824,17 +885,43 @@ requestId: result.requestId, | ||
| async function formatTtsResult(result: GenerateTtsResult): Promise<string> { | ||
| type TtsDeliveryStatus = { | ||
| attempted: boolean; | ||
| delivered: boolean; | ||
| method: string | null; | ||
| reason?: string; | ||
| }; | ||
| type TtsToolPayload = | ||
| | { | ||
| ok: true; | ||
| audioPath: string; | ||
| requestId: string | null; | ||
| model: string; | ||
| voice: string; | ||
| delivery?: TtsDeliveryStatus; | ||
| nextAction?: string; | ||
| } | ||
| | { | ||
| ok: false; | ||
| message: string; | ||
| error: string; | ||
| requestId: string | null; | ||
| }; | ||
| async function formatTtsResult( | ||
| result: GenerateTtsResult, | ||
| outputFormat: "wav" | "ogg" | "opus" = "wav", | ||
| ): Promise<TtsToolPayload> { | ||
| if (result.ok) { | ||
| const audioPath = await persistAudioToLocal(result.audioUrl, result.requestId); | ||
| return JSON.stringify({ | ||
| const audioPath = await persistAudioToLocal(result.audioUrl, result.requestId, outputFormat); | ||
| return { | ||
| ok: true, | ||
| audioPath, | ||
| mediaLine: `MEDIA: ${audioPath}`, | ||
| requestId: result.requestId, | ||
| model: result.model, | ||
| voice: result.voice, | ||
| }); | ||
| }; | ||
| } | ||
| const failure = result as GenerateTtsFailure; | ||
| return JSON.stringify({ | ||
| return { | ||
| ok: false, | ||
@@ -844,5 +931,59 @@ message: failure.message, | ||
| requestId: failure.requestId ?? null, | ||
| }); | ||
| }; | ||
| } | ||
/**
 * Attempts to deliver a generated audio file directly through the host runtime.
 *
 * `api.sendVoiceMessage` is used when it is a function; otherwise
 * `api.sendAudioMessage` is used when that is a function. Only the first
 * available capability is tried: if it throws, the failure is reported and the
 * other capability is not attempted. This function never rejects because of a
 * send failure.
 *
 * @param api - Plugin API object that may expose the optional send capabilities.
 * @param scope - Tool context supplying the agent, session, channel and sender fields.
 * @param audioPath - Local path of the audio file to send.
 * @param text - Text associated with the audio, forwarded in the payload.
 * @returns A delivery status; `reason` is the error message on failure, or
 *   `RUNTIME_CAPABILITY_MISSING` when neither capability exists.
 */
async function tryAutoSendVoiceMessage(
  api: OpenClawPluginApiLike,
  scope: OpenClawToolContextLike,
  audioPath: string,
  text: string,
): Promise<TtsDeliveryStatus> {
  const message: DirectVoiceMessagePayload = {
    agentId: scope.agentId,
    sessionId: scope.sessionId,
    channelId: scope.messageChannel,
    agentAccountId: scope.agentAccountId,
    requesterSenderId: scope.requesterSenderId,
    senderIsOwner: scope.senderIsOwner,
    audioPath,
    mimeType: detectAudioMimeFromPath(audioPath),
    text,
  };

  // Capabilities in priority order; the first one present decides the outcome.
  const capabilities = ["sendVoiceMessage", "sendAudioMessage"] as const;
  for (const method of capabilities) {
    const send = api[method];
    if (typeof send !== "function") {
      continue;
    }
    try {
      // Invoke with `api` as the receiver, exactly like a direct `api.method(...)` call.
      await send.call(api, message);
    } catch (error) {
      return {
        attempted: true,
        delivered: false,
        method,
        reason: error instanceof Error ? error.message : String(error),
      };
    }
    return { attempted: true, delivered: true, method };
  }

  return {
    attempted: false,
    delivered: false,
    method: null,
    reason: "RUNTIME_CAPABILITY_MISSING",
  };
}
| export default function registerClawMateCompanion(api: OpenClawPluginApiLike): void { | ||
@@ -977,3 +1118,3 @@ const pluginRoot = resolvePluginRoot(api); | ||
| name: "clawmate_generate_selfie", | ||
| description: "接收模型生成的完整英文提示词,调用图像生成服务生成 ClawMate 自拍图并返回结构化结果", | ||
| description: "接收模型生成的完整英文提示词,调用图像生成服务生成 ClawMate 自拍图并返回包含本地图片路径的结构化结果", | ||
| parameters: { | ||
@@ -1064,3 +1205,3 @@ type: "object", | ||
| name: "clawmate_generate_tts", | ||
| description: "接收适合口播的短文本,调用阿里云千问 TTS 生成语音并返回本地媒体路径", | ||
| description: "接收适合口播的短文本,调用阿里云千问 TTS 生成语音并返回本地音频路径", | ||
| parameters: { | ||
@@ -1090,5 +1231,11 @@ type: "object", | ||
| let text: string; | ||
| let payload: TtsToolPayload; | ||
| try { | ||
| text = await formatTtsResult(result); | ||
| payload = await formatTtsResult(result, config.tts.outputFormat); | ||
| if (payload.ok) { | ||
| payload.delivery = await tryAutoSendVoiceMessage(api, toolScope, payload.audioPath, params.text ?? ""); | ||
| if (!payload.delivery.delivered) { | ||
| payload.nextAction = "use_audio_path_to_send_voice"; | ||
| } | ||
| } | ||
| } catch (error) { | ||
@@ -1104,3 +1251,3 @@ const remoteAudioUrl = | ||
| }); | ||
| text = JSON.stringify({ | ||
| payload = { | ||
| ok: false, | ||
@@ -1110,3 +1257,3 @@ message: config.tts.degradeMessage, | ||
| requestId: result.ok ? result.requestId ?? null : null, | ||
| }); | ||
| }; | ||
| } | ||
@@ -1118,3 +1265,3 @@ | ||
| type: "text", | ||
| text, | ||
| text: JSON.stringify(payload), | ||
| }, | ||
@@ -1121,0 +1268,0 @@ ], |
+73
-3
@@ -68,3 +68,12 @@ # ClawMate | ||
| <img width="1583" height="877" alt="image" src="https://github.com/user-attachments/assets/48d58c79-8de3-45c9-920c-1ba348678855" /> | ||
| <img width="1594" height="880" alt="image" src="https://github.com/user-attachments/assets/fd103615-f9ba-4197-ac73-c3bd2a0c63fd" /> | ||
| **社群优秀样例哈哈,真的很不错** | ||
| >主动发图、主动关怀,给你的 OpenClaw 注入一个有温度的灵魂 | ||
| <img width="1570" height="790" alt="d0862aec4a9e3c973a16795a153767af" src="https://github.com/user-attachments/assets/f3510187-05fa-4c14-afe4-f06a0b00d022" /> | ||
| --- | ||
@@ -86,2 +95,63 @@ | ||
| ### 复刻音色安装说明(阿里云 CosyVoice Clone) | ||
| 如果你在安装向导中选择 [`复刻音色`](bin/clawmate.cjs:264),建议先理解下面几个字段。 | ||
| - [`复刻目标模型`](bin/clawmate.cjs:270) | ||
| - 推荐默认值:`cosyvoice-v3.5-plus` | ||
| - 常见可选值:`cosyvoice-v3.5-plus`、`cosyvoice-v2` | ||
| - 这是“拿什么底座去做声音复刻”,通常直接使用推荐值即可。 | ||
| - [`已存在的复刻模型 ID`](bin/clawmate.cjs:272) | ||
| - 如果你已经在阿里云控制台创建过复刻音色,就把已有 `modelId` 填进来。 | ||
| - 如果留空,表示你准备后续通过脚本或平台流程生成。 | ||
| - [`合成模型名称`](bin/clawmate.cjs:274) | ||
| - 这是后续“真正发起语音合成”时要调用的模型名,不是角色名。 | ||
| - 一般保持默认 `cosyvoice-clone-v1` 即可。 | ||
| - [`说话人名称`](bin/clawmate.cjs:276) | ||
| - 这是你给这份复刻声音起的内部标识,方便区分多个音色。 | ||
| - 例如可以填角色名、昵称或项目代号,如 `mghus`。 | ||
| - [`示例音频 URL`](bin/clawmate.cjs:278) | ||
| - 这里要填“参考音频的公网直链 URL”。 | ||
| - 最稳妥的做法是把音频上传到阿里云 OSS,并获取永久公开链接。 | ||
| - 你也可以先去阿里云百炼体验中心测试复刻流程: | ||
| - `https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_PvCec88exbQTi-U433Fxg.5.9f2110d7c0UDIv&tab=model#/efm/model_experience_center/voice?currentTab=voiceTts&primary=cloning&secondary=clone` | ||
| - [`示例音频对应文本`](bin/clawmate.cjs:280) | ||
| - 这里填参考音频里“实际说了什么”。 | ||
| - 必须尽量与音频逐字一致,否则复刻质量会明显变差。 | ||
| - [`任务查询 Base URL`](bin/clawmate.cjs:282) | ||
| - 一般直接留空即可。 | ||
| - 只有你使用了代理网关、自建中转层或者特殊 Base URL 时才需要改。 | ||
| ### 如何获取示例音频的公网 URL | ||
| 推荐用阿里云 OSS: | ||
| 1. 打开 OSS 控制台: | ||
| - `https://oss.console.aliyun.com/overview` | ||
| 2. 创建 Bucket | ||
| 3. 上传你的参考音频文件 | ||
| 4. 把对象设置为“公共读”或使用带长期有效期的分享方式 | ||
| 5. 复制该音频对象的公网访问 URL | ||
| 6. 把这个 URL 填到 [`示例音频 URL`](bin/clawmate.cjs:278) | ||
| 建议参考音频满足: | ||
| - 时长适中,尽量 10~30 秒 | ||
| - 人声清晰、背景噪音少 | ||
| - 只包含一个说话人 | ||
| - 文本内容完整可转写 | ||
| ### 推荐测试顺序 | ||
| 1. 先在阿里云百炼体验中心试一遍复刻音色流程 | ||
| 2. 准备好 OSS 公网音频链接 | ||
| 3. 再运行 [`npm run clawmate:setup`](package.json:34) | ||
| 4. 在安装器里选择 [`复刻音色`](bin/clawmate.cjs:264) | ||
| 5. 如果需要离线验证,可使用探测脚本 [`npm run clawmate:probe:tts:clone`](package.json:34) | ||
| 安装完成后,对你的 Agent 说: | ||
@@ -316,2 +386,3 @@ | ||
| ClawMate/ | ||
| ├── bin/clawmate.cjs # 安装向导 | ||
| └── packages/clawmate-companion/ | ||
@@ -324,3 +395,2 @@ ├── src/core/ # 核心逻辑(pipeline、router、providers) | ||
| │ └── brooke/ | ||
| └── bin/cli.cjs # 安装向导 | ||
| ``` | ||
@@ -334,3 +404,3 @@ | ||
| `Agent 判断 -> Tool 合成 -> 插件落盘 -> 返回 MEDIA` | ||
| `Agent 判断 -> Tool 合成 -> 插件落盘 -> 返回本地路径` | ||
@@ -341,3 +411,3 @@ 1. `before_agent_start` 会注入角色 `SOUL.md`;启用 TTS 时,还会追加一条轻量提示,让 Agent 在合适的时候使用 `clawmate-companion-tts` skill。 | ||
| 4. 插件收到远端音频地址后,会将音频下载到本地 `~/.openclaw/media/clawmate-voice/YYYY-MM-DD/`。 | ||
| 5. Tool 最终返回 `MEDIA: <audioPath>`;成功时只发送语音,不重复发送同内容文字,失败时则降级为普通文字回复。 | ||
| 5. Tool 最终返回本地 `audioPath`;具体如何把这段音频发到不同渠道,由上层运行时决定。成功时不应重复发送同内容文字,失败时则降级为普通文字回复。 | ||
@@ -344,0 +414,0 @@ |
Sorry, the diff of this file is too big to display
| import test from "node:test"; | ||
| import assert from "node:assert/strict"; | ||
| import { createGeminiProvider } from "./gemini"; | ||
| import type { GenerateRequest } from "../types"; | ||
| const PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIHWP4//8/AwAI/AL+X2NDNwAAAABJRU5ErkJggg=="; | ||
| const PNG_DATA_URL = `data:image/png;base64,${PNG_BASE64}`; | ||
/**
 * Builds a `GenerateRequest` fixture for the Gemini provider tests.
 *
 * The reference-related fields are either all populated or all blank,
 * depending only on whether `referenceImages` is non-empty. The path and
 * base64 fields always carry a single entry, however many data URLs are given.
 *
 * @param referenceImages - Data URLs exposed as `referenceImageDataUrls`.
 * @param prompt - Prompt text placed on the request.
 * @returns A request for character "brooke" in "mirror" mode at "night".
 */
function makePayload(referenceImages: string[] = [PNG_DATA_URL], prompt = "draw a portrait"): GenerateRequest {
  const hasReference = referenceImages.length > 0;

  const payload: GenerateRequest = {
    characterId: "brooke",
    prompt,
    mode: "mirror",
    referencePath: hasReference ? "C:\\reference.png" : "",
    referencePaths: hasReference ? ["C:\\reference.png"] : [],
    referenceImageBase64: hasReference ? PNG_BASE64 : "",
    referenceImageBase64List: hasReference ? [PNG_BASE64] : [],
    referenceImageDataUrl: referenceImages[0] ?? "",
    referenceImageDataUrls: referenceImages,
    timeState: "night",
    meta: {
      state: "night",
      roleName: "Brooke",
      eventSource: "test",
    },
  };

  return payload;
}
// Covers the path where no `baseUrl` is configured: the stub client factory
// must receive `baseUrl: null`, and a payload without reference images must
// produce a text-only user turn requesting the IMAGE modality.
test("gemini uses the SDK default endpoint when baseUrl is not configured", async () => {
  // Captures are stored as object properties rather than reassigned `let`
  // bindings. TypeScript does not track assignments made inside callbacks, so
  // `let x: T | null = null` would still be narrowed to `null` at the
  // assertions below and `x?.prop` would not type-check under `strict`.
  const captured: {
    config: Record<string, unknown> | null;
    request: Record<string, unknown> | null;
  } = { config: null, request: null };

  const provider = createGeminiProvider(
    {
      name: "gemini",
      apiKey: "test-key",
      model: "gemini-3.1-flash-image-preview",
    },
    (config) => {
      captured.config = config as unknown as Record<string, unknown>;
      return {
        models: {
          generateContent: async (request) => {
            captured.request = request as unknown as Record<string, unknown>;
            return {
              responseId: "gemini-default-req",
              candidates: [
                {
                  content: {
                    parts: [
                      {
                        inlineData: {
                          mimeType: "image/png",
                          data: PNG_BASE64,
                        },
                      },
                    ],
                  },
                },
              ],
            };
          },
        },
      };
    },
  );

  const result = await provider.generate(makePayload([]));

  // What the factory was handed.
  assert.equal(captured.config?.baseUrl, null);
  assert.equal(captured.config?.model, "gemini-3.1-flash-image-preview");

  // What was sent to `generateContent`.
  assert.deepEqual(captured.request?.config, {
    responseModalities: ["IMAGE"],
  });
  assert.deepEqual(captured.request?.contents, [
    {
      role: "user",
      parts: [{ text: "draw a portrait" }],
    },
  ]);

  // The inline PNG returned by the stub comes back as a data URL.
  assert.equal(result.requestId, "gemini-default-req");
  assert.equal(result.imageUrl, PNG_DATA_URL);
});
// Covers a configured proxy endpoint: the stub client factory must receive the
// configured `baseUrl` without its trailing slash, and the model string as given.
test("gemini forwards a configured custom BaseURL", async () => {
  // Stored on an object property rather than a reassigned `let`: TypeScript
  // does not track assignments made inside callbacks, so a
  // `let x: T | null = null` would still be narrowed to `null` at the
  // assertions and `x?.prop` would not type-check under `strict`.
  const captured: { config: Record<string, unknown> | null } = { config: null };

  const provider = createGeminiProvider(
    {
      name: "gemini",
      apiKey: "test-key",
      model: "custom-gemini-image-model",
      baseUrl: "https://proxy.example.com/",
    },
    (config) => {
      captured.config = config as unknown as Record<string, unknown>;
      return {
        models: {
          generateContent: async () => ({
            responseId: "gemini-custom-req",
            candidates: [
              {
                content: {
                  parts: [
                    {
                      inlineData: {
                        mimeType: "image/png",
                        data: PNG_BASE64,
                      },
                    },
                  ],
                },
              },
            ],
          }),
        },
      };
    },
  );

  const result = await provider.generate(makePayload([]));

  // Input was "https://proxy.example.com/"; the expected value has no trailing slash.
  assert.equal(captured.config?.baseUrl, "https://proxy.example.com");
  assert.equal(captured.config?.model, "custom-gemini-image-model");
  assert.equal(result.requestId, "gemini-custom-req");
  assert.equal(result.imageUrl, PNG_DATA_URL);
});
// Covers a payload with reference images: the request passed to
// `generateContent` must be a single user turn with an inline PNG part
// followed by the prompt text.
test("gemini includes reference images as inline image parts", async () => {
  // Stored on an object property rather than a reassigned `let`: TypeScript
  // does not track assignments made inside callbacks, so a
  // `let x: T | null = null` would still be narrowed to `null` at the
  // assertion and `x?.prop` would not type-check under `strict`.
  const captured: { request: Record<string, unknown> | null } = { request: null };

  const provider = createGeminiProvider(
    {
      name: "gemini",
      apiKey: "test-key",
      model: "gemini-2.5-flash-image",
    },
    () => ({
      models: {
        generateContent: async (request) => {
          captured.request = request as unknown as Record<string, unknown>;
          return {
            candidates: [
              {
                content: {
                  parts: [
                    {
                      inlineData: {
                        mimeType: "image/png",
                        data: PNG_BASE64,
                      },
                    },
                  ],
                },
              },
            ],
          };
        },
      },
    }),
  );

  await provider.generate(makePayload([PNG_DATA_URL, PNG_DATA_URL]));

  // Image part first, prompt text last.
  assert.deepEqual(captured.request?.contents, [
    {
      role: "user",
      parts: [
        {
          inlineData: {
            mimeType: "image/png",
            data: PNG_BASE64,
          },
        },
        {
          text: "draw a portrait",
        },
      ],
    },
  ]);
});
// Covers non-standard model identifiers: the `model` on the request passed to
// `generateContent` must equal the configured string exactly.
test("gemini keeps custom model strings unchanged", async () => {
  // Stored on an object property rather than a reassigned `let`: TypeScript
  // does not track assignments made inside callbacks, so a
  // `let x: T | null = null` would still be narrowed to `null` at the
  // assertion and `x?.prop` would not type-check under `strict`.
  const captured: { request: Record<string, unknown> | null } = { request: null };

  const provider = createGeminiProvider(
    {
      name: "gemini",
      apiKey: "test-key",
      model: "my-company/gemini-image-proxy",
    },
    () => ({
      models: {
        generateContent: async (request) => {
          captured.request = request as unknown as Record<string, unknown>;
          return {
            candidates: [
              {
                content: {
                  parts: [
                    {
                      inlineData: {
                        mimeType: "image/png",
                        data: PNG_BASE64,
                      },
                    },
                  ],
                },
              },
            ],
          };
        },
      },
    }),
  );

  await provider.generate(makePayload([]));

  assert.equal(captured.request?.model, "my-company/gemini-image-proxy");
});
// Covers a response that carries only text parts: `generate` must reject with
// code PROVIDER_IMAGE_URL_MISSING, keep the response id as `requestId`, and
// expose the returned text under `details.textPreview`.
test("gemini fails when the response contains no image payload", async () => {
  // Validator handed to `assert.rejects`; returning `true` accepts the error.
  const isMissingImageError = (error: unknown): boolean => {
    assert.equal(typeof error, "object");
    const resolved = error as { code?: string; requestId?: string | null; details?: Record<string, unknown> };
    assert.equal(resolved.code, "PROVIDER_IMAGE_URL_MISSING");
    assert.equal(resolved.requestId, "gemini-no-image");
    assert.deepEqual(resolved.details, {
      textPreview: ["safety filtered"],
    });
    return true;
  };

  const provider = createGeminiProvider(
    {
      name: "gemini",
      apiKey: "test-key",
      model: "gemini-3-pro-image-preview",
    },
    () => ({
      models: {
        generateContent: async () => ({
          responseId: "gemini-no-image",
          candidates: [
            {
              content: {
                parts: [
                  {
                    text: "safety filtered",
                  },
                ],
              },
            },
          ],
        }),
      },
    }),
  );

  await assert.rejects(() => provider.generate(makePayload([])), isMissingImageError);
});
| import test from "node:test"; | ||
| import assert from "node:assert/strict"; | ||
| import { createOpenAICompatibleProvider } from "./openai-compatible"; | ||
| import type { GenerateRequest } from "../types"; | ||
| const PNG_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIHWP4//8/AwAI/AL+X2NDNwAAAABJRU5ErkJggg=="; | ||
| const PNG_DATA_URL = `data:image/png;base64,${PNG_BASE64}`; | ||
/**
 * Builds a `GenerateRequest` fixture for the OpenAI-compatible provider tests.
 *
 * The reference-related fields are either all populated or all blank,
 * depending only on whether `referenceImages` is non-empty. The path and
 * base64 fields always carry a single entry.
 *
 * @param referenceImages - Data URLs exposed as `referenceImageDataUrls`.
 * @returns A request for character "brooke" in "mirror" mode with a fixed prompt.
 */
function makePayload(referenceImages: string[] = [PNG_DATA_URL]): GenerateRequest {
  const hasReference = referenceImages.length > 0;

  const payload: GenerateRequest = {
    characterId: "brooke",
    prompt: "draw a portrait",
    mode: "mirror",
    referencePath: hasReference ? "C:\\reference.png" : "",
    referencePaths: hasReference ? ["C:\\reference.png"] : [],
    referenceImageBase64: hasReference ? PNG_BASE64 : "",
    referenceImageBase64List: hasReference ? [PNG_BASE64] : [],
    referenceImageDataUrl: referenceImages[0] ?? "",
    referenceImageDataUrls: referenceImages,
    timeState: "night",
    meta: {
      state: "night",
      roleName: "Brooke",
      eventSource: "test",
    },
  };

  return payload;
}
/**
 * Creates a `Response` whose body is `body` serialized as JSON.
 *
 * @param body - Value passed through `JSON.stringify`.
 * @param requestId - Value for the `x-request-id` response header.
 * @param status - HTTP status code; defaults to 200.
 * @returns A response with JSON content type and the request-id header set.
 */
function jsonResponse(body: unknown, requestId: string, status = 200): Response {
  const headers = {
    "content-type": "application/json",
    "x-request-id": requestId,
  };
  const serialized = JSON.stringify(body);
  return new Response(serialized, { status, headers });
}
/**
 * Normalizes a fetch-style input to its URL string.
 *
 * @param input - A URL string, a `URL` instance, or a `Request`.
 * @returns The string itself, `URL.href`, or `Request.url` respectively.
 */
function toUrl(input: string | URL | Request): string {
  if (typeof input === "string") {
    return input;
  }
  return input instanceof URL ? input.href : input.url;
}
// With a reference image in the payload, the provider is expected to call only
// `/images/edits` and return the URL from that response.
test("openai-compatible prefers /images/edits when a reference image is available", async () => {
  const requestedUrls: string[] = [];

  const provider = createOpenAICompatibleProvider(
    {
      name: "openai",
      apiKey: "sk-test",
      baseUrl: "https://api.openai.com/v1",
      model: "gpt-image-1.5",
    },
    async (input) => {
      const target = toUrl(input);
      // Empty data URLs are passed to the real fetch and are not recorded.
      if (target === "data:,") {
        return globalThis.fetch(target);
      }
      requestedUrls.push(target);
      if (!target.endsWith("/images/edits")) {
        throw new Error(`unexpected url: ${target}`);
      }
      return jsonResponse({ data: [{ url: "https://example.com/edited.png" }] }, "edit-req");
    },
  );

  const { requestId, imageUrl } = await provider.generate(makePayload());

  assert.equal(requestId, "edit-req");
  assert.equal(imageUrl, "https://example.com/edited.png");
  assert.deepEqual(requestedUrls, ["https://api.openai.com/v1/images/edits"]);
});
// When `/images/edits` answers HTTP 400, the provider is expected to retry via
// `/chat/completions` and take the image URL from the chat message content.
test("openai-compatible falls back to /chat/completions when /images/edits fails", async () => {
  const requestedUrls: string[] = [];

  const provider = createOpenAICompatibleProvider(
    {
      name: "openai",
      apiKey: "sk-test",
      baseUrl: "https://api.openai.com/v1",
      model: "gpt-image-1.5",
    },
    async (input) => {
      const target = toUrl(input);
      // Empty data URLs are passed to the real fetch and are not recorded.
      if (target === "data:,") {
        return globalThis.fetch(target);
      }
      requestedUrls.push(target);

      if (target.endsWith("/images/edits")) {
        return jsonResponse({ error: { message: "unsupported" } }, "edit-fail", 400);
      }
      if (!target.endsWith("/chat/completions")) {
        throw new Error(`unexpected url: ${target}`);
      }
      return jsonResponse(
        {
          choices: [
            {
              message: {
                content: "https://example.com/fallback.png",
              },
            },
          ],
        },
        "chat-req",
      );
    },
  );

  const { requestId, imageUrl } = await provider.generate(makePayload());

  assert.equal(requestId, "chat-req");
  assert.equal(imageUrl, "https://example.com/fallback.png");
  // Order matters: the edits endpoint is tried first, then chat completions.
  assert.deepEqual(requestedUrls, ["https://api.openai.com/v1/images/edits", "https://api.openai.com/v1/chat/completions"]);
});
// Without any reference image, the provider is expected to go straight to
// `/chat/completions`; any other URL makes the stub throw.
test("openai-compatible skips /images/edits when no reference image is available", async () => {
  const requestedUrls: string[] = [];

  const provider = createOpenAICompatibleProvider(
    {
      name: "openai",
      apiKey: "sk-test",
      baseUrl: "https://api.openai.com/v1",
      model: "gpt-image-1.5",
    },
    async (input) => {
      const target = toUrl(input);
      // Empty data URLs are passed to the real fetch and are not recorded.
      if (target === "data:,") {
        return globalThis.fetch(target);
      }
      requestedUrls.push(target);

      if (!target.endsWith("/chat/completions")) {
        throw new Error(`unexpected url: ${target}`);
      }
      return jsonResponse(
        {
          choices: [
            {
              message: {
                content: "https://example.com/text-only.png",
              },
            },
          ],
        },
        "chat-only-req",
      );
    },
  );

  const { requestId, imageUrl } = await provider.generate(makePayload([]));

  assert.equal(requestId, "chat-only-req");
  assert.equal(imageUrl, "https://example.com/text-only.png");
  assert.deepEqual(requestedUrls, ["https://api.openai.com/v1/chat/completions"]);
});
Sorry, the diff of this file is too big to display
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
17371249
0.21%51
4.08%8894
10.02%415
20.29%13
-13.33%40
17.65%