🚀 Socket Launch Week Day 5: Introducing Repository Access Permissions and Custom Roles. Learn more
Sign In

@marswave/coli

Package Overview
Dependencies
Maintainers
4
Versions
18
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@marswave/coli - npm Package Compare versions

Comparing version
0.0.13
to
0.0.14
+2
distribution/source/_api/constants.d.ts
import type { SpeakerLanguage } from './types.js';
export declare const defaultSpeaker: Record<SpeakerLanguage, string>;
/**
 * Default speaker ID for each supported language.
 * The `cloud-tts` CLI falls back to these when `--language` is given without `--voice`.
 */
export const defaultSpeaker = {
en: 'chat-girl-105-cn',
zh: 'leo-9328b6d2',
ja: 'tianzhongdunzi-5d612542',
};
import { type KyInstance } from 'ky';
import type { ApiResponse, SpeakerLanguage } from './types.js';
export * from './constants.js';
export type * from './types.js';
/** Constructor options for {@link ListenHubApi}. */
export type ListenHubApiOptions = {
/** API key, sent as a `Bearer` token in the `Authorization` header of every request. */
apiKey: string;
/** Optional URL prefix for all requests; when omitted, `https://api.marswave.ai/openapi` is used. */
baseUrl?: string;
};
/**
 * Thin client for the ListenHub OpenAPI, built on a pre-configured `ky` instance.
 */
export declare class ListenHubApi {
/** The `ky` instance carrying the URL prefix and the `Authorization` header. */
api: KyInstance;
constructor({ apiKey, baseUrl }: ListenHubApiOptions);
/**
* Get a list of available speakers.
* @param options - The options for the speakers request.
* @param options.language - Optional. The language of the speakers to get, defaults to English.
* @returns A list of available speakers.
* @see {@link https://listenhub.ai/docs/en/openapi/api-reference/speakers#list-available-speakers|List Available Speakers}
*/
getAvailableSpeakers(options?: {
language?: SpeakerLanguage;
}): Promise<ApiResponse<{
items: Array<{
name: string;
speakerId: string;
demoAudioUrl: string;
gender: string;
language: SpeakerLanguage;
}>;
}>>;
/**
* Generate audio from text using the Streaming TTS API.
* @param options - The options for the TTS request.
* @param options.input - The text to generate audio from.
* @param options.voice - The `speakerId` to use for the TTS.
* @param options.model - Optional. The model to use for the TTS, defaults to `flowtts`.
* @returns A readable stream of the MP3 audio.
* @see {@link https://listenhub.ai/docs/en/openapi/api-reference/flowspeech|Streaming TTS}
*/
tts(options: {
input: string;
voice: string;
model?: string;
}): Promise<ReadableStream<Uint8Array<ArrayBuffer>>>;
}
import ky from 'ky';
export * from './constants.js';
/**
 * Client for the ListenHub OpenAPI.
 * Wraps a `ky` instance that carries the URL prefix and bearer token.
 */
export class ListenHubApi {
    api;
    constructor({ apiKey, baseUrl }) {
        const prefixUrl = baseUrl ?? 'https://api.marswave.ai/openapi';
        const headers = {
            // eslint-disable-next-line @typescript-eslint/naming-convention
            Authorization: `Bearer ${apiKey}`,
        };
        this.api = ky.extend({ prefixUrl, headers });
    }
    /**
     * Get a list of available speakers.
     * @param options - The options for the speakers request.
     * @param options.language - Optional. The language of the speakers to get, defaults to English.
     * @returns A list of available speakers.
     * @see {@link https://listenhub.ai/docs/en/openapi/api-reference/speakers#list-available-speakers|List Available Speakers}
     */
    async getAvailableSpeakers(options) {
        const request = this.api.get('v1/speakers/list', { searchParams: options });
        return request.json();
    }
    /**
     * Generate audio from text using the Streaming TTS API.
     * @param options - The options for the TTS request.
     * @param options.input - The text to generate audio from.
     * @param options.voice - The `speakerId` to use for the TTS.
     * @param options.model - Optional. The model to use for the TTS, defaults to `flowtts`.
     * @returns A readable stream of the MP3 audio.
     * @throws If the response carries no body.
     * @see {@link https://listenhub.ai/docs/en/openapi/api-reference/flowspeech|Streaming TTS}
     */
    async tts(options) {
        // eslint-disable-next-line @typescript-eslint/await-thenable
        const { body } = await this.api.post('v1/tts', { json: options });
        if (body) {
            return body;
        }
        throw new Error('Empty response body from TTS API');
    }
}
/**
 * Envelope wrapped around API responses; the payload is in `data`.
 * NOTE(review): the meaning of `code` and `message` is not visible in this file — confirm against the API docs.
 */
export type ApiResponse<T> = {
code: number;
message: string;
data: T;
};
/** Language codes accepted for speakers. */
export type SpeakerLanguage = 'en' | 'zh' | 'ja';
import type { Command } from 'commander';
export declare function register(program: Command): void;
import { Buffer } from 'node:buffer';
import process from 'node:process';
import { runAsr } from './asr.js';
import { ensureModels, ensureVadModel } from './models.js';
import { streamAsr } from './stream-asr.js';
/**
 * Register the `asr` and `asr-stream` subcommands on a commander program.
 *
 * - `asr <file>` validates `--model`, makes sure the model is present and
 *   transcribes the file.
 * - `asr-stream` reads 16 kHz mono s16le PCM from stdin, converts it to
 *   normalised float samples and streams it through the recognizer.
 *
 * @param program - The commander program to attach the subcommands to.
 */
export function register(program) {
    program
        .command('asr')
        .description('Transcribe an audio file using speech recognition')
        .argument('<file>', 'Audio file to transcribe')
        .option('-j, --json', 'Output result in JSON format', false)
        .option('--model <name>', 'Model to use: whisper, sensevoice', 'sensevoice')
        .action(async (file, options) => {
            const { model } = options;
            if (model !== 'whisper' && model !== 'sensevoice') {
                throw new Error(`Unknown model "${model}". Use "whisper" or "sensevoice".`);
            }
            await ensureModels([model]);
            await runAsr(file, { json: options.json, model });
        });
    program
        .command('asr-stream')
        .description('Stream speech recognition from stdin (expects 16kHz mono s16le PCM)')
        .option('-j, --json', 'Output each result as a JSON line', false)
        .option('--vad', 'Enable voice activity detection', false)
        .option('--asr-interval-ms <ms>', 'Recognition interval in ms (ignored with --vad)', '1000')
        .action(async (options) => {
            const asrIntervalMs = Number(options.asrIntervalMs);
            // The interval is only used without --vad. A NaN or non-positive value
            // would silently disable interim results, so reject it up front.
            if (!options.vad && !(Number.isFinite(asrIntervalMs) && asrIntervalMs > 0)) {
                throw new Error(`Invalid --asr-interval-ms "${options.asrIntervalMs}". Expected a positive number of milliseconds.`);
            }
            await ensureModels();
            if (options.vad) {
                await ensureVadModel();
            }
            async function* stdinAudio() {
                // A pipe may split the byte stream anywhere, so a chunk can end in
                // the middle of a 16-bit sample. Carry that byte into the next chunk
                // instead of dropping it, which would misalign all later samples.
                let leftover;
                // eslint-disable-next-line @typescript-eslint/await-thenable
                for await (const chunk of process.stdin) {
                    // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
                    const incoming = Buffer.from(chunk);
                    const bytes = leftover ? Buffer.concat([leftover, incoming]) : incoming;
                    const sampleCount = Math.floor(bytes.byteLength / 2);
                    leftover = bytes.byteLength % 2 === 1 ? bytes.subarray(bytes.byteLength - 1) : undefined;
                    if (sampleCount === 0) {
                        continue;
                    }
                    const float32 = new Float32Array(sampleCount);
                    for (let i = 0; i < sampleCount; i++) {
                        // Read little-endian explicitly: the input is documented as
                        // s16le regardless of the host byte order.
                        float32[i] = bytes.readInt16LE(i * 2) / 32_768;
                    }
                    yield float32;
                }
            }
            await streamAsr(stdinAudio(), {
                vad: options.vad || undefined,
                asrIntervalMs,
                onResult(result) {
                    if (options.json) {
                        console.log(JSON.stringify(result));
                    }
                    else {
                        console.log(result.text);
                    }
                },
            });
        });
}
export { ensureModels, ensureVadModel, getModelPath, getVadModelPath, modelDisplayNames, } from './models.js';
export { streamAsr, type AsrStreamResult, type StreamAsrOptions, type VadOptions, } from './stream-asr.js';
export { ensureModels, ensureVadModel, getModelPath, getVadModelPath, modelDisplayNames, } from './models.js';
export { streamAsr, } from './stream-asr.js';
/** Names of the speech recognition models that `runAsr` accepts. */
type ModelName = 'whisper' | 'sensevoice';
/** Options for {@link runAsr}. */
export type AsrOptions = {
/** Print the result as a JSON object instead of plain text. */
json: boolean;
/** Recognition model to use. */
model: ModelName;
};
/**
 * Transcribe an audio file and print the result to stdout.
 * Input without a `.wav` extension is first converted to a temporary WAV file with ffmpeg.
 * @throws If the file does not exist or the conversion fails.
 */
export declare function runAsr(filePath: string, options: AsrOptions): Promise<void>;
export {};
import fs from 'node:fs';
import { createRequire } from 'node:module';
import os from 'node:os';
import path from 'node:path';
import { execa } from 'execa';
import { getModelPath, modelDisplayNames } from './models.js';
const require = createRequire(import.meta.url);
// Cached handle to the native addon; stays unset until the first call so the
// addon is only loaded when recognition is actually needed.
let _sherpaOnnx;
/** Return the `sherpa-onnx-node` module, requiring it on first use. */
function sherpaOnnx() {
    if (_sherpaOnnx === undefined || _sherpaOnnx === null) {
        // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
        _sherpaOnnx = require('sherpa-onnx-node');
    }
    return _sherpaOnnx;
}
/**
 * Convert an audio file to a temporary 16 kHz mono 16-bit PCM WAV using ffmpeg.
 *
 * @param inputPath - Path of the audio file to convert.
 * @returns The path of the generated WAV file in the OS temp directory; the
 *   caller is responsible for deleting it.
 * @throws If ffmpeg is not installed, or if ffmpeg fails to convert the input.
 */
async function convertToWav(inputPath) {
    // Date.now() alone can collide when two conversions start within the same
    // millisecond, so add a random suffix.
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const outputPath = path.join(os.tmpdir(), `coli-${uniqueSuffix}.wav`);
    try {
        // eslint-disable-next-line @typescript-eslint/await-thenable
        await execa('ffmpeg', [
            '-i',
            inputPath,
            '-ar',
            '16000',
            '-ac',
            '1',
            '-f',
            'wav',
            '-acodec',
            'pcm_s16le',
            outputPath,
            '-y',
        ]);
    }
    catch (error) {
        // Do not leave a partially written file behind.
        fs.rmSync(outputPath, { force: true });
        // ENOENT means the ffmpeg binary could not be spawned at all.
        if (error?.code === 'ENOENT') {
            throw new Error('Failed to convert audio file. Please make sure ffmpeg is installed.\n' +
                ' brew install ffmpeg # macOS\n' +
                ' sudo apt install ffmpeg # Debian/Ubuntu');
        }
        // ffmpeg ran but rejected the input: report its last stderr line rather
        // than blaming the installation.
        const stderrLines = String(error?.stderr ?? '')
            .split('\n')
            .map((line) => line.trim())
            .filter(Boolean);
        const detail = stderrLines[stderrLines.length - 1] ?? error?.shortMessage ?? String(error);
        throw new Error(`Failed to convert audio file: ${detail}`);
    }
    return outputPath;
}
/**
 * Build an offline recognizer for the requested model.
 *
 * @param model - `'whisper'` selects the Whisper tiny.en files; any other value
 *   selects the SenseVoice files.
 * @returns A new `OfflineRecognizer` from `sherpa-onnx-node`.
 */
function createRecognizer(model) {
    const modelDir = getModelPath(model);
    const onnx = sherpaOnnx();
    const inModelDir = (fileName) => path.join(modelDir, fileName);
    // Runtime settings shared by both model configurations.
    const runtime = { numThreads: 2, provider: 'cpu', debug: 0 };
    const modelConfig = model === 'whisper'
        ? {
            whisper: {
                encoder: inModelDir('tiny.en-encoder.int8.onnx'),
                decoder: inModelDir('tiny.en-decoder.int8.onnx'),
            },
            tokens: inModelDir('tiny.en-tokens.txt'),
            ...runtime,
        }
        : {
            senseVoice: {
                model: inModelDir('model.int8.onnx'),
                useInverseTextNormalization: 1,
            },
            tokens: inModelDir('tokens.txt'),
            ...runtime,
        };
    return new onnx.OfflineRecognizer({
        featConfig: { sampleRate: 16_000, featureDim: 80 },
        modelConfig,
    });
}
/**
 * Transcribe an audio file and print the result to stdout.
 *
 * Files without a `.wav` extension are converted to a temporary WAV first;
 * that temporary file is removed afterwards, whether or not recognition succeeds.
 *
 * @param filePath - Path to the audio file, resolved against the current directory.
 * @param options - `json` selects JSON output; `model` selects the recognizer.
 * @throws If the file does not exist.
 */
export async function runAsr(filePath, options) {
    const resolvedPath = path.resolve(filePath);
    if (!fs.existsSync(resolvedPath)) {
        throw new Error(`File not found: ${resolvedPath}`);
    }
    const isWav = path.extname(resolvedPath).toLowerCase() === '.wav';
    const wavPath = isWav ? resolvedPath : await convertToWav(resolvedPath);
    // Only a file this function created may be deleted.
    const needsCleanup = !isWav;
    try {
        const onnx = sherpaOnnx();
        const recognizer = createRecognizer(options.model);
        const stream = recognizer.createStream();
        const wave = onnx.readWave(wavPath);
        stream.acceptWaveform({ sampleRate: wave.sampleRate, samples: wave.samples });
        recognizer.decode(stream);
        const result = recognizer.getResult(stream);
        const text = result.text.trim();
        if (!options.json) {
            console.log(text);
            return;
        }
        const report = {
            text,
            model: modelDisplayNames[options.model],
            lang: result.lang || undefined,
            emotion: result.emotion || undefined,
            event: result.event || undefined,
            tokens: result.tokens,
            timestamps: result.timestamps,
            duration: wave.samples.length / wave.sampleRate,
        };
        console.log(JSON.stringify(report, null, 2));
    }
    finally {
        if (needsCleanup && fs.existsSync(wavPath)) {
            fs.unlinkSync(wavPath);
        }
    }
}
/** Names of the downloadable speech recognition models. */
type ModelName = 'whisper' | 'sensevoice';
/** Display name for each model. */
export declare const modelDisplayNames: Record<ModelName, string>;
/** Directory under the models folder where the given model is extracted. */
export declare function getModelPath(model: ModelName): string;
/** Download and extract any of the given models that are not installed yet (defaults to `sensevoice`). */
export declare function ensureModels(modelNames?: ModelName[]): Promise<void>;
/** Path of the VAD model file inside the models folder. */
export declare function getVadModelPath(): string;
/** Download the VAD model unless it is already present. */
export declare function ensureVadModel(): Promise<void>;
export {};
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import process from 'node:process';
import { execa } from 'execa';
// All models are stored under `~/.coli/models`.
const modelsDirectory = path.join(os.homedir(), '.coli', 'models');
// Download metadata per model:
// - dirName: directory name of the model under `modelsDirectory`
// - url: location of the `.tar.bz2` archive
// - checkFile: file inside `dirName` whose presence marks the model as installed
const models = {
whisper: {
dirName: 'sherpa-onnx-whisper-tiny.en',
url: 'https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2',
checkFile: 'tiny.en-encoder.int8.onnx',
},
sensevoice: {
dirName: 'sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17',
url: 'https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2',
checkFile: 'model.int8.onnx',
},
};
/** Display name for each model. */
export const modelDisplayNames = {
whisper: 'whisper-tiny.en',
sensevoice: 'sensevoice-small',
};
/**
 * Resolve the directory holding the files of a model.
 * @param model - Key of the model in the `models` table.
 * @returns The model directory under the models folder.
 */
export function getModelPath(model) {
    const { dirName } = models[model];
    return path.join(modelsDirectory, dirName);
}
/**
 * Check whether a model is installed.
 * @param entry - Model metadata with `dirName` and `checkFile`.
 * @returns `true` when the entry's marker file exists in its model directory.
 */
function isModelInstalled(entry) {
    const markerFile = path.join(modelsDirectory, entry.dirName, entry.checkFile);
    return fs.existsSync(markerFile);
}
/**
 * Download a model archive into the models folder and extract it with `tar`.
 *
 * Progress is written to stdout when the server reports a content length.
 * The downloaded archive is always removed afterwards, including when the
 * download or the extraction fails, so no partial archive is left on disk.
 *
 * @param entry - Model metadata with `dirName` and `url`.
 * @throws If the HTTP request fails, the response has no body, or `tar` fails.
 */
async function downloadModel(entry) {
    const { dirName, url } = entry;
    console.log(`Downloading ${dirName}...`);
    fs.mkdirSync(modelsDirectory, { recursive: true });
    const tarPath = path.join(modelsDirectory, `${dirName}.tar.bz2`);
    const response = await fetch(url, { redirect: 'follow' });
    if (!response.ok || !response.body) {
        // Include the numeric status: `statusText` can be empty.
        throw new Error(`Failed to download model: ${response.status} ${response.statusText}`);
    }
    const contentLength = Number(response.headers.get('content-length') ?? 0);
    const reader = response.body.getReader();
    let downloaded = 0;
    try {
        const fileHandle = fs.openSync(tarPath, 'w');
        try {
            for (;;) {
                const { done, value } = await reader.read(); // eslint-disable-line no-await-in-loop
                if (done) {
                    break;
                }
                fs.writeSync(fileHandle, value);
                downloaded += value.length;
                if (contentLength > 0) {
                    const percent = ((downloaded / contentLength) * 100).toFixed(1);
                    const mb = (downloaded / (1024 * 1024)).toFixed(1);
                    const totalMb = (contentLength / (1024 * 1024)).toFixed(1);
                    process.stdout.write(`\r ${mb} MB / ${totalMb} MB (${percent}%)`);
                }
            }
        }
        finally {
            // Close the descriptor before tar reads the archive.
            fs.closeSync(fileHandle);
        }
        process.stdout.write('\n');
        console.log(' Extracting...');
        // eslint-disable-next-line @typescript-eslint/await-thenable
        await execa('tar', ['xjf', tarPath, '-C', modelsDirectory]);
    }
    finally {
        // Remove the archive on success and on failure alike.
        fs.rmSync(tarPath, { force: true });
    }
    console.log(` ${dirName} ready.\n`);
}
/**
 * Make sure the requested models are installed, downloading the missing ones
 * one after another.
 *
 * @param modelNames - Models to check; defaults to `['sensevoice']`.
 */
export async function ensureModels(modelNames = ['sensevoice']) {
    // Decide what is missing before starting any download.
    const missing = [];
    for (const entry of modelNames.map((name) => models[name])) {
        if (!isModelInstalled(entry)) {
            missing.push(entry);
        }
    }
    for (const entry of missing) {
        await downloadModel(entry); // eslint-disable-line no-await-in-loop
    }
}
const vadModelFile = 'silero_vad.onnx';
const vadModelUrl = 'https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx';
/**
 * Location of the VAD model file.
 * @returns The path of `vadModelFile` inside the models folder.
 */
export function getVadModelPath() {
    const vadModelPath = path.join(modelsDirectory, vadModelFile);
    return vadModelPath;
}
/**
 * Download the VAD model unless it is already present.
 *
 * The file is written to a temporary `.download` path and renamed into place
 * only once the download has completed. An interrupted download therefore never
 * leaves a truncated file at the final path, which the existence check below
 * would otherwise accept as an installed model.
 *
 * @throws If the HTTP request fails or the response has no body.
 */
export async function ensureVadModel() {
    const modelPath = getVadModelPath();
    if (fs.existsSync(modelPath)) {
        return;
    }
    console.log(`Downloading ${vadModelFile}...`);
    fs.mkdirSync(modelsDirectory, { recursive: true });
    const response = await fetch(vadModelUrl, { redirect: 'follow' });
    if (!response.ok || !response.body) {
        // Include the numeric status: `statusText` can be empty.
        throw new Error(`Failed to download VAD model: ${response.status} ${response.statusText}`);
    }
    const contentLength = Number(response.headers.get('content-length') ?? 0);
    const reader = response.body.getReader();
    const partialPath = `${modelPath}.download`;
    let downloaded = 0;
    try {
        const fileHandle = fs.openSync(partialPath, 'w');
        try {
            for (;;) {
                const { done, value } = await reader.read(); // eslint-disable-line no-await-in-loop
                if (done) {
                    break;
                }
                fs.writeSync(fileHandle, value);
                downloaded += value.length;
                if (contentLength > 0) {
                    const percent = ((downloaded / contentLength) * 100).toFixed(1);
                    const kb = (downloaded / 1024).toFixed(0);
                    const totalKb = (contentLength / 1024).toFixed(0);
                    process.stdout.write(`\r ${kb} KB / ${totalKb} KB (${percent}%)`);
                }
            }
        }
        finally {
            fs.closeSync(fileHandle);
        }
        // Publish the file only after every byte has been written.
        fs.renameSync(partialPath, modelPath);
    }
    catch (error) {
        fs.rmSync(partialPath, { force: true });
        throw error;
    }
    process.stdout.write('\n');
    console.log(` ${vadModelFile} ready.\n`);
}
/**
 * A recognition result passed to `onResult`.
 * All fields except `isFinal` are copied from the recognizer's result, with `text` trimmed.
 */
export type AsrStreamResult = {
text: string;
lang: string;
emotion: string;
event: string;
tokens: string[];
timestamps: number[];
/** `false` for interim results of the interval mode; `true` for VAD segments and the final pass. */
isFinal: boolean;
};
/**
 * Tuning options for voice activity detection.
 * NOTE(review): the durations are presumably in seconds — confirm against sherpa-onnx.
 */
export type VadOptions = {
/** Defaults to `0.5`. */
threshold?: number;
/** Defaults to `0.25`. */
minSpeechDuration?: number;
/** Defaults to `0.5`. */
minSilenceDuration?: number;
/** Defaults to `15`. */
maxSpeechDuration?: number;
/** Passed through to the VAD when reading a detected segment. */
enableExternalBuffer?: boolean;
};
/** Options for {@link streamAsr}. */
export type StreamAsrOptions = {
/** Sample rate of the input, used to size the recognition interval; defaults to `16000`. */
sampleRate?: number;
/** Interval between interim recognitions in milliseconds; defaults to `1000`. Not used when `vad` is enabled. */
asrIntervalMs?: number;
/** `true` or an options object enables VAD-based segmentation. */
vad?: boolean | VadOptions;
/** Called for every non-empty recognition result. */
onResult: (result: AsrStreamResult) => void;
};
/** Recognize speech from a stream of float sample chunks, reporting results through `options.onResult`. */
export declare function streamAsr(audio: AsyncIterable<Float32Array>, options: StreamAsrOptions): Promise<void>;
import { createRequire } from 'node:module';
import path from 'node:path';
import { getModelPath, getVadModelPath } from './models.js';
const require = createRequire(import.meta.url);
// Cached handle to the native addon, filled in on first use.
let _sherpaOnnx;
/** Return the `sherpa-onnx-node` module, requiring it on first use. */
function sherpaOnnx() {
    if (_sherpaOnnx === undefined || _sherpaOnnx === null) {
        // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
        _sherpaOnnx = require('sherpa-onnx-node');
    }
    return _sherpaOnnx;
}
const defaultSampleRate = 16_000;
const defaultAsrIntervalMs = 1000;
/**
 * Build the SenseVoice offline recognizer used for streaming recognition.
 * @returns A new `OfflineRecognizer` from `sherpa-onnx-node`.
 */
function createRecognizer() {
    const modelDir = getModelPath('sensevoice');
    const onnx = sherpaOnnx();
    const featConfig = { sampleRate: defaultSampleRate, featureDim: 80 };
    const senseVoice = {
        model: path.join(modelDir, 'model.int8.onnx'),
        useInverseTextNormalization: 1,
    };
    const modelConfig = {
        senseVoice,
        tokens: path.join(modelDir, 'tokens.txt'),
        numThreads: 2,
        provider: 'cpu',
        debug: 0,
    };
    return new onnx.OfflineRecognizer({ featConfig, modelConfig });
}
/**
 * Run one offline recognition pass over a block of samples.
 *
 * NOTE(review): the samples are always labelled with `defaultSampleRate`; a
 * caller's `options.sampleRate` is not forwarded here.
 *
 * @param recognizer - Recognizer created by `createRecognizer`.
 * @param samples - Audio samples to decode.
 * @returns The recognizer's result for these samples.
 */
function recognize(recognizer, samples) {
    const session = recognizer.createStream();
    const waveform = { sampleRate: defaultSampleRate, samples };
    session.acceptWaveform(waveform);
    recognizer.decode(session);
    return recognizer.getResult(session);
}
/**
 * Concatenate sample chunks into one `Float32Array`.
 *
 * @param buffers - Chunks in playback order.
 * @param totalLength - Combined length of all chunks.
 * @returns The single chunk itself when there is exactly one (no copy),
 *   otherwise a new array of `totalLength` samples.
 */
function mergeBuffers(buffers, totalLength) {
    const [only] = buffers;
    if (only && buffers.length === 1) {
        return only;
    }
    const merged = new Float32Array(totalLength);
    buffers.reduce((offset, part) => {
        merged.set(part, offset);
        return offset + part.length;
    }, 0);
    return merged;
}
/**
 * Create a Silero voice activity detector.
 *
 * @param vadOptions - Optional overrides; unset (nullish) values fall back to
 *   threshold `0.5`, minSpeechDuration `0.25`, minSilenceDuration `0.5` and
 *   maxSpeechDuration `15`.
 * @returns A new `Vad` from `sherpa-onnx-node`.
 */
function createVad(vadOptions) {
    const onnx = sherpaOnnx();
    const sileroVad = {
        model: getVadModelPath(),
        threshold: vadOptions.threshold ?? 0.5,
        minSpeechDuration: vadOptions.minSpeechDuration ?? 0.25,
        minSilenceDuration: vadOptions.minSilenceDuration ?? 0.5,
        maxSpeechDuration: vadOptions.maxSpeechDuration ?? 15,
        windowSize: 512,
    };
    const config = {
        sileroVad,
        sampleRate: defaultSampleRate,
        debug: 0,
        numThreads: 1,
    };
    // NOTE(review): the second argument is presumably the VAD buffer size in
    // seconds — confirm against sherpa-onnx-node.
    return new onnx.Vad(config, 60);
}
/**
 * Forward a recognition result to the callback unless its text is blank.
 *
 * @param result - Raw recognizer result; `text` is trimmed before forwarding.
 * @param isFinal - Flag copied onto the forwarded result.
 * @param onResult - Callback receiving the result.
 */
function emitResult(result, isFinal, onResult) {
    const trimmed = result.text.trim();
    if (trimmed.length === 0) {
        return;
    }
    onResult({ ...result, text: trimmed, isFinal });
}
/**
 * Recognize speech segment by segment using voice activity detection.
 *
 * Audio is fed to the VAD in frames of exactly `windowSize` samples. Every
 * segment the VAD reports is recognized and emitted as a final result.
 *
 * @param audio - Async iterable of float sample chunks.
 * @param options - Stream options; only `onResult` is used here.
 * @param vadOptions - VAD tuning options (may be an empty object).
 */
async function streamWithVad(audio, options, vadOptions) {
const recognizer = createRecognizer();
const vad = createVad(vadOptions);
const { windowSize } = vad.config.sileroVad;
// Samples received so far that do not yet fill a whole VAD window.
let pending = new Float32Array(0);
// Recognize and emit every segment currently queued in the VAD.
function drainSegments() {
while (!vad.isEmpty()) {
const segment = vad.front(vadOptions.enableExternalBuffer);
vad.pop();
emitResult(recognize(recognizer, segment.samples), true, options.onResult);
}
}
// eslint-disable-next-line @typescript-eslint/await-thenable
for await (const chunk of audio) {
// Append the new chunk to the leftover samples.
const combined = new Float32Array(pending.length + chunk.length);
combined.set(pending);
combined.set(chunk, pending.length);
pending = combined;
// Feed as many full windows as are available, draining after each one.
while (pending.length >= windowSize) {
vad.acceptWaveform(pending.subarray(0, windowSize));
pending = pending.subarray(windowSize);
drainSegments();
}
}
// Pad the tail to a full window; a new Float32Array is zero-filled.
if (pending.length > 0) {
const padded = new Float32Array(windowSize);
padded.set(pending);
vad.acceptWaveform(padded);
}
// Flush the VAD at end of input, then emit whatever segments remain.
vad.flush();
drainSegments();
}
/**
 * Recognize speech by re-running recognition over everything received so far
 * at a fixed interval.
 *
 * Interim results are emitted with `isFinal: false`, and only when the text
 * differs from the previous interim text. When the input ends, one last pass
 * over all samples is emitted with `isFinal: true`.
 *
 * @param audio - Async iterable of float sample chunks.
 * @param options - `sampleRate` and `asrIntervalMs` size the interval;
 *   `onResult` receives the results.
 */
async function streamWithInterval(audio, options) {
    const inputSampleRate = options.sampleRate ?? defaultSampleRate;
    const intervalMs = options.asrIntervalMs ?? defaultAsrIntervalMs;
    const chunkInterval = (defaultSampleRate * intervalMs) / 1000;
    // Number of input samples that make up one recognition interval.
    const samplesPerInterval = (chunkInterval * inputSampleRate) / defaultSampleRate;
    const recognizer = createRecognizer();
    const received = [];
    let receivedSamples = 0;
    let recognizedUpTo = 0;
    let previousText = '';
    // eslint-disable-next-line @typescript-eslint/await-thenable
    for await (const chunk of audio) {
        received.push(chunk);
        receivedSamples += chunk.length;
        if (receivedSamples - recognizedUpTo < samplesPerInterval) {
            continue;
        }
        recognizedUpTo = receivedSamples;
        const interim = recognize(recognizer, mergeBuffers(received, receivedSamples));
        const text = interim.text.trim();
        if (text && text !== previousText) {
            previousText = text;
            options.onResult({ ...interim, text, isFinal: false });
        }
    }
    const everything = mergeBuffers(received, receivedSamples);
    if (everything.length > 0) {
        emitResult(recognize(recognizer, everything), true, options.onResult);
    }
}
/**
 * Recognize speech from a stream of float sample chunks.
 *
 * @param audio - Async iterable of float sample chunks.
 * @param options - A truthy `vad` selects VAD segmentation (an object value
 *   supplies its options); otherwise interval-based recognition is used.
 */
export async function streamAsr(audio, options) {
    const { vad } = options;
    if (!vad) {
        return streamWithInterval(audio, options);
    }
    const vadOptions = typeof vad === 'object' ? vad : {};
    return streamWithVad(audio, options, vadOptions);
}
#!/usr/bin/env node
export {};
#!/usr/bin/env node
import { Command } from 'commander';
import { register as registerAsr } from './asr/_cli.js';
import { register as registerCloudTts } from './cloud-tts/_cli.js';
import { register as registerTts } from './tts/_cli.js';
// Build the root `coli` command and attach each feature's subcommands.
const program = new Command();
program.name('coli').description('Core CLI for Cola');
registerAsr(program);
registerTts(program);
registerCloudTts(program);
// Parse the command line and dispatch to the matching subcommand.
program.parse();
import type { Command } from 'commander';
export declare function register(program: Command): void;
import process from 'node:process';
import { defaultSpeaker } from '../_api/constants.js';
import { listSpeakers, runCloudTts } from './cloud-tts.js';
/**
 * Resolve the ListenHub API key.
 *
 * @param options - CLI options; `apiKey` takes precedence when it is not nullish.
 * @returns The key from the options or from `COLI_LISTENHUB_API_KEY`.
 * @throws If no non-empty key is available.
 */
function getApiKey(options) {
    const fromOptions = options.apiKey;
    const resolved = fromOptions === undefined || fromOptions === null
        ? process.env['COLI_LISTENHUB_API_KEY']
        : fromOptions;
    if (resolved) {
        return resolved;
    }
    throw new Error('API key required. Use --api-key or set COLI_LISTENHUB_API_KEY environment variable. Get an API key from https://listenhub.ai/settings/api-keys');
}
/**
 * Resolve the optional custom base URL for the TTS API.
 *
 * Empty strings are treated as "not set". Returning `''` would bypass the
 * client's `??` default and leave requests without a usable URL prefix.
 *
 * @param options - CLI options; a non-empty `baseUrl` takes precedence.
 * @returns The URL from the options or from `COLI_TTS_BASE_URL`, or
 *   `undefined` when neither is set to a non-empty value.
 */
function getBaseUrl(options) {
    return options.baseUrl || process.env['COLI_TTS_BASE_URL'] || undefined;
}
/**
 * Register the `cloud-tts` subcommand on a commander program.
 *
 * With `--list-speakers` the command prints the available speakers (tab
 * separated, or JSON with `--json`). Otherwise it synthesizes the given text
 * with the chosen voice, or with the default speaker of `--language`.
 *
 * @param program - The commander program to attach the subcommand to.
 */
export function register(program) {
    // Print the speaker list.
    const printSpeakers = async (options) => {
        const apiKey = getApiKey(options);
        const baseUrl = getBaseUrl(options);
        const speakers = await listSpeakers({ apiKey, baseUrl, language: options.language });
        if (options.json) {
            console.log(JSON.stringify(speakers, null, 2));
            return;
        }
        for (const speaker of speakers) {
            console.log(`${speaker.name}\t${speaker.speakerId}\t${speaker.gender}\t${speaker.language}`);
        }
    };
    // Synthesize `text`; the arguments are validated before the API key is resolved.
    const speak = async (text, options) => {
        if (!text) {
            throw new Error('Please provide text to synthesize.');
        }
        const voice = options.voice ??
            (options.language && defaultSpeaker[options.language]);
        if (!voice) {
            throw new Error('Please specify a speaker with --voice or a language with --language. Use --list-speakers to see available speakers.');
        }
        const apiKey = getApiKey(options);
        const baseUrl = getBaseUrl(options);
        await runCloudTts(text, {
            apiKey,
            baseUrl,
            voice,
            model: options.model,
            output: options.output,
        });
    };
    program
        .command('cloud-tts')
        .description('Generate speech using ListenHub OpenAPI')
        .argument('[text]', 'Text to synthesize')
        .option('--api-key <key>', 'ListenHub API key (or set COLI_LISTENHUB_API_KEY environment variable)')
        .option('--voice <id>', 'Speaker ID to use')
        .option('--model <name>', 'Model to use (default: flowtts)')
        .option('--base-url <url>', 'Base URL for TTS API (or set COLI_TTS_BASE_URL environment variable)')
        .option('-o, --output <file>', 'Save audio to file')
        .option('--list-speakers', 'List available speakers')
        .option('--language <lang>', 'Speaker language (en, zh, ja)')
        .option('-j, --json', 'Output in JSON format (use with --list-speakers)')
        .action(async (text, options) => {
            if (options.listSpeakers) {
                await printSpeakers(options);
                return;
            }
            await speak(text, options);
        });
}
export { listSpeakers, runCloudTts, type CloudTtsOptions, type ListSpeakersOptions, } from './cloud-tts.js';
export { listSpeakers, runCloudTts, } from './cloud-tts.js';
import type { SpeakerLanguage } from '../_api/types.js';
/** Options for {@link runCloudTts}. */
export type CloudTtsOptions = {
/** ListenHub API key. */
apiKey: string;
/** Speaker ID to synthesize with. */
voice: string;
/** Optional model name, forwarded to the TTS request. */
model?: string;
/** When set, the audio is written to this file instead of being played. */
output?: string;
/** Optional custom base URL for the API. */
baseUrl?: string;
};
/** Options for {@link listSpeakers}. */
export type ListSpeakersOptions = {
/** ListenHub API key. */
apiKey: string;
/** Optional language filter, forwarded to the speakers request. */
language?: SpeakerLanguage;
/** Optional custom base URL for the API. */
baseUrl?: string;
};
/** Fetch the available speakers and return the `items` of the response payload. */
export declare function listSpeakers(options: ListSpeakersOptions): Promise<{
name: string;
speakerId: string;
demoAudioUrl: string;
gender: string;
language: SpeakerLanguage;
}[]>;
/** Synthesize `text`; saves to `options.output` when given, otherwise plays the audio with `afplay`. */
export declare function runCloudTts(text: string, options: CloudTtsOptions): Promise<void>;
import { Buffer } from 'node:buffer';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { Writable } from 'node:stream';
import { execa } from 'execa';
import { ListenHubApi } from '../_api/listenhub-openapi.js';
/**
 * Fetch the speakers available on ListenHub.
 *
 * @param options - `apiKey`, optional `baseUrl`, and optional `language` filter.
 * @returns The `items` array of the response payload.
 */
export async function listSpeakers(options) {
    const { apiKey, baseUrl, language } = options;
    const client = new ListenHubApi({ apiKey, baseUrl });
    const { data } = await client.getAvailableSpeakers({ language });
    return data.items;
}
/**
 * Read a web `ReadableStream` to the end.
 *
 * @param stream - Stream of byte chunks.
 * @returns One `Buffer` holding all chunks in order.
 */
async function collectStream(stream) {
    const reader = stream.getReader();
    const parts = [];
    let step = await reader.read();
    while (!step.done) {
        parts.push(step.value);
        step = await reader.read(); // eslint-disable-line no-await-in-loop
    }
    return Buffer.concat(parts);
}
/**
 * Synthesize speech with the ListenHub TTS API.
 *
 * With `options.output` the MP3 stream is piped straight into that file.
 * Otherwise the audio is written to a uniquely named temporary file, played
 * with `afplay`, and the temporary file is removed afterwards.
 *
 * @param text - Text to synthesize.
 * @param options - `apiKey`, `voice`, optional `model`, `output` and `baseUrl`.
 * @throws If the request fails, or if `afplay` is unavailable or fails during playback.
 */
export async function runCloudTts(text, options) {
    const api = new ListenHubApi({
        apiKey: options.apiKey,
        baseUrl: options.baseUrl,
    });
    const stream = await api.tts({
        input: text,
        voice: options.voice,
        model: options.model,
    });
    if (options.output) {
        const fileStream = fs.createWriteStream(options.output);
        await stream.pipeTo(Writable.toWeb(fileStream));
        return;
    }
    // Date.now() alone can collide when two runs start within the same
    // millisecond, so add a random suffix.
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const mp3Path = path.join(os.tmpdir(), `coli-cloud-tts-${uniqueSuffix}.mp3`);
    try {
        fs.writeFileSync(mp3Path, await collectStream(stream));
        // eslint-disable-next-line @typescript-eslint/await-thenable
        await execa('afplay', [mp3Path]);
    }
    catch (error) {
        // ENOENT means the `afplay` binary could not be spawned (e.g. not on macOS).
        if (error?.code === 'ENOENT' && fs.existsSync(mp3Path)) {
            throw new Error('Audio playback requires the macOS `afplay` command. Set the `output` option (--output) to save the audio to a file instead.');
        }
        throw error;
    }
    finally {
        // `force` keeps a missing file from masking the original error.
        fs.rmSync(mp3Path, { force: true });
    }
}
export * from './_api/listenhub-openapi.js';
export * from './asr/_index.js';
export * from './cloud-tts/_index.js';
export * from './tts/_index.js';
export * from './_api/listenhub-openapi.js';
export * from './asr/_index.js';
export * from './cloud-tts/_index.js';
export * from './tts/_index.js';
import type { Command } from 'commander';
export declare function register(program: Command): void;
import { getVoices, runTts } from './tts.js';
/**
 * Register the `tts` subcommand on a commander program.
 *
 * With `--list-voices` the command prints the available voices (tab separated,
 * or JSON with `--json`); otherwise it speaks or saves the given text.
 *
 * @param program - The commander program to attach the subcommand to.
 */
export function register(program) {
    const handleTts = async (text, options) => {
        if (!options.listVoices) {
            if (!text) {
                throw new Error('Please provide text to speak.');
            }
            await runTts(text, options);
            return;
        }
        const voices = await getVoices();
        if (options.json) {
            console.log(JSON.stringify(voices, null, 2));
            return;
        }
        for (const voice of voices) {
            console.log(`${voice.name}\t${voice.languageCode}\t${voice.example}`);
        }
    };
    program
        .command('tts')
        .description('Speak text using text-to-speech (macOS only)')
        .argument('[text]', 'Text to speak')
        .option('-v, --voice <name>', 'Voice to use, defaults to macOS system voice')
        .option('-r, --rate <wpm>', 'Speech rate in words per minute', Number)
        .option('-o, --output <file>', 'Save audio to file instead of speaking')
        .option('--list-voices', 'List available voices')
        .option('-j, --json', 'Output in JSON format (use with --list-voices)')
        .action(handleTts);
}
export { getVoices, runTts, type TtsOptions } from './tts.js';
export { getVoices, runTts } from './tts.js';
import { type Voice } from 'mac-say';
/** Options for {@link runTts}; each field is forwarded to `mac-say`'s `say`. */
export type TtsOptions = {
/** Voice name. */
voice?: string;
/** Speech rate. */
rate?: number;
/** Forwarded to `say` as `outputFile`. */
output?: string;
};
/** List the voices reported by `mac-say`. */
export declare function getVoices(): Promise<Voice[]>;
/** Speak `text` with `mac-say`, applying the given options. */
export declare function runTts(text: string, options?: TtsOptions): Promise<void>;
import { getVoices as macGetVoices, say } from 'mac-say';
/**
 * List the voices reported by `mac-say`.
 * @returns The voices returned by `mac-say`'s `getVoices`.
 */
export async function getVoices() {
    const voices = await macGetVoices();
    return voices;
}
/**
 * Speak text with `mac-say`.
 *
 * @param text - Text to speak.
 * @param options - Optional `voice`, `rate` and `output`; `output` is passed
 *   to `say` as `outputFile`.
 */
export async function runTts(text, options = {}) {
    const { voice, rate, output: outputFile } = options;
    await say(text, { voice, rate, outputFile });
}
+10
-6

@@ -8,2 +8,3 @@ # Cloud TTS

- A ListenHub API key. Pass it via `--api-key` or set the `COLI_LISTENHUB_API_KEY` environment variable.
- Optionally, a custom base URL via `--base-url` or the `COLI_TTS_BASE_URL` environment variable.

@@ -39,2 +40,3 @@ ## CLI

--model <name> Model to use (default: flowtts)
--base-url <url> Base URL for TTS API (or set COLI_TTS_BASE_URL)
-o, --output <file> Save audio to file

@@ -70,2 +72,3 @@ --list-speakers List available speakers

| `language` | `'en' \| 'zh' \| 'ja'` | Filter speakers by language. Omit to list all. |
| `baseUrl` | `string` | Custom base URL for TTS API (optional) |

@@ -89,7 +92,8 @@ ### `runCloudTts(text, options)`

| Property | Type | Description |
| -------- | -------- | ---------------------------------------------- |
| `apiKey` | `string` | ListenHub API key |
| `voice` | `string` | Speaker ID (from `listSpeakers`) |
| `model` | `string` | Model to use (optional, defaults to `flowtts`) |
| `output` | `string` | Save to file instead of playing directly |
| Property | Type | Description |
| --------- | -------- | ---------------------------------------------- |
| `apiKey` | `string` | ListenHub API key |
| `voice` | `string` | Speaker ID (from `listSpeakers`) |
| `model` | `string` | Model to use (optional, defaults to `flowtts`) |
| `output` | `string` | Save to file instead of playing directly |
| `baseUrl` | `string` | Custom base URL for TTS API (optional) |

@@ -11,2 +11,8 @@ # ListenHub OpenAPI

const api = new ListenHubApi({apiKey: 'lh_sk_...'});
// Or with a custom base URL
const api = new ListenHubApi({
apiKey: 'lh_sk_...',
baseUrl: 'https://custom-api.example.com/openapi',
});
```

@@ -13,0 +19,0 @@

{
"name": "@marswave/coli",
"private": false,
"version": "0.0.13",
"version": "0.0.14",
"description": "A CLI for the Cola",
"repository": "marswaveai/coli",
"type": "module",
"bin": "distribution/cli.js",
"exports": "./distribution/index.js",
"types": "distribution",
"bin": "distribution/source/cli.js",
"exports": "./distribution/source/index.js",
"types": "distribution/source",
"files": [

@@ -19,3 +19,4 @@ "distribution",

"clean": "del-cli distribution",
"build": "node --run clean && tsc && chmod +x distribution/cli.js",
"dev": "node --run clean && tsc --watch",
"build": "node --run clean && tsc && chmod +x distribution/source/cli.js",
"pretest": "node --run build",

@@ -29,3 +30,3 @@ "test": "xo"

"mac-say": "^0.3.3",
"sherpa-onnx-node": "^1.12.29"
"sherpa-onnx-node": "^1.12.33"
},

@@ -36,5 +37,5 @@ "devDependencies": {

"del-cli": "^7.0.0",
"typescript": "^5.9.3",
"xo": "^1.2.3"
"typescript": "^6.0.2",
"xo": "^2.0.2"
}
}
import type { SpeakerLanguage } from './types.js';
export declare const defaultSpeaker: Record<SpeakerLanguage, string>;
/** Default speaker ID for each supported language. */
export const defaultSpeaker = {
en: 'chat-girl-105-cn',
zh: 'leo-9328b6d2',
ja: 'tianzhongdunzi-5d612542',
};
import { type KyInstance } from 'ky';
import type { ApiResponse, SpeakerLanguage } from './types.js';
export * from './constants.js';
export type * from './types.js';
/** Constructor options for {@link ListenHubApi}. */
export type ListenHubApiOptions = {
/** API key, sent as a `Bearer` token in the `Authorization` header of every request. */
apiKey: string;
};
export declare class ListenHubApi {
api: KyInstance;
constructor({ apiKey }: ListenHubApiOptions);
/**
* Get a list of available speakers.
* @param options - The options for the speakers request.
* @param options.language - Optional. The language of the speakers to get, defaults to English.
* @returns A list of available speakers.
* @see {@link https://listenhub.ai/docs/en/openapi/api-reference/speakers#list-available-speakers|List Available Speakers}
*/
getAvailableSpeakers(options?: {
language?: SpeakerLanguage;
}): Promise<ApiResponse<{
items: Array<{
name: string;
speakerId: string;
demoAudioUrl: string;
gender: string;
language: SpeakerLanguage;
}>;
}>>;
/**
* Generate audio from text using the Streaming TTS API.
* @param options - The options for the TTS request.
* @param options.input - The text to generate audio from.
* @param options.voice - The `speakerId` to use for the TTS.
* @param options.model - Optional. The model to use for the TTS, defaults to `flowtts`.
* @returns A readable stream of the MP3 audio.
* @see {@link https://listenhub.ai/docs/en/openapi/api-reference/flowspeech|Streaming TTS}
*/
tts(options: {
input: string;
voice: string;
model?: string;
}): Promise<ReadableStream<Uint8Array<ArrayBuffer>>>;
}
import ky from 'ky';
export * from './constants.js';
/**
 * Client for the ListenHub OpenAPI.
 * Wraps a `ky` instance that carries the URL prefix and bearer token.
 */
export class ListenHubApi {
    api;
    constructor({ apiKey }) {
        const headers = {
            // eslint-disable-next-line @typescript-eslint/naming-convention
            Authorization: `Bearer ${apiKey}`,
        };
        this.api = ky.extend({
            prefixUrl: 'https://api.marswave.ai/openapi',
            headers,
        });
    }
    /**
     * Get a list of available speakers.
     * @param options - The options for the speakers request.
     * @param options.language - Optional. The language of the speakers to get, defaults to English.
     * @returns A list of available speakers.
     * @see {@link https://listenhub.ai/docs/en/openapi/api-reference/speakers#list-available-speakers|List Available Speakers}
     */
    async getAvailableSpeakers(options) {
        const request = this.api.get('v1/speakers/list', { searchParams: options });
        return request.json();
    }
    /**
     * Generate audio from text using the Streaming TTS API.
     * @param options - The options for the TTS request.
     * @param options.input - The text to generate audio from.
     * @param options.voice - The `speakerId` to use for the TTS.
     * @param options.model - Optional. The model to use for the TTS, defaults to `flowtts`.
     * @returns A readable stream of the MP3 audio.
     * @throws If the response carries no body.
     * @see {@link https://listenhub.ai/docs/en/openapi/api-reference/flowspeech|Streaming TTS}
     */
    async tts(options) {
        const { body } = await this.api.post('v1/tts', { json: options });
        if (body) {
            return body;
        }
        throw new Error('Empty response body from TTS API');
    }
}
/**
 * Envelope wrapped around API responses; the payload is in `data`.
 * NOTE(review): the meaning of `code` and `message` is not visible in this file — confirm against the API docs.
 */
export type ApiResponse<T> = {
code: number;
message: string;
data: T;
};
/** Language codes accepted for speakers. */
export type SpeakerLanguage = 'en' | 'zh' | 'ja';
import type { Command } from 'commander';
export declare function register(program: Command): void;
import { Buffer } from 'node:buffer';
import process from 'node:process';
import { runAsr } from './asr.js';
import { ensureModels, ensureVadModel } from './models.js';
import { streamAsr } from './stream-asr.js';
/**
 * Register the `asr` and `asr-stream` subcommands on a commander program.
 *
 * - `asr <file>` validates `--model`, makes sure the model is present and
 *   transcribes the file.
 * - `asr-stream` reads 16 kHz mono s16le PCM from stdin, converts it to
 *   normalised float samples and streams it through the recognizer.
 *
 * @param program - The commander program to attach the subcommands to.
 */
export function register(program) {
    program
        .command('asr')
        .description('Transcribe an audio file using speech recognition')
        .argument('<file>', 'Audio file to transcribe')
        .option('-j, --json', 'Output result in JSON format', false)
        .option('--model <name>', 'Model to use: whisper, sensevoice', 'sensevoice')
        .action(async (file, options) => {
            const { model } = options;
            if (model !== 'whisper' && model !== 'sensevoice') {
                throw new Error(`Unknown model "${model}". Use "whisper" or "sensevoice".`);
            }
            await ensureModels([model]);
            await runAsr(file, { json: options.json, model });
        });
    program
        .command('asr-stream')
        .description('Stream speech recognition from stdin (expects 16kHz mono s16le PCM)')
        .option('-j, --json', 'Output each result as a JSON line', false)
        .option('--vad', 'Enable voice activity detection', false)
        .option('--asr-interval-ms <ms>', 'Recognition interval in ms (ignored with --vad)', '1000')
        .action(async (options) => {
            const asrIntervalMs = Number(options.asrIntervalMs);
            // The interval is only used without --vad. A NaN or non-positive value
            // would silently disable interim results, so reject it up front.
            if (!options.vad && !(Number.isFinite(asrIntervalMs) && asrIntervalMs > 0)) {
                throw new Error(`Invalid --asr-interval-ms "${options.asrIntervalMs}". Expected a positive number of milliseconds.`);
            }
            await ensureModels();
            if (options.vad) {
                await ensureVadModel();
            }
            async function* stdinAudio() {
                // A pipe may split the byte stream anywhere, so a chunk can end in
                // the middle of a 16-bit sample. Carry that byte into the next chunk
                // instead of dropping it, which would misalign all later samples.
                let leftover;
                for await (const chunk of process.stdin) {
                    const incoming = Buffer.from(chunk);
                    const bytes = leftover ? Buffer.concat([leftover, incoming]) : incoming;
                    const sampleCount = Math.floor(bytes.byteLength / 2);
                    leftover = bytes.byteLength % 2 === 1 ? bytes.subarray(bytes.byteLength - 1) : undefined;
                    if (sampleCount === 0) {
                        continue;
                    }
                    const float32 = new Float32Array(sampleCount);
                    for (let i = 0; i < sampleCount; i++) {
                        // Read little-endian explicitly: the input is documented as
                        // s16le regardless of the host byte order.
                        float32[i] = bytes.readInt16LE(i * 2) / 32_768;
                    }
                    yield float32;
                }
            }
            await streamAsr(stdinAudio(), {
                vad: options.vad || undefined,
                asrIntervalMs,
                onResult(result) {
                    if (options.json) {
                        console.log(JSON.stringify(result));
                    }
                    else {
                        console.log(result.text);
                    }
                },
            });
        });
}
export { ensureModels, ensureVadModel, getModelPath, getVadModelPath, modelDisplayNames, } from './models.js';
export { streamAsr, type AsrStreamResult, type StreamAsrOptions, type VadOptions, } from './stream-asr.js';
export { ensureModels, ensureVadModel, getModelPath, getVadModelPath, modelDisplayNames, } from './models.js';
export { streamAsr, } from './stream-asr.js';
/** Names of the speech recognition models that can be selected. */
type ModelName = 'whisper' | 'sensevoice';
/** Options accepted by `runAsr`. */
export type AsrOptions = {
/** When true, the result is printed as a JSON object instead of plain text. */
json: boolean;
/** The recognition model to use. */
model: ModelName;
};
/**
 * Transcribes the audio file at `filePath` and prints the result to stdout.
 * Files without a `.wav` extension are first converted to a temporary WAV file.
 * Throws if the file does not exist.
 */
export declare function runAsr(filePath: string, options: AsrOptions): Promise<void>;
export {};
import fs from 'node:fs';
import { createRequire } from 'node:module';
import os from 'node:os';
import path from 'node:path';
import { execa } from 'execa';
import { getModelPath, modelDisplayNames } from './models.js';
const require = createRequire(import.meta.url);
// Holds the native `sherpa-onnx-node` addon once it has been required; it stays
// unset until the first call so the addon is not loaded before it is needed.
let _sherpaOnnx;
/** Returns the `sherpa-onnx-node` module, requiring it on first use. */
function sherpaOnnx() {
    if (_sherpaOnnx === undefined || _sherpaOnnx === null) {
        _sherpaOnnx = require('sherpa-onnx-node');
    }
    return _sherpaOnnx;
}
/**
 * Convert an audio file to a 16 kHz, mono, 16-bit PCM WAV file using `ffmpeg`.
 *
 * @param inputPath - Path of the audio file to convert.
 * @returns Path of the WAV file created in the OS temp directory. The caller
 * is responsible for deleting it.
 * @throws If `ffmpeg` cannot be started or exits with an error. The underlying
 * error is attached to the thrown error as `cause`.
 */
async function convertToWav(inputPath) {
    // A random suffix keeps invocations that start within the same millisecond
    // from writing to the same temp file.
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const outputPath = path.join(os.tmpdir(), `coli-${uniqueSuffix}.wav`);
    try {
        await execa('ffmpeg', [
            '-i',
            inputPath,
            '-ar',
            '16000',
            '-ac',
            '1',
            '-f',
            'wav',
            '-acodec',
            'pcm_s16le',
            outputPath,
            '-y',
        ]);
    }
    catch (error) {
        // ffmpeg may have written a partial output file before failing; don't leak it.
        fs.rmSync(outputPath, { force: true });
        // Keep the original failure reachable for debugging instead of discarding it.
        throw Object.assign(new Error('Failed to convert audio file. Please make sure ffmpeg is installed.\n' +
            ' brew install ffmpeg # macOS\n' +
            ' sudo apt install ffmpeg # Debian/Ubuntu'), { cause: error });
    }
    return outputPath;
}
/**
 * Build an offline recognizer for the requested model.
 * `whisper` uses the tiny.en encoder/decoder files; any other value uses the
 * SenseVoice model files. Both run on CPU with two threads.
 */
function createRecognizer(model) {
    const modelDir = getModelPath(model);
    const onnx = sherpaOnnx();
    const inModelDir = (fileName) => path.join(modelDir, fileName);
    const isWhisper = model === 'whisper';
    // Only the engine-specific section and the tokens file differ between models.
    const engineConfig = isWhisper
        ? {
            whisper: {
                encoder: inModelDir('tiny.en-encoder.int8.onnx'),
                decoder: inModelDir('tiny.en-decoder.int8.onnx'),
            },
        }
        : {
            senseVoice: {
                model: inModelDir('model.int8.onnx'),
                useInverseTextNormalization: 1,
            },
        };
    const tokensFile = isWhisper ? 'tiny.en-tokens.txt' : 'tokens.txt';
    return new onnx.OfflineRecognizer({
        featConfig: { sampleRate: 16_000, featureDim: 80 },
        modelConfig: {
            ...engineConfig,
            tokens: inModelDir(tokensFile),
            numThreads: 2,
            provider: 'cpu',
            debug: 0,
        },
    });
}
/**
 * Transcribe an audio file and print the result to stdout.
 *
 * Non-WAV input is converted to a temporary WAV file first, which is removed
 * again once recognition has finished (or failed).
 *
 * @param filePath - Path of the audio file; resolved against the current directory.
 * @param options - `model` selects the recognizer, `json` switches to JSON output.
 * @throws If the file does not exist.
 */
export async function runAsr(filePath, options) {
    const absolutePath = path.resolve(filePath);
    if (!fs.existsSync(absolutePath)) {
        throw new Error(`File not found: ${absolutePath}`);
    }
    const isWav = path.extname(absolutePath).toLowerCase() === '.wav';
    const wavPath = isWav ? absolutePath : await convertToWav(absolutePath);
    // Only a file produced by the conversion step may be deleted afterwards.
    const isTemporary = !isWav;
    try {
        const onnx = sherpaOnnx();
        const recognizer = createRecognizer(options.model);
        const stream = recognizer.createStream();
        const { sampleRate, samples } = onnx.readWave(wavPath);
        stream.acceptWaveform({ sampleRate, samples });
        recognizer.decode(stream);
        const result = recognizer.getResult(stream);
        const text = result.text.trim();
        if (!options.json) {
            console.log(text);
            return;
        }
        const payload = {
            text,
            model: modelDisplayNames[options.model],
            lang: result.lang || undefined,
            emotion: result.emotion || undefined,
            event: result.event || undefined,
            tokens: result.tokens,
            timestamps: result.timestamps,
            duration: samples.length / sampleRate,
        };
        console.log(JSON.stringify(payload, null, 2));
    }
    finally {
        if (isTemporary && fs.existsSync(wavPath)) {
            fs.unlinkSync(wavPath);
        }
    }
}
/** Names of the downloadable speech recognition models. */
type ModelName = 'whisper' | 'sensevoice';
/** Display name for each model. */
export declare const modelDisplayNames: Record<ModelName, string>;
/** Returns the directory in which the given model is stored. */
export declare function getModelPath(model: ModelName): string;
/**
 * Downloads any of the given models that are not installed yet.
 * When `modelNames` is omitted, only `sensevoice` is ensured.
 */
export declare function ensureModels(modelNames?: ModelName[]): Promise<void>;
/** Returns the path of the VAD model file. */
export declare function getVadModelPath(): string;
/** Downloads the VAD model file unless it already exists. */
export declare function ensureVadModel(): Promise<void>;
export {};
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import process from 'node:process';
import { execa } from 'execa';
// Directory under the user's home folder in which all models are stored.
const modelsDirectory = path.join(os.homedir(), '.coli', 'models');
// Download metadata per model:
// - dirName: folder name of the model inside modelsDirectory
// - url: archive that is downloaded and extracted into modelsDirectory
// - checkFile: file inside the model folder whose presence marks the model as installed
const models = {
whisper: {
dirName: 'sherpa-onnx-whisper-tiny.en',
url: 'https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2',
checkFile: 'tiny.en-encoder.int8.onnx',
},
sensevoice: {
dirName: 'sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17',
url: 'https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2',
checkFile: 'model.int8.onnx',
},
};
// Display name for each model key.
export const modelDisplayNames = {
whisper: 'whisper-tiny.en',
sensevoice: 'sensevoice-small',
};
/** Returns the directory inside the models folder that holds the given model. */
export function getModelPath(model) {
    const entry = models[model];
    return path.join(modelsDirectory, entry.dirName);
}
/** A model counts as installed when its `checkFile` exists inside its model folder. */
function isModelInstalled(entry) {
    const markerFile = path.join(modelsDirectory, entry.dirName, entry.checkFile);
    return fs.existsSync(markerFile);
}
/**
 * Download a model archive into the models directory and extract it there.
 *
 * Progress is written to stdout when the server reports a content length.
 * The downloaded archive is always removed afterwards, including when the
 * download or the extraction fails, so no partial archive is left behind.
 *
 * @param entry - Model metadata providing `dirName` and `url`.
 * @throws If the HTTP request fails, writing to disk fails, or `tar` fails.
 */
async function downloadModel(entry) {
    const { dirName, url } = entry;
    console.log(`Downloading ${dirName}...`);
    fs.mkdirSync(modelsDirectory, { recursive: true });
    const tarPath = path.join(modelsDirectory, `${dirName}.tar.bz2`);
    const response = await fetch(url, { redirect: 'follow' });
    if (!response.ok || !response.body) {
        throw new Error(`Failed to download model: ${response.statusText}`);
    }
    const contentLength = Number(response.headers.get('content-length') ?? 0);
    const reader = response.body.getReader();
    try {
        const fileHandle = fs.openSync(tarPath, 'w');
        let downloaded = 0;
        try {
            for (;;) {
                const { done, value } = await reader.read(); // eslint-disable-line no-await-in-loop
                if (done) {
                    break;
                }
                fs.writeSync(fileHandle, value);
                downloaded += value.length;
                if (contentLength > 0) {
                    const percent = ((downloaded / contentLength) * 100).toFixed(1);
                    const mb = (downloaded / (1024 * 1024)).toFixed(1);
                    const totalMb = (contentLength / (1024 * 1024)).toFixed(1);
                    process.stdout.write(`\r ${mb} MB / ${totalMb} MB (${percent}%)`);
                }
            }
        }
        finally {
            fs.closeSync(fileHandle);
        }
        process.stdout.write('\n');
        console.log(' Extracting...');
        await execa('tar', ['xjf', tarPath, '-C', modelsDirectory]);
    }
    finally {
        // The archive is only an intermediate artifact: remove it on success and
        // on failure alike. `force` ignores the case where it was never created.
        fs.rmSync(tarPath, { force: true });
    }
    console.log(` ${dirName} ready.\n`);
}
/**
 * Make sure every requested model is installed, downloading missing ones one
 * after another. Defaults to the `sensevoice` model.
 */
export async function ensureModels(modelNames = ['sensevoice']) {
    // Decide what is missing up front, before any download starts.
    const missing = [];
    for (const name of modelNames) {
        const entry = models[name];
        if (!isModelInstalled(entry)) {
            missing.push(entry);
        }
    }
    for (const entry of missing) {
        await downloadModel(entry); // eslint-disable-line no-await-in-loop
    }
}
// File name and download location of the voice activity detection model.
const vadModelFile = 'silero_vad.onnx';
const vadModelUrl = 'https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx';
/** Returns the path of the VAD model file inside the models directory. */
export function getVadModelPath() {
    const vadModelPath = path.join(modelsDirectory, vadModelFile);
    return vadModelPath;
}
/**
 * Download the VAD model unless it is already present.
 *
 * The file is downloaded to a temporary sibling path and only renamed to its
 * final name once it is complete. Presence of the final file is the only
 * "installed" check, so writing to it directly would let an interrupted
 * download leave a truncated model that later runs accept as valid.
 *
 * @throws If the HTTP request fails or the file cannot be written.
 */
export async function ensureVadModel() {
    const modelPath = getVadModelPath();
    if (fs.existsSync(modelPath)) {
        return;
    }
    console.log(`Downloading ${vadModelFile}...`);
    fs.mkdirSync(modelsDirectory, { recursive: true });
    const response = await fetch(vadModelUrl, { redirect: 'follow' });
    if (!response.ok || !response.body) {
        throw new Error(`Failed to download VAD model: ${response.statusText}`);
    }
    const contentLength = Number(response.headers.get('content-length') ?? 0);
    const reader = response.body.getReader();
    const partialPath = `${modelPath}.download`;
    try {
        const fileHandle = fs.openSync(partialPath, 'w');
        let downloaded = 0;
        try {
            for (;;) {
                const { done, value } = await reader.read(); // eslint-disable-line no-await-in-loop
                if (done) {
                    break;
                }
                fs.writeSync(fileHandle, value);
                downloaded += value.length;
                if (contentLength > 0) {
                    const percent = ((downloaded / contentLength) * 100).toFixed(1);
                    const kb = (downloaded / 1024).toFixed(0);
                    const totalKb = (contentLength / 1024).toFixed(0);
                    process.stdout.write(`\r ${kb} KB / ${totalKb} KB (${percent}%)`);
                }
            }
        }
        finally {
            fs.closeSync(fileHandle);
        }
        // Publish the model only after every byte has been written.
        fs.renameSync(partialPath, modelPath);
    }
    catch (error) {
        // Do not leave a partial download behind.
        fs.rmSync(partialPath, { force: true });
        throw error;
    }
    process.stdout.write('\n');
    console.log(` ${vadModelFile} ready.\n`);
}
/**
 * A recognition result passed to `onResult`.
 * It carries the recognizer's result fields with `text` trimmed, plus `isFinal`.
 */
export type AsrStreamResult = {
/** Recognized text with surrounding whitespace removed; empty results are not emitted. */
text: string;
lang: string;
emotion: string;
event: string;
tokens: string[];
timestamps: number[];
/**
 * `false` for interim results produced at each recognition interval;
 * `true` for VAD segments and for the last result after the audio ends.
 */
isFinal: boolean;
};
/**
 * Tuning options for voice activity detection.
 * NOTE(review): units of the duration fields are not visible here (presumably seconds) — confirm.
 */
export type VadOptions = {
/** Defaults to 0.5. */
threshold?: number;
/** Defaults to 0.25. */
minSpeechDuration?: number;
/** Defaults to 0.5. */
minSilenceDuration?: number;
/** Defaults to 15. */
maxSpeechDuration?: number;
/** Passed to the VAD when a detected segment is fetched. */
enableExternalBuffer?: boolean;
};
/** Options accepted by `streamAsr`. */
export type StreamAsrOptions = {
/** Sample rate of the incoming audio; defaults to 16000. */
sampleRate?: number;
/** Interval between interim recognitions in milliseconds; defaults to 1000. Not used when `vad` is enabled. */
asrIntervalMs?: number;
/** `true` or an options object enables VAD-based segmentation; otherwise interval mode is used. */
vad?: boolean | VadOptions;
/** Called for every non-empty recognition result. */
onResult: (result: AsrStreamResult) => void;
};
/**
 * Runs speech recognition over a stream of Float32 audio chunks and reports
 * results through `options.onResult`. Resolves once the audio stream has ended.
 */
export declare function streamAsr(audio: AsyncIterable<Float32Array>, options: StreamAsrOptions): Promise<void>;
import { createRequire } from 'node:module';
import path from 'node:path';
import { getModelPath, getVadModelPath } from './models.js';
const require = createRequire(import.meta.url);
// Cached `sherpa-onnx-node` module; stays unset until first requested.
let _sherpaOnnx;
/** Returns the `sherpa-onnx-node` module, requiring it the first time it is needed. */
function sherpaOnnx() {
    if (_sherpaOnnx === undefined || _sherpaOnnx === null) {
        _sherpaOnnx = require('sherpa-onnx-node');
    }
    return _sherpaOnnx;
}
// Defaults used when the caller does not specify a sample rate or interval.
const defaultSampleRate = 16_000;
const defaultAsrIntervalMs = 1000;
/** Builds an offline SenseVoice recognizer running on CPU with two threads. */
function createRecognizer() {
    const modelDir = getModelPath('sensevoice');
    const onnx = sherpaOnnx();
    const featConfig = { sampleRate: defaultSampleRate, featureDim: 80 };
    const modelConfig = {
        senseVoice: {
            model: path.join(modelDir, 'model.int8.onnx'),
            useInverseTextNormalization: 1,
        },
        tokens: path.join(modelDir, 'tokens.txt'),
        numThreads: 2,
        provider: 'cpu',
        debug: 0,
    };
    return new onnx.OfflineRecognizer({ featConfig, modelConfig });
}
/** Runs one offline recognition pass over `samples` and returns the recognizer's result. */
function recognize(recognizer, samples) {
    const offlineStream = recognizer.createStream();
    const waveform = { sampleRate: defaultSampleRate, samples };
    offlineStream.acceptWaveform(waveform);
    recognizer.decode(offlineStream);
    const recognized = recognizer.getResult(offlineStream);
    return recognized;
}
/**
 * Concatenate sample buffers into one Float32Array of `totalLength` samples.
 * A single buffer is returned as-is, without copying.
 */
function mergeBuffers(buffers, totalLength) {
    const [only] = buffers;
    if (buffers.length === 1 && only) {
        return only;
    }
    const merged = new Float32Array(totalLength);
    buffers.reduce((offset, buf) => {
        merged.set(buf, offset);
        return offset + buf.length;
    }, 0);
    return merged;
}
/**
 * Create a Silero voice activity detector.
 * Any option left unset (or null) in `vadOptions` falls back to its default.
 */
function createVad(vadOptions) {
    const onnx = sherpaOnnx();
    const sileroVad = {
        model: getVadModelPath(),
        threshold: vadOptions.threshold ?? 0.5,
        minSpeechDuration: vadOptions.minSpeechDuration ?? 0.25,
        minSilenceDuration: vadOptions.minSilenceDuration ?? 0.5,
        maxSpeechDuration: vadOptions.maxSpeechDuration ?? 15,
        windowSize: 512,
    };
    const config = {
        sileroVad,
        sampleRate: defaultSampleRate,
        debug: 0,
        numThreads: 1,
    };
    // Second constructor argument of the VAD (buffer size; presumably seconds).
    const bufferSize = 60;
    return new onnx.Vad(config, bufferSize);
}
function emitResult(result, isFinal, onResult) {
const text = result.text.trim();
if (text) {
onResult({ ...result, text, isFinal });
}
}
/**
 * Recognize speech segment by segment using voice activity detection.
 *
 * Audio is fed to the VAD in fixed-size windows. Every segment the VAD reports
 * is recognized and emitted as a final result. Samples that do not fill a whole
 * window are kept for the next chunk, and zero-padded once the audio ends.
 */
async function streamWithVad(audio, options, vadOptions) {
    const recognizer = createRecognizer();
    const vad = createVad(vadOptions);
    const frameSize = vad.config.sileroVad.windowSize;
    let leftover = new Float32Array(0);
    // Recognize and emit every segment the VAD currently has queued.
    const flushDetectedSpeech = () => {
        while (!vad.isEmpty()) {
            const segment = vad.front(vadOptions.enableExternalBuffer);
            vad.pop();
            const recognized = recognize(recognizer, segment.samples);
            emitResult(recognized, true, options.onResult);
        }
    };
    for await (const chunk of audio) {
        // Prepend what was left over from the previous chunk.
        const joined = new Float32Array(leftover.length + chunk.length);
        joined.set(leftover, 0);
        joined.set(chunk, leftover.length);
        let cursor = 0;
        while (joined.length - cursor >= frameSize) {
            vad.acceptWaveform(joined.subarray(cursor, cursor + frameSize));
            cursor += frameSize;
            flushDetectedSpeech();
        }
        leftover = joined.subarray(cursor);
    }
    if (leftover.length > 0) {
        // Pad the final partial window with zeros.
        const padded = new Float32Array(frameSize);
        padded.set(leftover);
        vad.acceptWaveform(padded);
    }
    vad.flush();
    flushDetectedSpeech();
}
/**
 * Recognize speech by re-running recognition on all audio received so far each
 * time another interval's worth of samples has arrived.
 *
 * Interim results (`isFinal: false`) are emitted whenever the recognized text
 * changes. One final result (`isFinal: true`) is emitted when the audio ends.
 *
 * @param audio - Async stream of Float32 sample chunks.
 * @param options - `sampleRate` is the rate of the incoming audio (default
 * 16000), `asrIntervalMs` the interval between interim recognitions (default
 * 1000; a NaN value also falls back to the default), `onResult` the callback.
 */
async function streamWithInterval(audio, options) {
    const inputSampleRate = options.sampleRate ?? defaultSampleRate;
    const requestedIntervalMs = options.asrIntervalMs ?? defaultAsrIntervalMs;
    // A NaN interval would make the comparison below always false and silently
    // disable interim results.
    const intervalMs = Number.isNaN(requestedIntervalMs) ? defaultAsrIntervalMs : requestedIntervalMs;
    // Number of input samples that make up one interval (loop-invariant).
    const samplesPerInterval = (inputSampleRate * intervalMs) / 1000;
    const recognizer = createRecognizer();
    // Hand the audio to the recognizer labelled with its real sample rate, so
    // input that is not 16 kHz is not decoded as if it were.
    const recognizeAtInputRate = (samples) => {
        const stream = recognizer.createStream();
        stream.acceptWaveform({ sampleRate: inputSampleRate, samples });
        recognizer.decode(stream);
        return recognizer.getResult(stream);
    };
    const buffers = [];
    let totalSamples = 0;
    let lastRecognizedAt = 0;
    let lastText = '';
    for await (const chunk of audio) {
        buffers.push(chunk);
        totalSamples += chunk.length;
        if (totalSamples - lastRecognizedAt >= samplesPerInterval) {
            lastRecognizedAt = totalSamples;
            const merged = mergeBuffers(buffers, totalSamples);
            // Keep only the merged buffer so later merges don't walk every chunk again.
            buffers.splice(0, buffers.length, merged);
            const result = recognizeAtInputRate(merged);
            const text = result.text.trim();
            if (text && text !== lastText) {
                lastText = text;
                options.onResult({ ...result, text, isFinal: false });
            }
        }
    }
    const merged = mergeBuffers(buffers, totalSamples);
    if (merged.length > 0) {
        emitResult(recognizeAtInputRate(merged), true, options.onResult);
    }
}
/**
 * Run streaming speech recognition over `audio`.
 * A truthy `options.vad` selects VAD-based segmentation (an object value is
 * used as the VAD options); otherwise recognition runs at a fixed interval.
 */
export async function streamAsr(audio, options) {
    const { vad } = options;
    if (!vad) {
        return streamWithInterval(audio, options);
    }
    const vadOptions = typeof vad === 'object' ? vad : {};
    return streamWithVad(audio, options, vadOptions);
}
#!/usr/bin/env node
export {};
#!/usr/bin/env node
import process from 'node:process';
import { Command } from 'commander';
import { register as registerAsr } from './asr/_cli.js';
import { register as registerCloudTts } from './cloud-tts/_cli.js';
import { register as registerTts } from './tts/_cli.js';
// Build the `coli` CLI and register every command group on it.
const program = new Command();
program.name('coli').description('Core CLI for Cola');
registerAsr(program);
registerTts(program);
registerCloudTts(program);
// All commands use async action handlers, so parse with parseAsync() and
// handle a rejected action here: print its message and exit with a failure
// code instead of surfacing an unhandled promise rejection.
program.parseAsync().catch((error) => {
    console.error(error instanceof Error ? error.message : error);
    process.exitCode = 1;
});
import type { Command } from 'commander';
/** Registers this module's command (`cloud-tts`) on the given commander program. */
export declare function register(program: Command): void;
import process from 'node:process';
import { defaultSpeaker } from '../_api/constants.js';
import { listSpeakers, runCloudTts } from './cloud-tts.js';
/**
 * Resolve the ListenHub API key: the `apiKey` option wins when it is set,
 * otherwise the COLI_LISTENHUB_API_KEY environment variable is used.
 * @throws If no non-empty key is available.
 */
function getApiKey(options) {
    const fromOption = options.apiKey;
    const key = fromOption === undefined || fromOption === null
        ? process.env['COLI_LISTENHUB_API_KEY']
        : fromOption;
    if (key) {
        return key;
    }
    throw new Error('API key required. Use --api-key or set COLI_LISTENHUB_API_KEY environment variable. Get an API key from https://listenhub.ai/settings/api-keys');
}
/**
 * Register the `cloud-tts` command.
 *
 * With `--list-speakers` the available speakers are printed (tab-separated, or
 * JSON with `--json`). Otherwise the given text is synthesized with the voice
 * from `--voice`, or with the default speaker of `--language`.
 *
 * @param program - The commander program to add the command to.
 */
export function register(program) {
    // Fetch the speakers and print them as JSON or one tab-separated line each.
    const printSpeakers = async (options) => {
        const apiKey = getApiKey(options);
        const speakers = await listSpeakers({
            apiKey,
            language: options.language,
        });
        if (options.json) {
            console.log(JSON.stringify(speakers, null, 2));
            return;
        }
        for (const { name, speakerId, gender, language } of speakers) {
            console.log(`${name}\t${speakerId}\t${gender}\t${language}`);
        }
    };
    const command = program
        .command('cloud-tts')
        .description('Generate speech using ListenHub OpenAPI')
        .argument('[text]', 'Text to synthesize')
        .option('--api-key <key>', 'ListenHub API key (or set COLI_LISTENHUB_API_KEY environment variable)')
        .option('--voice <id>', 'Speaker ID to use')
        .option('--model <name>', 'Model to use (default: flowtts)')
        .option('-o, --output <file>', 'Save audio to file')
        .option('--list-speakers', 'List available speakers')
        .option('--language <lang>', 'Speaker language (en, zh, ja)')
        .option('-j, --json', 'Output in JSON format (use with --list-speakers)');
    command.action(async (text, options) => {
        if (options.listSpeakers) {
            await printSpeakers(options);
            return;
        }
        if (!text) {
            throw new Error('Please provide text to synthesize.');
        }
        // An explicit voice wins; otherwise use the default speaker of the language.
        const voice = options.voice ??
            (options.language && defaultSpeaker[options.language]);
        if (!voice) {
            throw new Error('Please specify a speaker with --voice or a language with --language. Use --list-speakers to see available speakers.');
        }
        const apiKey = getApiKey(options);
        const { model, output } = options;
        await runCloudTts(text, { apiKey, voice, model, output });
    });
}
export { listSpeakers, runCloudTts, type CloudTtsOptions, type ListSpeakersOptions, } from './cloud-tts.js';
export { listSpeakers, runCloudTts, } from './cloud-tts.js';
import type { SpeakerLanguage } from '../_api/types.js';
/** Options accepted by `runCloudTts`. */
export type CloudTtsOptions = {
/** ListenHub API key. */
apiKey: string;
/** Speaker ID to synthesize with. */
voice: string;
/** Model name forwarded to the TTS request when set. */
model?: string;
/** When set, the audio is saved to this file instead of being played. */
output?: string;
};
/** Options accepted by `listSpeakers`. */
export type ListSpeakersOptions = {
/** ListenHub API key. */
apiKey: string;
/** Restricts the listing to speakers of this language when set. */
language?: SpeakerLanguage;
};
/** Fetches the available speakers and returns the `items` of the API response. */
export declare function listSpeakers(options: ListSpeakersOptions): Promise<{
name: string;
speakerId: string;
demoAudioUrl: string;
gender: string;
language: SpeakerLanguage;
}[]>;
/**
 * Synthesizes `text` through the ListenHub TTS API and either writes the audio
 * to `options.output` or plays it from a temporary file.
 */
export declare function runCloudTts(text: string, options: CloudTtsOptions): Promise<void>;
import { Buffer } from 'node:buffer';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { Writable } from 'node:stream';
import { execa } from 'execa';
import { ListenHubApi } from '../_api/listenhub-openapi.js';
/** Fetch the available speakers (optionally for one language) and return the list items. */
export async function listSpeakers(options) {
    const { apiKey, language } = options;
    const client = new ListenHubApi({ apiKey });
    const { data } = await client.getAvailableSpeakers({ language });
    return data.items;
}
async function collectStream(stream) {
const chunks = [];
const reader = stream.getReader();
for (;;) {
const { done, value } = await reader.read(); // eslint-disable-line no-await-in-loop
if (done)
break;
chunks.push(value);
}
return Buffer.concat(chunks);
}
/**
 * Synthesize `text` with the ListenHub TTS API.
 *
 * With `options.output` the audio stream is written to that file. Without it,
 * the audio is stored in a temporary MP3 file, played with `afplay`, and the
 * temporary file is removed afterwards.
 */
export async function runCloudTts(text, options) {
    const { apiKey, voice, model, output } = options;
    const client = new ListenHubApi({ apiKey });
    const audioStream = await client.tts({ input: text, voice, model });
    if (!output) {
        const tempFile = path.join(os.tmpdir(), `coli-cloud-tts-${Date.now()}.mp3`);
        fs.writeFileSync(tempFile, await collectStream(audioStream));
        try {
            await execa('afplay', [tempFile]);
        }
        finally {
            fs.unlinkSync(tempFile);
        }
        return;
    }
    const destination = fs.createWriteStream(output);
    await audioStream.pipeTo(Writable.toWeb(destination));
}
export * from './_api/listenhub-openapi.js';
export * from './asr/_index.js';
export * from './cloud-tts/_index.js';
export * from './tts/_index.js';
export * from './_api/listenhub-openapi.js';
export * from './asr/_index.js';
export * from './cloud-tts/_index.js';
export * from './tts/_index.js';
import type { Command } from 'commander';
/** Registers this module's command (`tts`) on the given commander program. */
export declare function register(program: Command): void;
import { getVoices, runTts } from './tts.js';
/**
 * Register the `tts` command.
 *
 * With `--list-voices` the available voices are printed (tab-separated, or
 * JSON with `--json`). Otherwise the given text is passed to `runTts` together
 * with the parsed options.
 *
 * @param program - The commander program to add the command to.
 */
export function register(program) {
    // Print the voices as JSON or as one tab-separated line each.
    const printVoices = async (asJson) => {
        const voices = await getVoices();
        if (asJson) {
            console.log(JSON.stringify(voices, null, 2));
            return;
        }
        for (const { name, languageCode, example } of voices) {
            console.log(`${name}\t${languageCode}\t${example}`);
        }
    };
    const command = program
        .command('tts')
        .description('Speak text using text-to-speech (macOS only)')
        .argument('[text]', 'Text to speak')
        .option('-v, --voice <name>', 'Voice to use, defaults to macOS system voice')
        .option('-r, --rate <wpm>', 'Speech rate in words per minute', Number)
        .option('-o, --output <file>', 'Save audio to file instead of speaking')
        .option('--list-voices', 'List available voices')
        .option('-j, --json', 'Output in JSON format (use with --list-voices)');
    command.action(async (text, options) => {
        if (options.listVoices) {
            await printVoices(options.json);
            return;
        }
        if (!text) {
            throw new Error('Please provide text to speak.');
        }
        await runTts(text, options);
    });
}
export { getVoices, runTts, type TtsOptions } from './tts.js';
export { getVoices, runTts } from './tts.js';
import { type Voice } from 'mac-say';
/** Options accepted by `runTts`. */
export type TtsOptions = {
/** Voice name passed to the speech engine. */
voice?: string;
/** Speech rate passed to the speech engine. */
rate?: number;
/** Passed to the speech engine as its output file. */
output?: string;
};
/** Returns the voices reported by `mac-say`. */
export declare function getVoices(): Promise<Voice[]>;
/** Speaks `text` through `mac-say` using the given options. */
export declare function runTts(text: string, options?: TtsOptions): Promise<void>;
import { getVoices as macGetVoices, say } from 'mac-say';
/** Returns the list of voices reported by `mac-say`. */
export async function getVoices() {
    const voices = await macGetVoices();
    return voices;
}
/**
 * Speak `text` via `mac-say`.
 * `options.output` is handed to `say` as its `outputFile` option.
 */
export async function runTts(text, options = {}) {
    const { voice, rate, output: outputFile } = options;
    await say(text, { voice, rate, outputFile });
}