@marswave/coli - npm Package Compare versions

+28

-5

distribution/source/asr/_cli.js

		@@ -6,3 +6,3 @@ import { Buffer } from 'node:buffer';
		import { convertToWav, readWave, runAsr, } from './asr.js';
		import { ensureModels, ensureVadModel } from './models.js';
		import { ensureModels, ensureVadModel, resolveAsrModelFiles, resolveVadModelFile, } from './models.js';
		import { streamAsr } from './stream-asr.js';
		@@ -16,2 +16,3 @@ export function register(program) {
		.option('--model <name>', 'Model to use: whisper, sensevoice', 'sensevoice')
		.option('--model-path <path>', 'Path to a local model file or directory')
		.option('--language <lang>', 'Language for sensevoice: auto, zh, en, ja, ko, yue', 'auto')
		@@ -27,3 +28,8 @@ .action(async (file, options) => {
		}
		await ensureModels([model]);
		if (options.modelPath) {
		resolveAsrModelFiles(model, options.modelPath);
		}
		else {
		await ensureModels([model]);
		}
		const resolvedPath = path.resolve(file);
		@@ -45,2 +51,3 @@ const ext = path.extname(resolvedPath).toLowerCase();
		model,
		modelPath: options.modelPath,
		// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
		@@ -61,2 +68,4 @@ language: options.language,
		.option('--vad', 'Enable voice activity detection', false)
		.option('--model-path <path>', 'Path to a local SenseVoice model file or directory')
		.option('--vad-model-path <path>', 'Path to a local VAD model file')
		.option('--language <lang>', 'Language for sensevoice: auto, zh, en, ja, ko, yue', 'auto')
		@@ -69,4 +78,17 @@ .option('--asr-interval-ms <ms>', 'Recognition interval in ms (ignored with --vad)', '1000')
		}
		await ensureModels();
		if (options.vad) {
		if (options.modelPath) {
		resolveAsrModelFiles('sensevoice', options.modelPath);
		}
		else {
		await ensureModels();
		}
		if (options.vadModelPath) {
		if (options.vad) {
		resolveVadModelFile(options.vadModelPath);
		}
		else {
		throw new Error('Use --vad with --vad-model-path.');
		}
		}
		else if (options.vad) {
		await ensureVadModel();
		@@ -89,3 +111,4 @@ }
		language: options.language,
		vad: options.vad \|\| undefined,
		modelPath: options.modelPath,
		vad: options.vad ? { modelPath: options.vadModelPath } : undefined,
		asrIntervalMs: Number(options.asrIntervalMs),
		@@ -92,0 +115,0 @@ onResult(result) {

+1

-1

distribution/source/asr/_index.d.ts

		@@ -1,3 +0,3 @@
		export { ensureModels, ensureVadModel, getModelPath, getVadModelPath, modelDisplayNames, } from './models.js';
		export { ensureModels, ensureVadModel, getModelPath, getVadModelPath, modelDisplayNames, type ModelName, } from './models.js';
		export { convertToWav, readWave, runAsr, type AsrOptions, type AudioData, type SenseVoiceLanguage, } from './asr.js';
		export { streamAsr, type AsrStreamResult, type StreamAsrOptions, type VadOptions, } from './stream-asr.js';

+2

-2

distribution/source/asr/asr.d.ts

		@@ -0,4 +1,4 @@
		import { type ModelName } from './models.js';
		export declare function readWave(filename: string): AudioData;
		export declare function convertToWav(inputPath: string): Promise<string>;
		type ModelName = 'whisper' \| 'sensevoice';
		export type SenseVoiceLanguage = 'auto' \| 'zh' \| 'en' \| 'ja' \| 'ko' \| 'yue';
		@@ -8,2 +8,3 @@ export type AsrOptions = {
		model: ModelName;
		modelPath?: string \| undefined;
		language?: SenseVoiceLanguage;
		@@ -16,2 +17,1 @@ };
		export declare function runAsr(input: string \| AudioData, options: AsrOptions): Promise<void>;
		export {};

+10

-10

distribution/source/asr/asr.js

		@@ -8,3 +8,3 @@ import fs from 'node:fs';
		import { deprecationAsrFilePath } from '../deprecations.js';
		import { getModelPath, modelDisplayNames } from './models.js';
		import { modelDisplayNames, resolveAsrModelFiles, } from './models.js';
		const require = createRequire(import.meta.url);
		@@ -46,6 +46,5 @@ // Loaded lazily to avoid loading the native addon until needed
		}
		function createRecognizer(model, language) {
		const modelDir = getModelPath(model);
		function createRecognizer(modelFiles, language) {
		const onnx = sherpaOnnx();
		if (model === 'whisper') {
		if (modelFiles.model === 'whisper') {
		return new onnx.OfflineRecognizer({
		@@ -55,6 +54,6 @@ featConfig: { sampleRate: 16_000, featureDim: 80 },
		whisper: {
		encoder: path.join(modelDir, 'tiny.en-encoder.int8.onnx'),
		decoder: path.join(modelDir, 'tiny.en-decoder.int8.onnx'),
		encoder: modelFiles.files.encoder,
		decoder: modelFiles.files.decoder,
		},
		tokens: path.join(modelDir, 'tiny.en-tokens.txt'),
		tokens: modelFiles.files.tokens,
		numThreads: 2,
		@@ -70,7 +69,7 @@ provider: 'cpu',
		senseVoice: {
		model: path.join(modelDir, 'model.int8.onnx'),
		model: modelFiles.files.model,
		useInverseTextNormalization: 1,
		language: language ?? 'auto',
		},
		tokens: path.join(modelDir, 'tokens.txt'),
		tokens: modelFiles.files.tokens,
		numThreads: 2,
		@@ -83,2 +82,3 @@ provider: 'cpu',
		export async function runAsr(input, options) {
		const modelFiles = resolveAsrModelFiles(options.model, options.modelPath);
		let wave;
		@@ -107,3 +107,3 @@ let needsCleanup = false;
		try {
		const recognizer = createRecognizer(options.model, options.language);
		const recognizer = createRecognizer(modelFiles, options.language);
		const stream = recognizer.createStream();
		@@ -110,0 +110,0 @@ stream.acceptWaveform({ sampleRate: wave.sampleRate, samples: wave.samples });

+27

-2

distribution/source/asr/models.d.ts

		@@ -1,7 +0,32 @@
		type ModelName = 'whisper' \| 'sensevoice';
		export type ModelName = 'whisper' \| 'sensevoice';
		export type WhisperModelFiles = {
		encoder: string;
		decoder: string;
		tokens: string;
		};
		export type SenseVoiceModelFiles = {
		model: string;
		tokens: string;
		};
		export type AsrModelFiles = {
		model: 'whisper';
		files: WhisperModelFiles;
		} \| {
		model: 'sensevoice';
		files: SenseVoiceModelFiles;
		};
		export declare const modelDisplayNames: Record<ModelName, string>;
		export declare function getModelPath(model: ModelName): string;
		export declare function resolveAsrModelFiles(model: 'whisper', modelPath?: string): {
		model: 'whisper';
		files: WhisperModelFiles;
		};
		export declare function resolveAsrModelFiles(model: 'sensevoice', modelPath?: string): {
		model: 'sensevoice';
		files: SenseVoiceModelFiles;
		};
		export declare function resolveAsrModelFiles(model: ModelName, modelPath?: string): AsrModelFiles;
		export declare function ensureModels(modelNames?: ModelName[]): Promise<void>;
		export declare function getVadModelPath(): string;
		export declare function resolveVadModelFile(modelPath?: string): string;
		export declare function ensureVadModel(): Promise<void>;
		export {};

+54

-0

distribution/source/asr/models.js

		@@ -59,2 +59,51 @@ import { createHash } from 'node:crypto';
		}
		/**
		 * Guard that a path points at an existing file.
		 *
		 * @param filePath Path to check.
		 * @param label Human-readable name of the artifact; used as the prefix of the error message.
		 * @throws {Error} `<label> not found: <filePath>` when `fs.existsSync` reports the path as absent.
		 * @throws {Error} `<label> must be a file: <filePath>` when the path exists but `isFile()` is false
		 *   (for example, a directory).
		 */
		function assertExistingFile(filePath, label) {
			const isPresent = fs.existsSync(filePath);
			if (!isPresent) {
				throw new Error(`${label} not found: ${filePath}`);
			}
			const isRegularFile = fs.statSync(filePath).isFile();
			if (isRegularFile) {
				return;
			}
			throw new Error(`${label} must be a file: ${filePath}`);
		}
		/**
		 * Work out which directory (and optionally which exact file) a model should be loaded from.
		 *
		 * @param modelPath Optional user-supplied path; resolved to an absolute path with `path.resolve`.
		 *   Any falsy value (including an empty string) selects the default directory.
		 * @param defaultDirectory Directory returned when no custom path is given.
		 * @returns `{ directory }` for the default or for a custom directory, or
		 *   `{ directory, filePath }` when the custom path is a file; `directory` is then the
		 *   file's parent directory.
		 * @throws {Error} When the custom path does not exist, or is neither a file nor a directory.
		 */
		function resolveModelDirectory(modelPath, defaultDirectory) {
			if (modelPath) {
				const absolutePath = path.resolve(modelPath);
				if (!fs.existsSync(absolutePath)) {
					throw new Error(`Model path not found: ${absolutePath}`);
				}
				const info = fs.statSync(absolutePath);
				if (info.isDirectory()) {
					return { directory: absolutePath };
				}
				if (!info.isFile()) {
					// Exists, but is something other than a file or directory.
					throw new Error(`Model path must be a file or directory: ${absolutePath}`);
				}
				return { directory: path.dirname(absolutePath), filePath: absolutePath };
			}
			// No custom path supplied: use the caller's default location.
			return { directory: defaultDirectory };
		}
		/**
		 * Resolve and validate the on-disk files needed by an ASR model.
		 *
		 * The base location comes from `resolveModelDirectory(modelPath, getModelPath(model))`.
		 *
		 * - `'whisper'`: the path must be a directory; the encoder, decoder and tokens files
		 *   are looked up inside it under their fixed `tiny.en-*` names.
		 * - any other value is handled as SenseVoice: if `modelPath` is a file it is used as
		 *   the model itself and `tokens.txt` is taken from the same directory; otherwise
		 *   `model.int8.onnx` and `tokens.txt` are looked up in the directory.
		 *
		 * @param model Model name.
		 * @param modelPath Optional custom file or directory path.
		 * @returns `{ model, files }`, where `files` maps each role to a path.
		 * @throws {Error} When a whisper path points at a file, or when any required file fails
		 *   the `assertExistingFile` check.
		 */
		export function resolveAsrModelFiles(model, modelPath) {
			const fallbackDirectory = getModelPath(model);
			const location = resolveModelDirectory(modelPath, fallbackDirectory);
			const inDirectory = (fileName) => path.join(location.directory, fileName);
			if (model !== 'whisper') {
				const senseVoiceFiles = {
					// An explicit file path takes precedence over the conventional file name.
					model: location.filePath ?? inDirectory('model.int8.onnx'),
					tokens: inDirectory('tokens.txt'),
				};
				assertExistingFile(senseVoiceFiles.model, 'SenseVoice model');
				assertExistingFile(senseVoiceFiles.tokens, 'SenseVoice tokens file');
				return { model, files: senseVoiceFiles };
			}
			// Whisper needs several files, so a single-file path cannot identify the model.
			if (location.filePath) {
				throw new Error('Custom whisper model path must be a directory containing tiny.en-encoder.int8.onnx, tiny.en-decoder.int8.onnx, and tiny.en-tokens.txt.');
			}
			const whisperFiles = {
				encoder: inDirectory('tiny.en-encoder.int8.onnx'),
				decoder: inDirectory('tiny.en-decoder.int8.onnx'),
				tokens: inDirectory('tiny.en-tokens.txt'),
			};
			assertExistingFile(whisperFiles.encoder, 'Whisper encoder model');
			assertExistingFile(whisperFiles.decoder, 'Whisper decoder model');
			assertExistingFile(whisperFiles.tokens, 'Whisper tokens file');
			return { model, files: whisperFiles };
		}
		async function getFileSha256(filePath) {
		@@ -202,2 +251,7 @@ const hash = createHash('sha256');
		}
		/**
		 * Resolve the VAD model file to load and verify it is present.
		 *
		 * @param modelPath Optional custom path to a VAD model file. When falsy, the path
		 *   returned by `getVadModelPath()` is used instead.
		 * @returns The path that passed the check: the absolute form of `modelPath`, or the
		 *   `getVadModelPath()` value unchanged.
		 * @throws {Error} When `assertExistingFile` rejects the path (missing, or not a file).
		 */
		export function resolveVadModelFile(modelPath) {
			let vadFile;
			if (modelPath) {
				vadFile = path.resolve(modelPath);
			}
			else {
				vadFile = getVadModelPath();
			}
			assertExistingFile(vadFile, 'VAD model');
			return vadFile;
		}
		export async function ensureVadModel() {
		@@ -204,0 +258,0 @@ const modelPath = getVadModelPath();

+2

-0

distribution/source/asr/stream-asr.d.ts

		@@ -12,2 +12,3 @@ import type { SenseVoiceLanguage } from './asr.js';
		export type VadOptions = {
		modelPath?: string \| undefined;
		threshold?: number;
		@@ -23,2 +24,3 @@ minSpeechDuration?: number;
		language?: SenseVoiceLanguage;
		modelPath?: string \| undefined;
		vad?: boolean \| VadOptions;
		@@ -25,0 +27,0 @@ onResult: (result: AsrStreamResult) => void;

+10

-9

distribution/source/asr/stream-asr.js

		import { createRequire } from 'node:module';
		import path from 'node:path';
		import { getModelPath, getVadModelPath } from './models.js';
		import { resolveAsrModelFiles, resolveVadModelFile, } from './models.js';
		const require = createRequire(import.meta.url);
		@@ -13,4 +12,3 @@ let _sherpaOnnx;
		const defaultAsrIntervalMs = 1000;
		function createRecognizer(language) {
		const modelDir = getModelPath('sensevoice');
		function createRecognizer(modelFiles, language) {
		const onnx = sherpaOnnx();
		@@ -21,7 +19,7 @@ return new onnx.OfflineRecognizer({
		senseVoice: {
		model: path.join(modelDir, 'model.int8.onnx'),
		model: modelFiles.model,
		useInverseTextNormalization: 1,
		language: language ?? 'auto',
		},
		tokens: path.join(modelDir, 'tokens.txt'),
		tokens: modelFiles.tokens,
		numThreads: 2,
		@@ -53,5 +51,6 @@ provider: 'cpu',
		const onnx = sherpaOnnx();
		const modelPath = resolveVadModelFile(vadOptions.modelPath);
		return new onnx.Vad({
		sileroVad: {
		model: getVadModelPath(),
		model: modelPath,
		threshold: vadOptions.threshold ?? 0.5,
		@@ -75,3 +74,4 @@ minSpeechDuration: vadOptions.minSpeechDuration ?? 0.25,
		async function streamWithVad(audio, options, vadOptions) {
		const recognizer = createRecognizer(options.language);
		const modelFiles = resolveAsrModelFiles('sensevoice', options.modelPath);
		const recognizer = createRecognizer(modelFiles.files, options.language);
		const vad = createVad(vadOptions);
		@@ -110,3 +110,4 @@ const { windowSize } = vad.config.sileroVad;
		const chunkInterval = (defaultSampleRate * intervalMs) / 1000;
		const recognizer = createRecognizer(options.language);
		const modelFiles = resolveAsrModelFiles('sensevoice', options.modelPath);
		const recognizer = createRecognizer(modelFiles.files, options.language);
		const buffers = [];
		@@ -113,0 +114,0 @@ let totalSamples = 0;

+44

-20

docs/asr.md

		@@ -21,2 +21,5 @@ # ASR (Automatic Speech Recognition)

		# Use a local model file or directory without downloading
		coli asr --model-path /path/to/sensevoice/model.int8.onnx recording.wav

		# Specify language (sensevoice only)
		@@ -31,2 +34,3 @@ coli asr --language zh recording.wav
		--model Model to use: whisper, sensevoice (default: sensevoice)
		--model-path Path to a local model file or directory
		--language Language for sensevoice: auto, zh, en, ja, ko, yue (default: auto)
		@@ -49,2 +53,5 @@ ```

		# Use local ASR and VAD models without downloading
		ffmpeg -f avfoundation -i :0 -ar 16000 -ac 1 -f s16le pipe:1 \| coli asr-stream --model-path /path/to/sensevoice/model.int8.onnx --vad --vad-model-path /path/to/silero_vad.onnx

		# From a file
		@@ -59,2 +66,4 @@ ffmpeg -i podcast.m4a -ar 16000 -ac 1 -f s16le pipe:1 \| coli asr-stream --vad
		--vad Enable voice activity detection
		--model-path <path> Path to a local SenseVoice model file or directory
		--vad-model-path <path> Path to a local VAD model file
		--language <lang> Language for sensevoice: auto, zh, en, ja, ko, yue (default: auto)
		@@ -122,2 +131,8 @@ --asr-interval-ms <ms> Recognition interval in ms (default: 1000, ignored with --vad)

		// Custom model path: no download, fails if local files are missing
		await runAsr(
		{sampleRate: 16000, samples: myFloat32Array},
		{json: false, model: 'sensevoice', modelPath: '/path/to/model.int8.onnx'},
		);

		// Deprecated: file path input (requires ffmpeg for non-WAV formats)
		@@ -129,7 +144,8 @@ await runAsr('recording.m4a', {json: false, model: 'sensevoice'});

		\| Property \| Type \| Description \|
		\| ---------- \| --------------------------- \| --------------------------------------------------------------------------------------------------- \|
		\| `json` \| `boolean` \| Output JSON (with model name, tokens, timestamps, etc.) instead of plain text \|
		\| `model` \| `'whisper' \\| 'sensevoice'` \| Which model to use for recognition \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|
		\| Property \| Type \| Description \|
		\| ----------- \| --------------------------- \| ------------------------------------------------------------------------------------------------------- \|
		\| `json` \| `boolean` \| Output JSON (with model name, tokens, timestamps, etc.) instead of plain text \|
		\| `model` \| `'whisper' \\| 'sensevoice'` \| Which model to use for recognition \|
		\| `modelPath` \| `string` \| Path to a local model file or directory. Skips download and throws if required local files are missing. \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|

		@@ -178,2 +194,3 @@ ### `getModelPath(model)`
		await streamAsr(audioSource, {
		modelPath: '/path/to/model.int8.onnx',
		onResult(result) {
		@@ -195,3 +212,8 @@ console.log(result.text, result.isFinal ? '(final)' : '(partial)');
		await streamAsr(audioSource, {
		vad: {threshold: 0.4, minSilenceDuration: 0.3, maxSpeechDuration: 10},
		vad: {
		modelPath: '/path/to/silero_vad.onnx',
		threshold: 0.4,
		minSilenceDuration: 0.3,
		maxSpeechDuration: 10,
		},
		onResult(result) {
		@@ -205,19 +227,21 @@ console.log(result.text);

		\| Property \| Type \| Description \|
		\| --------------- \| ----------------------------------- \| --------------------------------------------------------------------------------------------------- \|
		\| `onResult` \| `(result: AsrStreamResult) => void` \| Callback invoked with each recognition result \|
		\| `sampleRate` \| `number` \| Audio sample rate in Hz (default: `16000`) \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|
		\| `asrIntervalMs` \| `number` \| Recognition interval in milliseconds (default: `1000`). Ignored when using VAD \|
		\| `vad` \| `boolean \\| VadOptions` \| Enable VAD. Pass `true` for defaults or a `VadOptions` object \|
		\| Property \| Type \| Description \|
		\| --------------- \| ----------------------------------- \| ------------------------------------------------------------------------------------------------------------------ \|
		\| `onResult` \| `(result: AsrStreamResult) => void` \| Callback invoked with each recognition result \|
		\| `sampleRate` \| `number` \| Audio sample rate in Hz (default: `16000`) \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|
		\| `modelPath` \| `string` \| Path to a local SenseVoice model file or directory. Skips download and throws if required local files are missing. \|
		\| `asrIntervalMs` \| `number` \| Recognition interval in milliseconds (default: `1000`). Ignored when using VAD \|
		\| `vad` \| `boolean \\| VadOptions` \| Enable VAD. Pass `true` for defaults or a `VadOptions` object \|

		VadOptions

		\| Property \| Type \| Description \|
		\| ---------------------- \| --------- \| ------------------------------------------------------------------ \|
		\| `threshold` \| `number` \| Speech detection threshold (default: `0.5`) \|
		\| `minSpeechDuration` \| `number` \| Minimum speech duration in seconds (default: `0.25`) \|
		\| `minSilenceDuration` \| `number` \| Minimum silence to end a segment in seconds (default: `0.5`) \|
		\| `maxSpeechDuration` \| `number` \| Maximum speech segment duration in seconds (default: `15`) \|
		\| `enableExternalBuffer` \| `boolean` \| Use external buffer for VAD speech segments (default: `undefined`) \|
		\| Property \| Type \| Description \|
		\| ---------------------- \| --------- \| --------------------------------------------------------------------- \|
		\| `modelPath` \| `string` \| Path to a local VAD model file. Skips download and throws if missing. \|
		\| `threshold` \| `number` \| Speech detection threshold (default: `0.5`) \|
		\| `minSpeechDuration` \| `number` \| Minimum speech duration in seconds (default: `0.25`) \|
		\| `minSilenceDuration` \| `number` \| Minimum silence to end a segment in seconds (default: `0.5`) \|
		\| `maxSpeechDuration` \| `number` \| Maximum speech segment duration in seconds (default: `15`) \|
		\| `enableExternalBuffer` \| `boolean` \| Use external buffer for VAD speech segments (default: `undefined`) \|

		@@ -224,0 +248,0 @@ Result

+1

-1

package.json

		{
		"name": "@marswave/coli",
		"private": false,
		"version": "0.0.19",
		"version": "0.0.20",
		"description": "A CLI for the Cola",
		@@ -6,0 +6,0 @@ "repository": "marswaveai/coli",

		@@ -21,2 +21,5 @@ # ASR (Automatic Speech Recognition)

		# Use a local model file or directory without downloading
		coli asr --model-path /path/to/sensevoice/model.int8.onnx recording.wav

		# Specify language (sensevoice only)
		@@ -31,2 +34,3 @@ coli asr --language zh recording.wav
		--model Model to use: whisper, sensevoice (default: sensevoice)
		--model-path Path to a local model file or directory
		--language Language for sensevoice: auto, zh, en, ja, ko, yue (default: auto)
		@@ -49,2 +53,5 @@ ```

		# Use local ASR and VAD models without downloading
		ffmpeg -f avfoundation -i :0 -ar 16000 -ac 1 -f s16le pipe:1 \| coli asr-stream --model-path /path/to/sensevoice/model.int8.onnx --vad --vad-model-path /path/to/silero_vad.onnx

		# From a file
		@@ -59,2 +66,4 @@ ffmpeg -i podcast.m4a -ar 16000 -ac 1 -f s16le pipe:1 \| coli asr-stream --vad
		--vad Enable voice activity detection
		--model-path <path> Path to a local SenseVoice model file or directory
		--vad-model-path <path> Path to a local VAD model file
		--language <lang> Language for sensevoice: auto, zh, en, ja, ko, yue (default: auto)
		@@ -122,2 +131,8 @@ --asr-interval-ms <ms> Recognition interval in ms (default: 1000, ignored with --vad)

		// Custom model path: no download, fails if local files are missing
		await runAsr(
		{sampleRate: 16000, samples: myFloat32Array},
		{json: false, model: 'sensevoice', modelPath: '/path/to/model.int8.onnx'},
		);

		// Deprecated: file path input (requires ffmpeg for non-WAV formats)
		@@ -129,7 +144,8 @@ await runAsr('recording.m4a', {json: false, model: 'sensevoice'});

		\| Property \| Type \| Description \|
		\| ---------- \| --------------------------- \| --------------------------------------------------------------------------------------------------- \|
		\| `json` \| `boolean` \| Output JSON (with model name, tokens, timestamps, etc.) instead of plain text \|
		\| `model` \| `'whisper' \\| 'sensevoice'` \| Which model to use for recognition \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|
		\| Property \| Type \| Description \|
		\| ----------- \| --------------------------- \| ------------------------------------------------------------------------------------------------------- \|
		\| `json` \| `boolean` \| Output JSON (with model name, tokens, timestamps, etc.) instead of plain text \|
		\| `model` \| `'whisper' \\| 'sensevoice'` \| Which model to use for recognition \|
		\| `modelPath` \| `string` \| Path to a local model file or directory. Skips download and throws if required local files are missing. \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|

		@@ -178,2 +194,3 @@ ### `getModelPath(model)`
		await streamAsr(audioSource, {
		modelPath: '/path/to/model.int8.onnx',
		onResult(result) {
		@@ -195,3 +212,8 @@ console.log(result.text, result.isFinal ? '(final)' : '(partial)');
		await streamAsr(audioSource, {
		vad: {threshold: 0.4, minSilenceDuration: 0.3, maxSpeechDuration: 10},
		vad: {
		modelPath: '/path/to/silero_vad.onnx',
		threshold: 0.4,
		minSilenceDuration: 0.3,
		maxSpeechDuration: 10,
		},
		onResult(result) {
		@@ -205,19 +227,21 @@ console.log(result.text);

		\| Property \| Type \| Description \|
		\| --------------- \| ----------------------------------- \| --------------------------------------------------------------------------------------------------- \|
		\| `onResult` \| `(result: AsrStreamResult) => void` \| Callback invoked with each recognition result \|
		\| `sampleRate` \| `number` \| Audio sample rate in Hz (default: `16000`) \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|
		\| `asrIntervalMs` \| `number` \| Recognition interval in milliseconds (default: `1000`). Ignored when using VAD \|
		\| `vad` \| `boolean \\| VadOptions` \| Enable VAD. Pass `true` for defaults or a `VadOptions` object \|
		\| Property \| Type \| Description \|
		\| --------------- \| ----------------------------------- \| ------------------------------------------------------------------------------------------------------------------ \|
		\| `onResult` \| `(result: AsrStreamResult) => void` \| Callback invoked with each recognition result \|
		\| `sampleRate` \| `number` \| Audio sample rate in Hz (default: `16000`) \|
		\| `language` \| `SenseVoiceLanguage` \| Language hint for sensevoice: `'auto'`, `'zh'`, `'en'`, `'ja'`, `'ko'`, `'yue'` (default: `'auto'`) \|
		\| `modelPath` \| `string` \| Path to a local SenseVoice model file or directory. Skips download and throws if required local files are missing. \|
		\| `asrIntervalMs` \| `number` \| Recognition interval in milliseconds (default: `1000`). Ignored when using VAD \|
		\| `vad` \| `boolean \\| VadOptions` \| Enable VAD. Pass `true` for defaults or a `VadOptions` object \|

		VadOptions

		\| Property \| Type \| Description \|
		\| ---------------------- \| --------- \| ------------------------------------------------------------------ \|
		\| `threshold` \| `number` \| Speech detection threshold (default: `0.5`) \|
		\| `minSpeechDuration` \| `number` \| Minimum speech duration in seconds (default: `0.25`) \|
		\| `minSilenceDuration` \| `number` \| Minimum silence to end a segment in seconds (default: `0.5`) \|
		\| `maxSpeechDuration` \| `number` \| Maximum speech segment duration in seconds (default: `15`) \|
		\| `enableExternalBuffer` \| `boolean` \| Use external buffer for VAD speech segments (default: `undefined`) \|
		\| Property \| Type \| Description \|
		\| ---------------------- \| --------- \| --------------------------------------------------------------------- \|
		\| `modelPath` \| `string` \| Path to a local VAD model file. Skips download and throws if missing. \|
		\| `threshold` \| `number` \| Speech detection threshold (default: `0.5`) \|
		\| `minSpeechDuration` \| `number` \| Minimum speech duration in seconds (default: `0.25`) \|
		\| `minSilenceDuration` \| `number` \| Minimum silence to end a segment in seconds (default: `0.5`) \|
		\| `maxSpeechDuration` \| `number` \| Maximum speech segment duration in seconds (default: `15`) \|
		\| `enableExternalBuffer` \| `boolean` \| Use external buffer for VAD speech segments (default: `undefined`) \|

		@@ -224,0 +248,0 @@ Result

@marswave/coli - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics