@pr0gramm/fluester
Advanced tools
Comparing version 0.3.7 to 0.3.8
@@ -0,13 +1,41 @@ | ||
import { ModelName } from "./model.js"; | ||
import { TranscriptLine } from "./tsToArray.js"; | ||
import { FlagTypes } from "./whisper.js"; | ||
export interface WhisperOptions { | ||
modelName?: string; | ||
modelPath?: string; | ||
export interface WhisperClientOptions { | ||
/** Path to the whisper executable */ | ||
executablePath?: string; | ||
} | ||
export interface WhisperOptionsBase { | ||
whisperOptions?: FlagTypes; | ||
} | ||
/** | ||
* @param filePath Path to audio file. | ||
* @param options Whisper options. | ||
* @throws Some error if execution failed. | ||
*/ | ||
export declare function whisper(filePath: string, options?: WhisperOptions): Promise<TranscriptLine[]>; | ||
export interface WhisperOptionsWithModelPath extends WhisperOptionsBase { | ||
modelPath: string; | ||
} | ||
export interface WhisperOptionsWithModelName extends WhisperOptionsBase { | ||
/** | ||
* Name of model stored in `node_modules/@pr0gramm/fluester/lib/whisper.cpp/models` | ||
* | ||
* The name you entered when downloading the model. | ||
*/ | ||
modelName: ModelName; | ||
} | ||
export type WhisperOptions = WhisperOptionsWithModelPath | WhisperOptionsWithModelName; | ||
export interface WhisperClient { | ||
translate: (filePath: string, options: WhisperOptions) => Promise<TranscriptLine[]>; | ||
} | ||
export declare function createWhisperClient(options: WhisperClientOptions): WhisperClient; | ||
export interface CppCommandTypes { | ||
filePath: string; | ||
modelName?: ModelName; | ||
modelPath?: string; | ||
options?: FlagTypes; | ||
} | ||
export interface FlagTypes { | ||
/** Build TXT? */ | ||
generateTxt?: boolean; | ||
/** Build SRT? */ | ||
generateSubtitles?: boolean; | ||
/** Build VTT? */ | ||
generateVtt?: boolean; | ||
timestampSize?: number; | ||
wordTimestamps?: boolean; | ||
} |
@@ -1,29 +0,62 @@ | ||
import path from "node:path"; | ||
import * as fs from "node:fs/promises"; | ||
import * as path from "node:path"; | ||
import { execute } from "./execute.js"; | ||
import { defaultExecutablePath, nodeModulesModelPath } from "./interop.js"; | ||
import { modelFileNames } from "./model.js"; | ||
import transcriptToArray from "./tsToArray.js"; | ||
import { buildExecCommand } from "./whisper.js"; | ||
/** | ||
* @param filePath Path to audio file. | ||
* @param options Whisper options. | ||
* @throws Some error if execution failed. | ||
*/ | ||
export async function whisper(filePath, options) { | ||
try { | ||
// todo: combine steps 1 & 2 into separate function called whisperCpp (createCppCommand + shell) | ||
// 1. create command string for whisper.cpp | ||
const command = buildExecCommand({ | ||
filePath: path.normalize(filePath), | ||
modelName: options?.modelName, | ||
modelPath: options?.modelPath, | ||
options: options?.whisperOptions, | ||
}); | ||
// 2. run command in whisper.cpp directory | ||
// todo: add return for continually updated progress value | ||
const transcript = await execute(...command); | ||
// 3. parse whisper response string into array | ||
return transcriptToArray(transcript.toString()); | ||
/**
 * Creates a client that runs the whisper.cpp executable on audio files.
 *
 * @param options Client options. `executablePath` falls back to
 *   `defaultExecutablePath` when it is not provided.
 * @returns An object exposing `translate(filePath, options)`.
 */
export function createWhisperClient(options) {
	const effectiveOptions = {
		executablePath: defaultExecutablePath,
		...options,
	};
	return {
		// TODO
		/**
		 * Runs the executable on `filePath` and parses its output.
		 *
		 * @param filePath Path to the audio file, passed to the executable via `-f`.
		 * @param options Either `modelPath` (used as-is) or `modelName` (resolved
		 *   below `nodeModulesModelPath` through `modelFileNames`), plus optional
		 *   `whisperOptions` that are converted to command line flags.
		 * @returns The result of `transcriptToArray` applied to the executable's output.
		 * @throws Error "Error during whisper operation"; the underlying failure
		 *   (missing model, failed execution, ...) is attached as `cause`.
		 */
		translate: async (filePath, options) => {
			try {
				const modelPath =
					"modelPath" in options
						? options.modelPath
						: path.join(nodeModulesModelPath, modelFileNames[options.modelName]);
				// fs.stat() never resolves to a falsy value: it rejects when the path
				// cannot be stat'ed. The existence check therefore has to be a catch,
				// otherwise the descriptive error below is unreachable.
				try {
					await fs.stat(modelPath);
				} catch (cause) {
					throw new Error(`Model not found at "${modelPath}".`, { cause });
				}
				// 1. create command string for whisper.cpp
				const flags = options.whisperOptions
					? getFlags(options.whisperOptions)
					: [];
				const args = [...flags, "-m", modelPath, "-f", filePath];
				// 2. run command in whisper.cpp directory
				// TODO: add return for continually updated progress value
				const transcript = await execute(effectiveOptions.executablePath, args);
				// 3. parse whisper response string into array
				return transcriptToArray(transcript.toString());
			} catch (cause) {
				throw new Error("Error during whisper operation", { cause });
			}
		},
	};
}
// option flags list: https://github.com/ggerganov/whisper.cpp/blob/master/README.md?plain=1#L91 | ||
function getFlags(flags) { | ||
const s = []; | ||
// output files | ||
if (flags.generateTxt) { | ||
s.push("-otxt"); | ||
} | ||
catch (cause) { | ||
throw new Error("Error during whisper operation", { cause }); | ||
if (flags.generateSubtitles) { | ||
s.push("-osrt"); | ||
} | ||
if (flags.generateVtt) { | ||
s.push("-ovtt"); | ||
} | ||
// timestamps | ||
if (flags.timestampSize) { | ||
s.push("-ml"); | ||
s.push(flags.timestampSize.toString()); | ||
} | ||
if (flags.wordTimestamps) { | ||
s.push("-ml"); | ||
s.push("1"); | ||
} | ||
return s; | ||
} |
@@ -9,3 +9,3 @@ #!/usr/bin/env node | ||
if (!(await canExecute(whisperCppMain))) { | ||
console.log("whisper.cpp not initialized. Compiling whisper.cpp..."); | ||
console.log("whisper.cpp not initialized, compiling..."); | ||
await execute("make"); | ||
@@ -22,3 +22,3 @@ if (!(await canExecute(whisperCppMain))) { | ||
else { | ||
console.error("Could not run whisper.cpp"); | ||
console.error("Could not test-run whisper.cpp"); | ||
process.exit(-1); | ||
@@ -25,0 +25,0 @@ } |
@@ -1,7 +0,8 @@ | ||
import { whisper } from "./index.js"; | ||
import { createWhisperClient } from "./index.js"; | ||
(async function run() { | ||
const client = createWhisperClient({}); | ||
try { | ||
const transcript = await whisper("/Users/Shared/twospeak_clip.wav", { | ||
const transcript = await client.translate("/Users/Shared/twospeak_clip.wav", { | ||
// modelPath: "/Users/Shared/custom-models/ggml-base.en.bin", | ||
// modelName: "base.en", | ||
modelName: "base.en", | ||
whisperOptions: { wordTimestamps: true }, | ||
@@ -8,0 +9,0 @@ }); |
{ | ||
"name": "@pr0gramm/fluester", | ||
"version": "0.3.7", | ||
"version": "0.3.8", | ||
"license": "MIT", | ||
@@ -25,3 +25,3 @@ "description": "Node.js bindings for OpenAI's Whisper. Optimized for CPU.", | ||
"@biomejs/biome": "^1.2.2", | ||
"@types/node": "^20.8.3", | ||
"@types/node": "^20.8.4", | ||
"bun": "^1.0.4", | ||
@@ -28,0 +28,0 @@ "typescript": "^5.2.2" |
@@ -33,6 +33,8 @@ # fluester – [ˈflʏstɐ] [![CI](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml) [![CD](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml) ![version](https://img.shields.io/npm/v/%40pr0gramm/fluester) ![downloads](https://img.shields.io/npm/dm/%40pr0gramm/fluester) | ||
```js | ||
import whisper from "@pr0gramm/fluester"; | ||
import { createWhisperClient } from "@pr0gramm/fluester"; | ||
const transcript = await whisper("example/sample.wav"); | ||
const client = createWhisperClient({}); | ||
const transcript = await client.translate("example/sample.wav", { modelName: "base.en" }); | ||
console.log(transcript); // output: [ {start,end,speech} ] | ||
@@ -39,0 +41,0 @@ ``` |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
80
5311587
299
4129