@pr0gramm/fluester
Advanced tools
Comparing version 0.3.9 to 0.3.10
import { ModelName } from "./model.js"; | ||
import { TranscriptLine } from "./transcript.js"; | ||
export interface WhisperClientOptions { | ||
import { LanguageDetectionResult, TranscriptLine } from "./transcript.js"; | ||
export interface WhisperClientOptionsBase { | ||
/** Path to the whisper executable */ | ||
executablePath?: string; | ||
} | ||
export interface WhisperOptionsBase { | ||
whisperOptions?: FlagTypes; | ||
} | ||
export interface WhisperOptionsWithModelPath extends WhisperOptionsBase { | ||
modelPath: string; | ||
} | ||
export interface WhisperOptionsWithModelName extends WhisperOptionsBase { | ||
export interface WhisperClientOptionsWithModelName extends WhisperClientOptionsBase { | ||
/** | ||
@@ -21,3 +15,10 @@ * Name of model stored in `node_modules/@pr0gramm/fluester/lib/whisper.cpp/models` | ||
} | ||
export type WhisperOptions = WhisperOptionsWithModelPath | WhisperOptionsWithModelName; | ||
export interface WhisperClientOptionsWithModelPath extends WhisperClientOptionsBase { | ||
modelPath: string; | ||
} | ||
export type WhisperClientOptions = WhisperClientOptionsWithModelName | WhisperClientOptionsWithModelPath; | ||
export interface WhisperOptions { | ||
whisperOptions?: FlagTypes; | ||
sourceLanguage?: string; | ||
} | ||
export interface WhisperClient { | ||
@@ -30,2 +31,3 @@ /** | ||
translate: (filePath: string, options: WhisperOptions) => Promise<TranscriptLine[]>; | ||
detectLanguage: (filePath: string) => Promise<LanguageDetectionResult | undefined>; | ||
} | ||
@@ -32,0 +34,0 @@ export declare function createWhisperClient(options: WhisperClientOptions): WhisperClient; |
@@ -6,3 +6,3 @@ import * as fs from "node:fs/promises"; | ||
import { modelFileNames } from "./model.js"; | ||
import transcriptToArray from "./transcript.js"; | ||
import transcriptToArray, { parseDetectedLanguage, } from "./transcript.js"; | ||
export function createWhisperClient(options) { | ||
@@ -12,16 +12,13 @@ const effectiveOptions = { | ||
...options, | ||
modelPath: getModelPath(options), | ||
}; | ||
async function ensureModel() { | ||
if (!(await fs.stat(effectiveOptions.modelPath))) { | ||
throw new Error(`Model not found at "${effectiveOptions.modelPath}".`); | ||
} | ||
} | ||
return { | ||
// TODO | ||
/** | ||
* @param filePath The audio file to translate. | ||
* @param options | ||
* @returns English translation of the audio file. If it's already english, it will be a transcription. | ||
*/ | ||
translate: async (filePath, options) => { | ||
await ensureModel(); | ||
try { | ||
const modelPath = getModelPath(options); | ||
if (!(await fs.stat(modelPath))) { | ||
throw new Error(`Model not found at "${modelPath}".`); | ||
} | ||
// 1. create command string for whisper.cpp | ||
@@ -31,3 +28,9 @@ const flags = options.whisperOptions | ||
: []; | ||
const args = [...flags, "-m", modelPath, "-f", filePath]; | ||
const args = [ | ||
...flags, | ||
"-m", | ||
effectiveOptions.modelPath, | ||
"-f", | ||
filePath, | ||
]; | ||
// 2. run command in whisper.cpp directory | ||
@@ -43,2 +46,13 @@ // TODO: add return for continually updated progress value | ||
}, | ||
detectLanguage: async (filePath) => { | ||
await ensureModel(); | ||
const result = await execute(effectiveOptions.executablePath, [ | ||
"--detect-language", | ||
"-m", | ||
effectiveOptions.modelPath, | ||
filePath, | ||
]); | ||
// TODO: Check for probability threshold | ||
return parseDetectedLanguage(result.stderr.toString()); | ||
}, | ||
}; | ||
@@ -45,0 +59,0 @@ } |
import { createWhisperClient } from "./index.js"; | ||
(async function run() { | ||
const client = createWhisperClient({}); | ||
const client = createWhisperClient({ | ||
modelName: "base.en", | ||
}); | ||
try { | ||
const transcript = await client.translate("/Users/Shared/twospeak_clip.wav", { | ||
// modelPath: "/Users/Shared/custom-models/ggml-base.en.bin", | ||
modelName: "base.en", | ||
whisperOptions: { wordTimestamps: true }, | ||
@@ -9,0 +10,0 @@ }); |
@@ -7,1 +7,6 @@ export interface TranscriptLine { | ||
export default function parseTranscript(vtt: string): TranscriptLine[]; | ||
export interface LanguageDetectionResult { | ||
language: string; | ||
probability: number; | ||
} | ||
export declare function parseDetectedLanguage(output: string): LanguageDetectionResult | undefined; |
@@ -17,1 +17,15 @@ export default function parseTranscript(vtt) { | ||
} | ||
/**
 * Extracts the auto-detected language from whisper.cpp's diagnostic output.
 *
 * Looks for a line shaped like:
 *   whisper_full_with_state: auto-detected language: en (p = 0.958819)
 *
 * @param output Text emitted by whisper.cpp (e.g. its stderr).
 * @returns The lower-cased language code together with the reported
 *   probability, or `undefined` if no detection line is present.
 */
export function parseDetectedLanguage(output) {
    // whisper.cpp reports a short language code. Most are two letters, but the
    // language table also contains three-letter codes (e.g. "haw"), so accept both:
    // https://github.com/ggerganov/whisper.cpp/blob/940cdb13964a563d86c7dc6e160a43ec89b8bb2e/whisper.cpp#L195-L295
    const match = /auto-detected language: ([A-Za-z]{2,3})\s*\(p\s*=\s*(\d+(?:\.\d+)?)\)/.exec(output);
    if (!match) {
        return undefined;
    }
    return {
        language: match[1].toLowerCase(),
        // The capture group contains only digits and an optional dot, so no trimming is needed.
        probability: Number(match[2]),
    };
}
{ | ||
"name": "@pr0gramm/fluester", | ||
"version": "0.3.9", | ||
"version": "0.3.10", | ||
"license": "MIT", | ||
@@ -5,0 +5,0 @@ "description": "Node.js bindings for OpenAI's Whisper. Optimized for CPU.", |
@@ -1,3 +0,2 @@ | ||
# fluester – [ˈflʏstɐ] [![CI](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml) [![CD](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml) ![version](https://img.shields.io/npm/v/%40pr0gramm/fluester) ![downloads](https://img.shields.io/npm/dm/%40pr0gramm/fluester) | ||
![License](https://img.shields.io/npm/l/%40pr0gramm%2Ffluester) | ||
# fluester – [ˈflʏstɐ] [![CI](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml) [![CD](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml) ![version](https://img.shields.io/npm/v/%40pr0gramm/fluester) ![downloads](https://img.shields.io/npm/dm/%40pr0gramm/fluester) ![License](https://img.shields.io/npm/l/%40pr0gramm%2Ffluester) | ||
@@ -32,6 +31,9 @@ Node.js bindings for OpenAI's Whisper. Hard-fork of [whisper-node](https://github.com/ariym/whisper-node). | ||
## Usage | ||
### Translation | ||
```js | ||
import { createWhisperClient } from "@pr0gramm/fluester"; | ||
const client = createWhisperClient(); | ||
const client = createWhisperClient({ | ||
modelName: "base", | ||
}); | ||
@@ -43,3 +45,3 @@ const transcript = await client.translate("example/sample.wav"); | ||
### Output (JSON) | ||
#### Output (JSON) | ||
```js | ||
@@ -55,21 +57,16 @@ [ | ||
### Usage with Additional Options | ||
### Language Detection | ||
```js | ||
import whisper from "@pr0gramm/fluester"; | ||
import { createWhisperClient } from "@pr0gramm/fluester"; | ||
const filePath = "example/sample.wav", // required | ||
const client = createWhisperClient({ | ||
modelName: "base", | ||
}); | ||
const options = { | ||
modelName: "tiny.en", // default | ||
modelPath: "/custom/path/to/model.bin", // use model in a custom directory | ||
whisperOptions: { | ||
generateTxt: false, // outputs .txt file | ||
generateSubtitles: false, // outputs .srt file | ||
generateVtt: false, // outputs .vtt file | ||
timestampSize: 10, // amount of dialogue per timestamp pair | ||
wordTimestamps: true // timestamp for every word | ||
} | ||
const result = await client.detectLanguage("example/sample.wav"); | ||
if(result) { | ||
console.log(`Detected: ${result.language} with probability ${result.probability}`); | ||
} else { | ||
console.log("Did not detect anything :("); | ||
} | ||
const transcript = await whisper(filePath, options); | ||
``` | ||
@@ -76,0 +73,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
5314012
4178
99