@pr0gramm/fluester - npm Package Compare versions

Comparing version 0.3.9 to 0.3.10

dist/index.d.ts

		import { ModelName } from "./model.js";
		import { TranscriptLine } from "./transcript.js";
		export interface WhisperClientOptions {
		import { LanguageDetectionResult, TranscriptLine } from "./transcript.js";
		export interface WhisperClientOptionsBase {
		/** Path to the whisper executable */
		executablePath?: string;
		}
		export interface WhisperOptionsBase {
		whisperOptions?: FlagTypes;
		}
		export interface WhisperOptionsWithModelPath extends WhisperOptionsBase {
		modelPath: string;
		}
		export interface WhisperOptionsWithModelName extends WhisperOptionsBase {
		export interface WhisperClientOptionsWithModelName extends WhisperClientOptionsBase {
		/**
		@@ -21,3 +15,10 @@ * Name of model stored in `node_modules/@pr0gramm/fluester/lib/whisper.cpp/models`
		}
		export type WhisperOptions = WhisperOptionsWithModelPath | WhisperOptionsWithModelName;
		export interface WhisperClientOptionsWithModelPath extends WhisperClientOptionsBase {
		modelPath: string;
		}
		export type WhisperClientOptions = WhisperClientOptionsWithModelName | WhisperClientOptionsWithModelPath;
		export interface WhisperOptions {
		whisperOptions?: FlagTypes;
		sourceLanguage?: string;
		}
		export interface WhisperClient {
		@@ -30,2 +31,3 @@ /**
		translate: (filePath: string, options: WhisperOptions) => Promise<TranscriptLine[]>;
		detectLanguage: (filePath: string) => Promise<LanguageDetectionResult | undefined>;
		}
		@@ -32,0 +34,0 @@ export declare function createWhisperClient(options: WhisperClientOptions): WhisperClient;

dist/index.js

		@@ -6,3 +6,3 @@ import * as fs from "node:fs/promises";
		import { modelFileNames } from "./model.js";
		import transcriptToArray from "./transcript.js";
		import transcriptToArray, { parseDetectedLanguage, } from "./transcript.js";
		export function createWhisperClient(options) {
		@@ -12,16 +12,13 @@ const effectiveOptions = {
		...options,
		modelPath: getModelPath(options),
		};
		async function ensureModel() {
		if (!(await fs.stat(effectiveOptions.modelPath))) {
		throw new Error(`Model not found at "${effectiveOptions.modelPath}".`);
		}
		}
		return {
		// TODO
		/**
		* @param filePath The audio file to translate.
		* @param options
		* @returns English translation of the audio file. If it's already english, it will be a transcription.
		*/
		translate: async (filePath, options) => {
		await ensureModel();
		try {
		const modelPath = getModelPath(options);
		if (!(await fs.stat(modelPath))) {
		throw new Error(`Model not found at "${modelPath}".`);
		}
		// 1. create command string for whisper.cpp
		@@ -31,3 +28,9 @@ const flags = options.whisperOptions
		: [];
		const args = [...flags, "-m", modelPath, "-f", filePath];
		const args = [
		...flags,
		"-m",
		effectiveOptions.modelPath,
		"-f",
		filePath,
		];
		// 2. run command in whisper.cpp directory
		@@ -43,2 +46,13 @@ // TODO: add return for continually updated progress value
		},
		detectLanguage: async (filePath) => {
		await ensureModel();
		const result = await execute(effectiveOptions.executablePath, [
		"--detect-language",
		"-m",
		effectiveOptions.modelPath,
		filePath,
		]);
		// TODO: Check for probability threshold
		return parseDetectedLanguage(result.stderr.toString());
		},
		};
		@@ -45,0 +59,0 @@ }

dist/test.js

		import { createWhisperClient } from "./index.js";
		(async function run() {
		const client = createWhisperClient({});
		const client = createWhisperClient({
		modelName: "base.en",
		});
		try {
		const transcript = await client.translate("/Users/Shared/twospeak_clip.wav", {
		// modelPath: "/Users/Shared/custom-models/ggml-base.en.bin",
		modelName: "base.en",
		whisperOptions: { wordTimestamps: true },
		@@ -9,0 +10,0 @@ });

dist/transcript.d.ts

		@@ -7,1 +7,6 @@ export interface TranscriptLine {
		export default function parseTranscript(vtt: string): TranscriptLine[];
		/**
		 * Result of parsing whisper.cpp's "auto-detected language" output line,
		 * as returned by `parseDetectedLanguage`.
		 */
		export interface LanguageDetectionResult {
		/** Lower-cased language code captured from the "auto-detected language: xx" output. */
		language: string;
		/** Numeric value parsed from the "(p = ...)" part of the output; presumably within [0, 1] — TODO confirm. */
		probability: number;
		}
		export declare function parseDetectedLanguage(output: string): LanguageDetectionResult | undefined;

dist/transcript.js

		@@ -17,1 +17,15 @@ export default function parseTranscript(vtt) {
		}
		/**
		 * Extracts the auto-detected language from whisper.cpp's diagnostic output.
		 *
		 * Example line that is recognised:
		 *   whisper_full_with_state: auto-detected language: en (p = 0.958819)
		 *
		 * @param output Text emitted by whisper.cpp (may contain many unrelated lines).
		 * @returns The lower-cased language code and its probability, or `undefined`
		 *          when the output contains no auto-detection line.
		 */
		export function parseDetectedLanguage(output) {
			// whisper.cpp mostly uses two-letter language codes, but its table also
			// contains a few three-letter ones (e.g. "haw"):
			// https://github.com/ggerganov/whisper.cpp/blob/940cdb13964a563d86c7dc6e160a43ec89b8bb2e/whisper.cpp#L195-L295
			//
			// The parentheses around "p = ..." are literal and must be escaped. The
			// pattern is deliberately not anchored to the end of the input, because
			// the detection line is usually followed by a newline and further output.
			const match = /auto-detected language: (\w{2,3})\s+\(p\s*=\s*(\d+\.\d+)\)/.exec(output);
			if (!match) {
				return undefined;
			}
			return {
				language: match[1].toLowerCase(),
				probability: Number(match[2]),
			};
		}

package.json

		{
		"name": "@pr0gramm/fluester",
		"version": "0.3.9",
		"version": "0.3.10",
		"license": "MIT",
		@@ -5,0 +5,0 @@ "description": "Node.js bindings for OpenAI's Whisper. Optimized for CPU.",

README.md

		@@ -1,3 +0,2 @@
		# fluester – [ˈflʏstɐ] [![CI](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml) [![CD](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml) ![version](https://img.shields.io/npm/v/%40pr0gramm/fluester) ![downloads](https://img.shields.io/npm/dm/%40pr0gramm/fluester)
		![License](https://img.shields.io/npm/l/%40pr0gramm%2Ffluester)
		# fluester – [ˈflʏstɐ] [![CI](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CI.yml) [![CD](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml/badge.svg)](https://github.com/pr0gramm-com/fluester/actions/workflows/CD.yml) ![version](https://img.shields.io/npm/v/%40pr0gramm/fluester) ![downloads](https://img.shields.io/npm/dm/%40pr0gramm/fluester) ![License](https://img.shields.io/npm/l/%40pr0gramm%2Ffluester)

		@@ -32,6 +31,9 @@ Node.js bindings for OpenAI's Whisper. Hard-fork of [whisper-node](https://github.com/ariym/whisper-node).
		## Usage
		### Translation
		```js
		import { createWhisperClient } from "@pr0gramm/fluester";

		const client = createWhisperClient();
		const client = createWhisperClient({
		modelName: "base",
		});

		@@ -43,3 +45,3 @@ const transcript = await client.translate("example/sample.wav");

		### Output (JSON)
		#### Output (JSON)
		```js
		@@ -55,21 +57,16 @@ [

		### Usage with Additional Options
		### Language Detection
		```js
		import whisper from "@pr0gramm/fluester";
		import { createWhisperClient } from "@pr0gramm/fluester";

		const filePath = "example/sample.wav", // required
		const client = createWhisperClient({
		modelName: "base",
		});

		const options = {
		modelName: "tiny.en", // default
		modelPath: "/custom/path/to/model.bin", // use model in a custom directory
		whisperOptions: {
		generateTxt: false, // outputs .txt file
		generateSubtitles: false, // outputs .srt file
		generateVtt: false, // outputs .vtt file
		timestampSize: 10, // amount of dialogue per timestamp pair
		wordTimestamps: true // timestamp for every word
		}
		const result = await client.detectLanguage("example/sample.wav");
		if(result) {
		console.log(`Detected: ${result.language} with probability ${result.probability}`);
		} else {
		console.log("Did not detect anything :(");
		}

		const transcript = await whisper(filePath, options);
		```
		@@ -76,0 +73,0 @@

@pr0gramm/fluester - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics