@pr0gramm/fluester
Comparing version 0.6.0 to 0.6.1
{
"name": "whisper.cpp",
"version": "1.5.1",
"version": "1.5.4",
"description": "Whisper speech recognition",
@@ -5,0 +5,0 @@ "main": "whisper.js",
@@ -44,3 +44,3 @@ # whisper.cpp
```java
```text
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
@@ -67,3 +67,3 @@
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
@@ -70,0 +70,0 @@ operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
cmake_minimum_required (VERSION 3.5)
project(whisper.cpp VERSION 1.5.1)
project(whisper.cpp VERSION 1.5.4)
set(SOVERSION 1)
@@ -71,9 +72,14 @@ # Add path to modules
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
else()
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
option(WHISPER_OPENBLAS_INTERFACE64 "whisper: use OpenBLAS w/ 64-bit interface" OFF)
option(WHISPER_CUDA "whisper: support for CUDA" OFF)
option(WHISPER_CUBLAS "whisper: support for CUDA (deprecated)" OFF)
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
option(WHISPER_SYCL "whisper: use SYCL" OFF)
option(WHISPER_SYCL_F16 "whisper: use 16 bit floats for sycl calculations" OFF)
endif()
@@ -109,2 +115,9 @@
# compile flag for SYCL
if (WHISPER_SYCL)
set(CMAKE_CXX_STANDARD 17)
else()
set(CMAKE_CXX_STANDARD 11)
endif()
# on APPLE
@@ -120,3 +133,3 @@ if (APPLE)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64)
else()
@@ -151,4 +164,29 @@ message(FATAL_ERROR "Accelerate framework not found")
# copy ggml-metal.metal to bin directory
# copy ggml-common.h and ggml-metal.metal to bin directory
configure_file(ggml-common.h bin/ggml-common.h COPYONLY)
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
if (WHISPER_METAL_EMBED_LIBRARY)
enable_language(ASM)
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_METAL_EMBED_LIBRARY)
set(METALLIB_SOURCE "${CMAKE_SOURCE_DIR}/ggml-metal.metal")
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s")
add_custom_command(
OUTPUT ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY}
DEPENDS ${METALLIB_SOURCE}
COMMENT "Generate assembly for embedded Metal library"
)
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY})
endif()
endif()
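A note on what the embed option above buys: the generated assembly places the Metal shader source between two global symbols, so the library can read it at runtime without any file I/O. A minimal C++ sketch of how such `.incbin`-delimited symbols are typically consumed — the symbol names are taken from the assembly above, but how ggml itself accesses them internally may differ:

```cpp
#include <cstddef>
#include <string>

// Symbols emitted by the generated assembly above; the leading underscore
// in "_ggml_metallib_start" is the Mach-O mangling of this C name.
extern "C" const char ggml_metallib_start[];
extern "C" const char ggml_metallib_end[];

// Return the embedded Metal source as a string (illustrative helper,
// not part of the ggml API).
std::string load_embedded_metallib() {
    return std::string(ggml_metallib_start,
                       static_cast<size_t>(ggml_metallib_end - ggml_metallib_start));
}
```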
@@ -177,19 +215,64 @@
set(WHISPER_BLAS ON)
# BLA_PKGCONFIG_BLAS is supported since CMake 3.25.
# FindBLAS.cmake pkg-config logic seems incomplete, because when
# BLA_SIZEOF_INTEGER is 8, then it should search for blas64 instead of blas.
# blas.pc/blas64.pc are not always provided, so let's be more specific
# and go with openblas.pc/openblas64.pc if WHISPER_OPENBLAS is on.
if (WHISPER_OPENBLAS_INTERFACE64)
set(WHISPER_BLAS_LIB "openblas64")
else ()
set(WHISPER_BLAS_LIB "openblas")
endif ()
set(BLA_PKGCONFIG_BLAS ${WHISPER_BLAS_LIB})
# OpenBLAS prebuilt libraries for Windows do not have "64" suffix in filename.
# (But .pc file has "64" suffix in filename for USE_64BITINT=1 Windows build.)
if (MSVC)
set(WHISPER_BLAS_LIB "openblas")
endif ()
endif()
if (WHISPER_BLAS)
if (WIN32)
if(DEFINED ENV{OPENBLAS_PATH})
set(BLAS_LIBRARIES $ENV{OPENBLAS_PATH}/lib/libopenblas.dll.a)
message(STATUS "Libraries ${BLAS_LIBRARIES}")
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
include_directories($ENV{OPENBLAS_PATH}/include)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
if (NOT "$ENV{OPENBLAS_PATH}" STREQUAL "")
if (WHISPER_STATIC)
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
else ()
message(FATAL_ERROR "BLAS library was not found. Environment variable OPENBLAS_PATH not defined.")
if (CMAKE_IMPORT_LIBRARY_SUFFIX)
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
else ()
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
endif ()
endif ()
# OpenBLAS prebuilt libraries hardcode "lib" prefix in filename even on Windows
if (WHISPER_OPENBLAS)
set(WHISPER_BLAS_LIB_PREFIX "lib")
endif ()
message(STATUS "BLAS compatible library path provided")
set(BLAS_LIBRARIES "$ENV{OPENBLAS_PATH}/lib/${WHISPER_BLAS_LIB_PREFIX}${WHISPER_BLAS_LIB}${WHISPER_BLAS_LIB_SUFFIX}")
message(STATUS "Libraries ${BLAS_LIBRARIES}")
set(BLAS_INCLUDE_DIRS "$ENV{OPENBLAS_PATH}/include")
message(STATUS "Include dirs ${BLAS_INCLUDE_DIRS}")
if (NOT EXISTS "${BLAS_LIBRARIES}")
message(FATAL_ERROR "BLAS library was not found. Environment variable OPENBLAS_PATH misdefined.")
endif ()
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
include_directories(${BLAS_INCLUDE_DIRS})
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
else ()
set(BLA_STATIC 1)
if (WHISPER_STATIC)
# FindBLAS.cmake pkg-config logic seems incomplete, because when
# BLA_STATIC is on, then it should use pkg_check_modules_static
# instead of pkg_check_modules.
# Some manual variable overriding may be necessary if you don't
# achieve desired results.
set(BLA_STATIC 1)
endif ()
set(BLA_VENDOR ${WHISPER_BLAS_VENDOR})
set(BLA_SIZEOF_INTEGER 8)
if (WHISPER_OPENBLAS_INTERFACE64)
set(BLA_SIZEOF_INTEGER 8)
else ()
set(BLA_SIZEOF_INTEGER 4)
endif()
set(BLA_PREFER_PKGCONFIG 1)
@@ -201,3 +284,10 @@ find_package(BLAS)
message(STATUS "Libraries ${BLAS_LIBRARIES}")
find_path(BLAS_INCLUDE_DIRS cblas.h /usr/include/openblas /usr/local/include/openblas $ENV{BLAS_HOME}/include)
if (NOT DEFINED BLAS_INCLUDE_DIRS)
if (PKGC_BLAS_FOUND)
set(BLAS_INCLUDE_DIRS "${PKGC_BLAS_INCLUDE_DIRS}")
else ()
find_path(BLAS_INCLUDE_DIRS cblas.h /usr/include/openblas)
endif()
endif()
message(STATUS "Include dirs ${BLAS_INCLUDE_DIRS}")
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
@@ -213,2 +303,7 @@ include_directories(${BLAS_INCLUDE_DIRS})
if (WHISPER_CUBLAS)
message(WARNING "WHISPER_CUBLAS is deprecated and will be removed in the future.\nUse WHISPER_CUDA instead")
set(WHISPER_CUDA ON)
endif()
if (WHISPER_CUDA)
cmake_minimum_required(VERSION 3.17)
@@ -223,8 +318,15 @@
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
list(APPEND GGML_SOURCES_CUDA ggml-cuda.h)
list(APPEND GGML_SOURCES_CUDA ggml-cuda.cu)
add_compile_definitions(GGML_USE_CUBLAS)
add_compile_definitions(GGML_USE_CUDA)
if (WHISPER_STATIC)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
if (WIN32)
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
else ()
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
endif()
else()
@@ -234,2 +336,3 @@ set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
else()
@@ -256,3 +359,3 @@ message(FATAL_ERROR "cuBLAS not found")
message(STATUS "HIP and hipBLAS found")
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)
add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
@@ -291,2 +394,26 @@ set_property(TARGET ggml-rocm PROPERTY POSITION_INDEPENDENT_CODE ON)
if (WHISPER_SYCL)
if (NOT DEFINED ENV{ONEAPI_ROOT})
message(FATAL_ERROR "ONEAPI_ROOT is not set. Please install oneAPI and source it, e.g.: source /opt/intel/oneapi/setvars.sh")
endif()
# TODO: AOT
find_package(IntelSYCL REQUIRED)
if (WHISPER_SYCL_F16)
add_compile_definitions(GGML_SYCL_F16)
endif()
add_compile_definitions(GGML_USE_SYCL)
add_compile_options(-I./) # include DPCT
add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
endif()
# compiler flags
@@ -323,3 +450,4 @@
if (NOT MSVC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
# TODO: temporarily disabled until we figure out ggml-metal.m
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
@@ -353,4 +481,4 @@ endif()
if (EMSCRIPTEN)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
else()
@@ -487,2 +615,4 @@ if(NOT WHISPER_NO_AVX)
${GGML_SOURCES_OPENCL}
${GGML_SOURCES_SYCL}
${GGML_HEADERS_SYCL}
whisper.h
@@ -492,2 +622,8 @@ whisper.cpp
# Set the version numbers
set_target_properties(whisper PROPERTIES
VERSION ${PROJECT_VERSION}
SOVERSION ${SOVERSION}
)
include(DefaultTargetOptions)
@@ -516,2 +652,3 @@
if (BUILD_SHARED_LIBS)
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC
@@ -540,3 +677,9 @@ ${CMAKE_DL_LIBS}
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES OFF)
# Only configure ggml CUDA architectures if not globally set
if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
# Not overridden by user, so set defaults
set(GGML_CUDA_ARCHITECTURES 52 61 70)
endif()
message(STATUS "GGML Configuring CUDA architectures ${GGML_CUDA_ARCHITECTURES}")
set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES ${GGML_CUDA_ARCHITECTURES})
set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
@@ -553,3 +696,3 @@ endif()
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "ggml.h;whisper.h")
@@ -556,0 +699,0 @@ include(GNUInstallDirs)
@@ -1,17 +0,14 @@
## Whisper model files in custom ggml format
## Whisper model files in custom `ggml` format
The [original Whisper PyTorch models provided by OpenAI](https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17-L27)
The [original Whisper PyTorch models provided by OpenAI](https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17-L30)
are converted to a custom `ggml` format so that they can be loaded in C/C++.
Conversion is performed using the [convert-pt-to-ggml.py](convert-pt-to-ggml.py) script.
You can either obtain the original models and generate the `ggml` files yourself using the conversion script,
or you can use the [download-ggml-model.sh](download-ggml-model.sh) script to download the already converted models.
Currently, they are hosted on the following locations:
There are three ways to obtain `ggml` models:
- https://huggingface.co/ggerganov/whisper.cpp
- https://ggml.ggerganov.com
### 1. Use [download-ggml-model.sh](download-ggml-model.sh) to download pre-converted models
Sample download:
Example download:
```java
```text
$ ./download-ggml-model.sh base.en
@@ -26,6 +23,16 @@ Downloading ggml model base.en ...
To convert the files yourself, use the convert-pt-to-ggml.py script. Here is an example usage.
The original PyTorch files are assumed to have been downloaded into ~/.cache/whisper
Change `~/path/to/repo/whisper/` to the location for your copy of the Whisper source:
```
### 2. Manually download pre-converted models
`ggml` models are available from the following locations:
- https://huggingface.co/ggerganov/whisper.cpp/tree/main
- https://ggml.ggerganov.com
### 3. Convert with [convert-pt-to-ggml.py](convert-pt-to-ggml.py)
Download one of the [models provided by OpenAI](https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17-L30) and generate the `ggml` files using the [convert-pt-to-ggml.py](convert-pt-to-ggml.py) script.
Example conversion, assuming the original PyTorch files have been downloaded into `~/.cache/whisper`. Change `~/path/to/repo/whisper/` to the location for your copy of the Whisper source:
```bash
mkdir models/whisper-medium
@@ -37,22 +44,23 @@ python models/convert-pt-to-ggml.py ~/.cache/whisper/medium.pt ~/path/to/repo/whisper/ ./models/whisper-medium
A third option to obtain the model files is to download them from Hugging Face:
## Available models
https://huggingface.co/ggerganov/whisper.cpp/tree/main
| Model | Disk | SHA |
| ------------- | ------- | ------------------------------------------ |
| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
| small.en-tdrz | 465 MiB | `b6c6e7e89af1a35c08e6de56b66ca6a02a2fdfa1` |
| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
| large-v2-q5_0 | 1.1 GiB | `00e39f2196344e901b3a2bd5814807a769bd1630` |
| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
| large-v3-q5_0 | 1.1 GiB | `e6e2ed78495d403bef4b7cff42ef4aaadcfea8de` |
## Available models
Models are multilingual unless the model name includes `.en`. Models ending in `-q5_0` are [quantized](../README.md#quantization). Models ending in `-tdrz` support local diarization (marking of speaker turns) using [tinydiarize](https://github.com/akashmjn/tinydiarize). More information about models is available [upstream (openai/whisper)](https://github.com/openai/whisper#available-models-and-languages). The list above is a subset of the models supported by the [download-ggml-model.sh](download-ggml-model.sh) script, but many more are available at https://huggingface.co/ggerganov/whisper.cpp/tree/main and elsewhere.
| Model | Disk | SHA |
| --- | --- | --- |
| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
## Model files for testing purposes
@@ -59,0 +67,0 @@
@@ -9,3 +9,3 @@ # whisper.cpp
Stable: [v1.5.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
@@ -37,5 +37,6 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
- [x] [docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
The rest of the code is part of the [ggml](https://github.com/ggerganov/ggml) machine learning library.
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
@@ -65,12 +66,14 @@ Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
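Because the whole high-level interface lives in those two files, a transcription loop is short. Below is a minimal C++ sketch against the whisper.h API as of the v1.5.x headers (untested here; it assumes `pcm` already holds 16 kHz mono float samples and that the base.en model has been downloaded):

```cpp
#include <cstdio>
#include <vector>

#include "whisper.h"

// Minimal sketch: transcribe 16 kHz mono float PCM with the whisper.h API.
int transcribe(const std::vector<float> & pcm) {
    struct whisper_context * ctx = whisper_init_from_file_with_params(
        "models/ggml-base.en.bin", whisper_context_default_params());
    if (ctx == nullptr) {
        return 1;
    }

    // Greedy sampling with default parameters; runs the full encoder/decoder.
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    if (whisper_full(ctx, wparams, pcm.data(), (int) pcm.size()) != 0) {
        whisper_free(ctx);
        return 1;
    }

    // Print the decoded text segment by segment.
    for (int i = 0; i < whisper_full_n_segments(ctx); ++i) {
        printf("%s\n", whisper_full_get_segment_text(ctx, i));
    }

    whisper_free(ctx);
    return 0;
}
```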
## Quick start
First clone the repository.
First clone the repository:
Then, download one of the Whisper models converted in [ggml format](models). For example:
```bash
git clone https://github.com/ggerganov/whisper.cpp.git
```
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
```bash
@@ -80,4 +83,2 @@ bash ./models/download-ggml-model.sh base.en
If you wish to convert the Whisper models to ggml format yourself, instructions are in [models/README.md](models/README.md).
Now build the [main](examples/main) example and transcribe an audio file like this:
@@ -97,3 +98,3 @@
```java
```text
$ make base.en
@@ -214,3 +215,3 @@
```java
```bash
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
@@ -247,5 +248,5 @@ ```
| Model | Disk | Mem |
| --- | --- | --- |
| tiny | 75 MiB | ~273 MB |
| Model | Disk | Mem |
| ------ | ------- | ------- |
| tiny | 75 MiB | ~273 MB |
| base | 142 MiB | ~388 MB |
@@ -287,3 +288,4 @@ | small | 466 MiB | ~852 MB |
- Python 3.10 is recommended.
- macOS Sonoma (version 14) or newer is recommended, as older versions of macOS might experience issues with transcription hallucination.
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
@@ -314,4 +316,4 @@ - To activate the environment, use: `conda activate py310-whisper`
```bash
./main -m models/ggml-base.en.bin -f samples/jfk.wav
```text
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
@@ -344,3 +346,4 @@ ...
Windows:
```
```powershell
cd models
@@ -350,7 +353,8 @@ python -m venv openvino_conv_env
python -m pip install --upgrade pip
pip install -r openvino-conversion-requirements.txt
pip install -r requirements-openvino.txt
```
Linux and macOS:
```
```bash
cd models
@@ -360,3 +364,3 @@ python3 -m venv openvino_conv_env
python -m pip install --upgrade pip
pip install -r openvino-conversion-requirements.txt
pip install -r requirements-openvino.txt
```
@@ -370,3 +374,3 @@
This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as ggml models, as that
This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
is the default location that the OpenVINO extension will search at runtime.
@@ -381,2 +385,3 @@
Linux:
```bash
@@ -387,3 +392,4 @@ source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
Windows (cmd):
```
```powershell
C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
@@ -393,2 +399,3 @@ ```
And then build the project using cmake:
```bash
@@ -400,5 +407,6 @@ cmake -B build -DWHISPER_OPENVINO=1
- Run the examples as usual. For example:
```bash
./main -m models/ggml-base.en.bin -f samples/jfk.wav
```text
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
...
@@ -426,7 +434,7 @@
Now build `whisper.cpp` with cuBLAS support:
Now build `whisper.cpp` with CUDA support:
```
make clean
WHISPER_CUBLAS=1 make -j
WHISPER_CUDA=1 make -j
```
@@ -454,3 +462,2 @@
Run all the examples as usual.
@@ -470,2 +477,34 @@
## Docker
### Prerequisites
- Docker must be installed and running on your system.
- Create a folder to store big models & intermediate files (e.g. /whisper/models)
### Images
We have two Docker images available for this project:
1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
### Usage
```shell
# download model and persist it in a local folder
docker run -it --rm \
  -v path/to/models:/models \
  whisper.cpp:main "./models/download-ggml-model.sh base /models"
# transcribe an audio file
docker run -it --rm \
  -v path/to/models:/models \
  -v path/to/audios:/audios \
  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
# transcribe an audio file in samples folder
docker run -it --rm \
  -v path/to/models:/models \
  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
```
## Limitations
@@ -483,3 +522,3 @@
```java
```text
$ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
@@ -556,2 +595,3 @@
```
</details>
@@ -565,3 +605,3 @@
```java
```bash
make stream
@@ -578,3 +618,3 @@ ./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
```java
```bash
./main -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
@@ -589,4 +629,4 @@ ```
```java
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
```text
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
@@ -614,4 +654,4 @@ whisper_model_load: loading model from './models/ggml-base.en.bin'
```java
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
```text
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
@@ -686,3 +726,3 @@ whisper_model_load: loading model from './models/ggml-base.en.bin'
```java
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
@@ -697,3 +737,3 @@ source ./samples/jfk.wav.wts
```java
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
@@ -708,3 +748,3 @@ source ./samples/mm0.wav.wts
```java
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
@@ -723,3 +763,3 @@ source ./samples/gb0.wav.wts
```java
```bash
./extra/bench-wts.sh samples/jfk.wav
@@ -753,5 +793,4 @@ ffplay ./samples/jfk.wav.all.mp4
## `ggml` format
## ggml format
The original models are converted to a custom binary format. This allows everything needed to be packed into a single file:
@@ -770,24 +809,23 @@
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
in [models](models).
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
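As a concrete illustration of the format, every converted file starts with a 4-byte magic, `0x67676d6c` ("ggml" in hex), which loaders check before reading anything else. A hedged C++ sketch of just that first check — the fields that follow (hyperparameters, mel filters, vocabulary, tensors) are laid out as written by the conversion script and are not parsed here:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch: verify the "ggml" magic at the start of a converted model file.
// The converter writes it with struct.pack("i", 0x67676d6c), i.e. in the
// host's native byte order (little-endian on typical machines).
bool has_ggml_magic(const char * path) {
    FILE * f = fopen(path, "rb");
    if (f == nullptr) {
        return false;
    }
    uint32_t magic = 0;
    const bool ok = fread(&magic, sizeof(magic), 1, f) == 1 &&
                    magic == 0x67676d6c; // "ggml"
    fclose(f);
    return ok;
}
```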
## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
- [X] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
  - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
- [X] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
- [X] Java:
- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
- [x] Java:
  - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
- [X] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
  - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
- [X] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
  - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
  - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
- [X] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
  - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
  - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
- [X] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
- [X] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
@@ -799,18 +837,19 @@ ## Examples
| Example | Web | Description |
| --- | --- | --- |
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
| [server](examples/server) | | HTTP transcription server with OAI-like API |
| Example | Web | Description |
| --------------------------------------------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
| [server](examples/server) | | HTTP transcription server with OAI-like API |
@@ -817,0 +856,0 @@ ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
{
"name": "@pr0gramm/fluester",
"version": "0.6.0",
"version": "0.6.1",
"license": "MIT",
@@ -30,2 +30,3 @@ "description": "Node.js bindings for OpenAI's Whisper. Optimized for CPU.",
"bun": "^1.1.2",
"lefthook": "^1.6.8",
"typedoc": "^0.25.13",
@@ -32,0 +33,0 @@ "typescript": "^5.4.4"
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package