@pr0gramm/fluester
Comparing version 0.6.0 to 0.6.1
{
"name": "whisper.cpp",
"version": "1.5.1",
"version": "1.5.4",
"description": "Whisper speech recognition",
@@ -5,0 +5,0 @@ "main": "whisper.js",
@@ -44,3 +44,3 @@ # whisper.cpp
```java
```text
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
@@ -67,3 +67,3 @@
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
@@ -70,0 +70,0 @@ operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
cmake_minimum_required (VERSION 3.5)
project(whisper.cpp VERSION 1.5.1)
project(whisper.cpp VERSION 1.5.4)
set(SOVERSION 1)
@@ -71,9 +72,14 @@ # Add path to modules
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
else()
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
option(WHISPER_CUBLAS "whisper: support for cuBLAS" OFF)
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
option(WHISPER_OPENBLAS_INTERFACE64 "whisper: use OpenBLAS w/ 64-bit interface" OFF)
option(WHISPER_CUDA "whisper: support for CUDA" OFF)
option(WHISPER_CUBLAS "whisper: support for CUDA (deprecated)" OFF)
option(WHISPER_HIPBLAS "whisper: support for hipBLAS" OFF)
option(WHISPER_CLBLAST "whisper: use CLBlast" OFF)
option(WHISPER_SYCL "whisper: use SYCL" OFF)
option(WHISPER_SYCL_F16 "whisper: use 16 bit floats for sycl calculations" OFF)
endif()
@@ -109,2 +115,9 @@
# compile flag for SYCL
if (WHISPER_SYCL)
set(CMAKE_CXX_STANDARD 17)
else()
set(CMAKE_CXX_STANDARD 11)
endif()
# on APPLE
@@ -120,3 +133,3 @@ if (APPLE)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64)
else()
@@ -151,4 +164,29 @@ message(FATAL_ERROR "Accelerate framework not found")
# copy ggml-metal.metal to bin directory
# copy ggml-common.h and ggml-metal.metal to bin directory
configure_file(ggml-common.h bin/ggml-common.h COPYONLY)
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
if (WHISPER_METAL_EMBED_LIBRARY)
enable_language(ASM)
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_METAL_EMBED_LIBRARY)
set(METALLIB_SOURCE "${CMAKE_SOURCE_DIR}/ggml-metal.metal")
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s")
add_custom_command(
OUTPUT ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY}
DEPENDS ${METALLIB_SOURCE}
COMMENT "Generate assembly for embedded Metal library"
)
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY})
endif()
endif()
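A note on what the embed option above buys: the generated assembly places the Metal shader source between two global symbols, so the library can read it at runtime without any file I/O. A minimal C++ sketch of how such `.incbin`-delimited symbols are typically consumed — the symbol names are taken from the assembly above, but how ggml itself accesses them internally may differ:

```cpp
#include <cstddef>
#include <string>

// Symbols emitted by the generated assembly above; the leading underscore
// in "_ggml_metallib_start" is the Mach-O mangling of this C name.
extern "C" const char ggml_metallib_start[];
extern "C" const char ggml_metallib_end[];

// Return the embedded Metal source as a string (illustrative helper,
// not part of the ggml API).
std::string load_embedded_metallib() {
    return std::string(ggml_metallib_start,
                       static_cast<size_t>(ggml_metallib_end - ggml_metallib_start));
}
```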
@@ -177,19 +215,64 @@
set(WHISPER_BLAS ON)
# BLA_PKGCONFIG_BLAS is supported since CMake 3.25.
# FindBLAS.cmake pkg-config logic seems incomplete, because when
# BLA_SIZEOF_INTEGER is 8, then it should search for blas64 instead of blas.
# blas.pc/blas64.pc are not always provided, so let's be more specific
# and go with openblas.pc/openblas64.pc if WHISPER_OPENBLAS is on.
if (WHISPER_OPENBLAS_INTERFACE64)
set(WHISPER_BLAS_LIB "openblas64")
else ()
set(WHISPER_BLAS_LIB "openblas")
endif ()
set(BLA_PKGCONFIG_BLAS ${WHISPER_BLAS_LIB})
# OpenBLAS prebuilt libraries for Windows do not have "64" suffix in filename.
# (But .pc file has "64" suffix in filename for USE_64BITINT=1 Windows build.)
if (MSVC)
set(WHISPER_BLAS_LIB "openblas")
endif ()
endif()
if (WHISPER_BLAS)
if (WIN32)
if(DEFINED ENV{OPENBLAS_PATH})
set(BLAS_LIBRARIES $ENV{OPENBLAS_PATH}/lib/libopenblas.dll.a)
message(STATUS "Libraries ${BLAS_LIBRARIES}")
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
include_directories($ENV{OPENBLAS_PATH}/include)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
if (NOT "$ENV{OPENBLAS_PATH}" STREQUAL "")
if (WHISPER_STATIC)
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
else ()
message(FATAL_ERROR "BLAS library was not found. Environment variable OPENBLAS_PATH not defined.")
if (CMAKE_IMPORT_LIBRARY_SUFFIX)
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
else ()
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
endif ()
endif ()
# OpenBLAS prebuilt libraries hardcode "lib" prefix in filename even on Windows
if (WHISPER_OPENBLAS)
set(WHISPER_BLAS_LIB_PREFIX "lib")
endif ()
message(STATUS "BLAS compatible library path provided")
set(BLAS_LIBRARIES "$ENV{OPENBLAS_PATH}/lib/${WHISPER_BLAS_LIB_PREFIX}${WHISPER_BLAS_LIB}${WHISPER_BLAS_LIB_SUFFIX}")
message(STATUS "Libraries ${BLAS_LIBRARIES}")
set(BLAS_INCLUDE_DIRS "$ENV{OPENBLAS_PATH}/include")
message(STATUS "Include dirs ${BLAS_INCLUDE_DIRS}")
if (NOT EXISTS "${BLAS_LIBRARIES}")
message(FATAL_ERROR "BLAS library was not found. Environment variable OPENBLAS_PATH misdefined.")
endif ()
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
include_directories(${BLAS_INCLUDE_DIRS})
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
else ()
set(BLA_STATIC 1)
if (WHISPER_STATIC)
# FindBLAS.cmake pkg-config logic seems incomplete, because when
# BLA_STATIC is on, then it should use pkg_check_modules_static
# instead of pkg_check_modules.
# Some manual variable overriding may be necessary if you don't
# achieve desired results.
set(BLA_STATIC 1)
endif ()
set(BLA_VENDOR ${WHISPER_BLAS_VENDOR})
set(BLA_SIZEOF_INTEGER 8)
if (WHISPER_OPENBLAS_INTERFACE64)
set(BLA_SIZEOF_INTEGER 8)
else ()
set(BLA_SIZEOF_INTEGER 4)
endif()
set(BLA_PREFER_PKGCONFIG 1)
@@ -201,3 +284,10 @@ find_package(BLAS)
message(STATUS "Libraries ${BLAS_LIBRARIES}")
find_path(BLAS_INCLUDE_DIRS cblas.h /usr/include/openblas /usr/local/include/openblas $ENV{BLAS_HOME}/include)
if (NOT DEFINED BLAS_INCLUDE_DIRS)
if (PKGC_BLAS_FOUND)
set(BLAS_INCLUDE_DIRS "${PKGC_BLAS_INCLUDE_DIRS}")
else ()
find_path(BLAS_INCLUDE_DIRS cblas.h /usr/include/openblas)
endif()
endif()
message(STATUS "Include dirs ${BLAS_INCLUDE_DIRS}")
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
@@ -213,2 +303,7 @@ include_directories(${BLAS_INCLUDE_DIRS})
if (WHISPER_CUBLAS)
message(WARNING "WHISPER_CUBLAS is deprecated and will be removed in the future.\nUse WHISPER_CUDA instead")
set(WHISPER_CUDA ON)
endif()
if (WHISPER_CUDA)
cmake_minimum_required(VERSION 3.17)
@@ -223,8 +318,15 @@
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
list(APPEND GGML_SOURCES_CUDA ggml-cuda.h)
list(APPEND GGML_SOURCES_CUDA ggml-cuda.cu)
add_compile_definitions(GGML_USE_CUBLAS)
add_compile_definitions(GGML_USE_CUDA)
if (WHISPER_STATIC)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
if (WIN32)
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
else ()
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
endif()
else()
@@ -234,2 +336,3 @@ set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
else()
@@ -256,3 +359,3 @@ message(FATAL_ERROR "cuBLAS not found")
message(STATUS "HIP and hipBLAS found")
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA)
add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
@@ -291,2 +394,26 @@ set_property(TARGET ggml-rocm PROPERTY POSITION_INDEPENDENT_CODE ON)
if (WHISPER_SYCL)
if (NOT DEFINED ENV{ONEAPI_ROOT})
message(FATAL_ERROR "ONEAPI_ROOT is not set. Please install oneAPI and source it, e.g.: source /opt/intel/oneapi/setvars.sh")
endif()
# TODO: AOT
find_package(IntelSYCL REQUIRED)
if (WHISPER_SYCL_F16)
add_compile_definitions(GGML_SYCL_F16)
endif()
add_compile_definitions(GGML_USE_SYCL)
add_compile_options(-I./) # include DPCT
add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
endif()
# compiler flags
@@ -323,3 +450,4 @@
if (NOT MSVC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
# TODO: temporarily disabled until we figure out ggml-metal.m
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
@@ -353,4 +481,4 @@ endif()
if (EMSCRIPTEN)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
else()
@@ -487,2 +615,4 @@ if(NOT WHISPER_NO_AVX)
${GGML_SOURCES_OPENCL}
${GGML_SOURCES_SYCL}
${GGML_HEADERS_SYCL}
whisper.h
@@ -492,2 +622,8 @@ whisper.cpp
# Set the version numbers
set_target_properties(whisper PROPERTIES
VERSION ${PROJECT_VERSION}
SOVERSION ${SOVERSION}
)
include(DefaultTargetOptions)
@@ -516,2 +652,3 @@
if (BUILD_SHARED_LIBS)
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC
@@ -540,3 +677,9 @@ ${CMAKE_DL_LIBS}
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES OFF)
# Only configure ggml CUDA architectures if not globally set
if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
# Not overridden by user, so set defaults
set(GGML_CUDA_ARCHITECTURES 52 61 70)
endif()
message(STATUS "GGML Configuring CUDA architectures ${GGML_CUDA_ARCHITECTURES}")
set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES ${GGML_CUDA_ARCHITECTURES})
set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
@@ -553,3 +696,3 @@ endif()
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "ggml.h;whisper.h")
@@ -556,0 +699,0 @@ include(GNUInstallDirs)
@@ -1,17 +0,14 @@
## Whisper model files in custom ggml format
## Whisper model files in custom `ggml` format
The [original Whisper PyTorch models provided by OpenAI](https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17-L27)
The [original Whisper PyTorch models provided by OpenAI](https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17-L30)
are converted to a custom `ggml` format so that they can be loaded in C/C++.
Conversion is performed using the [convert-pt-to-ggml.py](convert-pt-to-ggml.py) script.
You can either obtain the original models and generate the `ggml` files yourself using the conversion script,
or you can use the [download-ggml-model.sh](download-ggml-model.sh) script to download the already converted models.
Currently, they are hosted on the following locations:
There are three ways to obtain `ggml` models:
- https://huggingface.co/ggerganov/whisper.cpp
- https://ggml.ggerganov.com
### 1. Use [download-ggml-model.sh](download-ggml-model.sh) to download pre-converted models
Sample download:
Example download:
```java
```text
$ ./download-ggml-model.sh base.en
@@ -26,6 +23,16 @@ Downloading ggml model base.en ...
To convert the files yourself, use the convert-pt-to-ggml.py script. Here is an example usage.
The original PyTorch files are assumed to have been downloaded into ~/.cache/whisper
Change `~/path/to/repo/whisper/` to the location for your copy of the Whisper source:
```
### 2. Manually download pre-converted models
`ggml` models are available from the following locations:
- https://huggingface.co/ggerganov/whisper.cpp/tree/main
- https://ggml.ggerganov.com
### 3. Convert with [convert-pt-to-ggml.py](convert-pt-to-ggml.py)
Download one of the [models provided by OpenAI](https://github.com/openai/whisper/blob/main/whisper/__init__.py#L17-L30) and generate the `ggml` files using the [convert-pt-to-ggml.py](convert-pt-to-ggml.py) script.
Example conversion, assuming the original PyTorch files have been downloaded into `~/.cache/whisper`. Change `~/path/to/repo/whisper/` to the location for your copy of the Whisper source:
```bash
mkdir models/whisper-medium
@@ -37,22 +44,23 @@ python models/convert-pt-to-ggml.py ~/.cache/whisper/medium.pt ~/path/to/repo/whisper/ ./models/whisper-medium
A third option to obtain the model files is to download them from Hugging Face:
## Available models
https://huggingface.co/ggerganov/whisper.cpp/tree/main
| Model | Disk | SHA |
| ------------- | ------- | ------------------------------------------ |
| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
| small.en-tdrz | 465 MiB | `b6c6e7e89af1a35c08e6de56b66ca6a02a2fdfa1` |
| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
| large-v2-q5_0 | 1.1 GiB | `00e39f2196344e901b3a2bd5814807a769bd1630` |
| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
| large-v3-q5_0 | 1.1 GiB | `e6e2ed78495d403bef4b7cff42ef4aaadcfea8de` |
## Available models
Models are multilingual unless the model name includes `.en`. Models ending in `-q5_0` are [quantized](../README.md#quantization). Models ending in `-tdrz` support local diarization (marking of speaker turns) using [tinydiarize](https://github.com/akashmjn/tinydiarize). More information about models is available [upstream (openai/whisper)](https://github.com/openai/whisper#available-models-and-languages). The list above is a subset of the models supported by the [download-ggml-model.sh](download-ggml-model.sh) script, but many more are available at https://huggingface.co/ggerganov/whisper.cpp/tree/main and elsewhere.
| Model | Disk | SHA |
| --- | --- | --- |
| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
## Model files for testing purposes
@@ -59,0 +67,0 @@
@@ -9,3 +9,3 @@ # whisper.cpp
Stable: [v1.5.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
@@ -37,5 +37,6 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
- [x] [docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
The rest of the code is part of the [ggml](https://github.com/ggerganov/ggml) machine learning library.
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
@@ -65,12 +66,14 @@ Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
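Because the whole high-level interface lives in those two files, a transcription loop is short. Below is a minimal C++ sketch against the whisper.h API as of the v1.5.x headers (untested here; it assumes `pcm` already holds 16 kHz mono float samples and that the base.en model has been downloaded):

```cpp
#include <cstdio>
#include <vector>

#include "whisper.h"

// Minimal sketch: transcribe 16 kHz mono float PCM with the whisper.h API.
int transcribe(const std::vector<float> & pcm) {
    struct whisper_context * ctx = whisper_init_from_file_with_params(
        "models/ggml-base.en.bin", whisper_context_default_params());
    if (ctx == nullptr) {
        return 1;
    }

    // Greedy sampling with default parameters; runs the full encoder/decoder.
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    if (whisper_full(ctx, wparams, pcm.data(), (int) pcm.size()) != 0) {
        whisper_free(ctx);
        return 1;
    }

    // Print the decoded text segment by segment.
    for (int i = 0; i < whisper_full_n_segments(ctx); ++i) {
        printf("%s\n", whisper_full_get_segment_text(ctx, i));
    }

    whisper_free(ctx);
    return 0;
}
```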
## Quick start
First clone the repository.
First clone the repository:
Then, download one of the Whisper models converted in [ggml format](models). For example:
```bash
git clone https://github.com/ggerganov/whisper.cpp.git
```
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
```bash
@@ -80,4 +83,2 @@ bash ./models/download-ggml-model.sh base.en
If you wish to convert the Whisper models to ggml format yourself, instructions are in [models/README.md](models/README.md).
Now build the [main](examples/main) example and transcribe an audio file like this:
@@ -97,3 +98,3 @@
```java
```text
$ make base.en
@@ -214,3 +215,3 @@
```java
```bash
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
@@ -247,5 +248,5 @@ ```
| Model | Disk | Mem |
| --- | --- | --- |
| tiny | 75 MiB | ~273 MB |
| Model | Disk | Mem |
| ------ | ------- | ------- |
| tiny | 75 MiB | ~273 MB |
| base | 142 MiB | ~388 MB |
@@ -287,3 +288,4 @@ | small | 466 MiB | ~852 MB |
- Python 3.10 is recommended.
- macOS Sonoma (version 14) or newer is recommended, as older versions of macOS might experience issues with transcription hallucination.
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
@@ -314,4 +316,4 @@ - To activate the environment, use: `conda activate py310-whisper`
```bash
./main -m models/ggml-base.en.bin -f samples/jfk.wav
```text
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
@@ -344,3 +346,4 @@ ...
Windows:
```
```powershell
cd models
@@ -350,7 +353,8 @@ python -m venv openvino_conv_env
python -m pip install --upgrade pip
pip install -r openvino-conversion-requirements.txt
pip install -r requirements-openvino.txt
```
Linux and macOS:
```
```bash
cd models
@@ -360,3 +364,3 @@ python3 -m venv openvino_conv_env
python -m pip install --upgrade pip
pip install -r openvino-conversion-requirements.txt
pip install -r requirements-openvino.txt
```
@@ -370,3 +374,3 @@
This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as ggml models, as that
This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
is the default location that the OpenVINO extension will search at runtime.
@@ -381,2 +385,3 @@
Linux:
```bash
@@ -387,3 +392,4 @@ source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
Windows (cmd):
```
```powershell
C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
@@ -393,2 +399,3 @@ ```
And then build the project using cmake:
```bash
@@ -400,5 +407,6 @@ cmake -B build -DWHISPER_OPENVINO=1
- Run the examples as usual. For example:
```bash
./main -m models/ggml-base.en.bin -f samples/jfk.wav
```text
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
...
@@ -426,7 +434,7 @@
Now build `whisper.cpp` with cuBLAS support:
Now build `whisper.cpp` with CUDA support:
```
make clean
WHISPER_CUBLAS=1 make -j
WHISPER_CUDA=1 make -j
```
@@ -454,3 +462,2 @@
Run all the examples as usual.
@@ -470,2 +477,34 @@
## Docker
### Prerequisites
- Docker must be installed and running on your system.
- Create a folder to store big models & intermediate files (e.g. /whisper/models)
### Images
We have two Docker images available for this project:
1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
### Usage
```shell
# download model and persist it in a local folder
docker run -it --rm \
  -v path/to/models:/models \
  whisper.cpp:main "./models/download-ggml-model.sh base /models"
# transcribe an audio file
docker run -it --rm \
  -v path/to/models:/models \
  -v path/to/audios:/audios \
  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
# transcribe an audio file in samples folder
docker run -it --rm \
  -v path/to/models:/models \
  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
```
## Limitations
@@ -483,3 +522,3 @@
```java
```text
$ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
@@ -556,2 +595,3 @@
```
</details>
@@ -565,3 +605,3 @@
```java
```bash
make stream
@@ -578,3 +618,3 @@ ./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
```java
```bash
./main -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
@@ -589,4 +629,4 @@ ```
```java
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
```text
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
@@ -614,4 +654,4 @@ whisper_model_load: loading model from './models/ggml-base.en.bin'
```java
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
```text
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
@@ -686,3 +726,3 @@ whisper_model_load: loading model from './models/ggml-base.en.bin'
```java
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
@@ -697,3 +737,3 @@ source ./samples/jfk.wav.wts
```java
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
@@ -708,3 +748,3 @@ source ./samples/mm0.wav.wts
```java
```bash
./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
@@ -723,3 +763,3 @@ source ./samples/gb0.wav.wts
```java
```bash
./extra/bench-wts.sh samples/jfk.wav
@@ -753,5 +793,4 @@ ffplay ./samples/jfk.wav.all.mp4
## `ggml` format
## ggml format
The original models are converted to a custom binary format. This allows everything needed to be packed into a single file:
@@ -770,24 +809,23 @@
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
in [models](models).
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
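As a concrete illustration of the format, every converted file starts with a 4-byte magic, `0x67676d6c` ("ggml" in hex), which loaders check before reading anything else. A hedged C++ sketch of just that first check — the fields that follow (hyperparameters, mel filters, vocabulary, tensors) are laid out as written by the conversion script and are not parsed here:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch: verify the "ggml" magic at the start of a converted model file.
// The converter writes it with struct.pack("i", 0x67676d6c), i.e. in the
// host's native byte order (little-endian on typical machines).
bool has_ggml_magic(const char * path) {
    FILE * f = fopen(path, "rb");
    if (f == nullptr) {
        return false;
    }
    uint32_t magic = 0;
    const bool ok = fread(&magic, sizeof(magic), 1, f) == 1 &&
                    magic == 0x67676d6c; // "ggml"
    fclose(f);
    return ok;
}
```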
## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
- [X] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
  - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
- [X] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
- [X] Java:
- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
- [x] Java:
  - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
- [X] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
  - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
- [X] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
  - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
  - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
- [X] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
  - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
  - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
- [X] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
- [X] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
@@ -799,18 +837,19 @@ ## Examples
| Example | Web | Description |
| --- | --- | --- |
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
| [server](examples/server) | | HTTP transcription server with OAI-like API |
| Example | Web | Description |
| --------------------------------------------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
| [server](examples/server) | | HTTP transcription server with OAI-like API |
@@ -817,0 +856,0 @@ ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
{
"name": "@pr0gramm/fluester",
"version": "0.6.0",
"version": "0.6.1",
"license": "MIT",
@@ -30,2 +30,3 @@ "description": "Node.js bindings for OpenAI's Whisper. Optimized for CPU.",
"bun": "^1.1.2",
"lefthook": "^1.6.8",
"typedoc": "^0.25.13",
@@ -32,0 +33,0 @@ "typescript": "^5.4.4"
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package