@ricky0123/vad-web - npm Package Compare versions

Comparing version 0.0.14 to 0.0.15


dist/_common/frame-processor.d.ts

@@ -31,2 +31,6 @@ import { SpeechProbabilities } from "./models";

 minSpeechFrames: number;
+/**
+ * If true, when the user pauses the VAD, it may trigger `onSpeechEnd`.
+ */
+submitUserSpeechOnPause: boolean;
 }
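
The new `submitUserSpeechOnPause` option lets a pause flush any speech that is still buffered instead of silently discarding it. A minimal usage sketch in TypeScript, assuming the package's `MicVAD` entry point; the option and callback names come from this diff, while the surrounding wiring is illustrative:

import { MicVAD } from "@ricky0123/vad-web"

const vad = await MicVAD.new({
  // New in 0.0.15: pausing may submit the in-flight speech segment.
  submitUserSpeechOnPause: true,
  onSpeechEnd: (audio) => {
    // audio is a Float32Array of 16 kHz samples for the finished segment.
    console.log("segment samples:", audio.length)
  },
})

vad.start()
// With the option enabled, pause() may fire onSpeechEnd (or onVADMisfire
// for segments shorter than minSpeechFrames) instead of dropping audio.
vad.pause()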

@@ -60,3 +64,12 @@ export declare const defaultFrameProcessorOptions: FrameProcessorOptions;

 reset: () => void;
-pause: () => void;
+pause: () => {
+    msg: Message;
+    audio: Float32Array;
+} | {
+    msg: Message;
+    audio?: undefined;
+} | {
+    msg?: undefined;
+    audio?: undefined;
+};
 resume: () => void;

@@ -63,0 +76,0 @@ endSegment: () => {

@@ -18,2 +18,3 @@ "use strict";

 minSpeechFrames: 3,
+submitUserSpeechOnPause: false,
 };

@@ -68,3 +69,9 @@ function validateOptions(options) {

 this.active = false;
-this.reset();
+if (this.options.submitUserSpeechOnPause) {
+    return this.endSegment();
+}
+else {
+    this.reset();
+    return {};
+}
 };
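
`pause()` now returns the same event object as `process()` and `endSegment()` rather than `void`, so a caller can forward one last `SpeechEnd` or `VADMisfire`. A hedged sketch of consuming that return value; `Message` and the event fields are from this diff, and `handleEndedSegment` is a hypothetical consumer:

// Assumes frameProcessor was built with submitUserSpeechOnPause: true.
const ev = frameProcessor.pause()
if (ev.msg === Message.SpeechEnd && ev.audio !== undefined) {
  handleEndedSegment(ev.audio) // hypothetical: persist or transcribe the audio
} else if (ev.msg === Message.VADMisfire) {
  // The buffered segment had fewer than minSpeechFrames speech frames.
}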

@@ -71,0 +78,0 @@ this.resume = () => {

@@ -33,2 +33,3 @@ "use strict";

 minSpeechFrames: this.options.minSpeechFrames,
+submitUserSpeechOnPause: this.options.submitUserSpeechOnPause,
 });
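
The non-real-time constructor forwards the new option to its frame processor as well, so `NonRealTimeVAD` accepts it too (defaulting to `false`). A sketch of that path, using the `NonRealTimeVAD.new` and `run` APIs visible in the bundle diff below; `samples` and `sampleRate` are caller-supplied:

import { NonRealTimeVAD } from "@ricky0123/vad-web"

const vad = await NonRealTimeVAD.new({ submitUserSpeechOnPause: false })
// run() yields one segment per detected utterance; start and end are
// millisecond offsets into the resampled 16 kHz stream.
for await (const { audio, start, end } of vad.run(samples, sampleRate)) {
  console.log(`speech from ${start} ms to ${end} ms`, audio.length)
}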

@@ -35,0 +36,0 @@ this.frameProcessor.resume();


dist/bundle.dev.js

@@ -29,3 +29,3 @@ /*

eval("\n/*\nSome of this code, together with the default options found in index.ts,\nwere taken (or took inspiration) from https://github.com/snakers4/silero-vad\n*/\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.FrameProcessor = exports.validateOptions = exports.defaultFrameProcessorOptions = void 0;\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/_common/logging.js\");\nconst RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];\nexports.defaultFrameProcessorOptions = {\n positiveSpeechThreshold: 0.5,\n negativeSpeechThreshold: 0.5 - 0.15,\n preSpeechPadFrames: 1,\n redemptionFrames: 8,\n frameSamples: 1536,\n minSpeechFrames: 3,\n};\nfunction validateOptions(options) {\n if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {\n logging_1.log.warn(\"You are using an unusual frame size\");\n }\n if (options.positiveSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > 1) {\n logging_1.log.error(\"postiveSpeechThreshold should be a number between 0 and 1\");\n }\n if (options.negativeSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > options.positiveSpeechThreshold) {\n logging_1.log.error(\"negativeSpeechThreshold should be between 0 and postiveSpeechThreshold\");\n }\n if (options.preSpeechPadFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n if (options.redemptionFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n}\nexports.validateOptions = validateOptions;\nconst concatArrays = (arrays) => {\n const sizes = arrays.reduce((out, next) => {\n out.push(out.at(-1) + next.length);\n return out;\n }, [0]);\n const outArray = new Float32Array(sizes.at(-1));\n arrays.forEach((arr, index) => {\n const place = sizes[index];\n outArray.set(arr, place);\n });\n return outArray;\n};\nclass FrameProcessor {\n constructor(modelProcessFunc, modelResetFunc, options) {\n this.modelProcessFunc = modelProcessFunc;\n this.modelResetFunc = modelResetFunc;\n this.options = options;\n this.speaking = false;\n this.redemptionCounter = 0;\n this.active = false;\n this.reset = () => {\n this.speaking = false;\n this.audioBuffer = [];\n this.modelResetFunc();\n this.redemptionCounter = 0;\n };\n this.pause = () => {\n this.active = false;\n this.reset();\n };\n this.resume = () => {\n this.active = true;\n };\n this.endSegment = () => {\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speaking = this.speaking;\n this.reset();\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speaking) {\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { msg: messages_1.Message.VADMisfire };\n }\n }\n return {};\n };\n this.process = async (frame) => {\n if (!this.active) {\n return {};\n }\n const probs = await this.modelProcessFunc(frame);\n this.audioBuffer.push({\n frame,\n isSpeech: probs.isSpeech >= this.options.positiveSpeechThreshold,\n });\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n this.redemptionCounter) {\n this.redemptionCounter = 0;\n }\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n !this.speaking) {\n this.speaking = true;\n return { probs, msg: messages_1.Message.SpeechStart };\n }\n if (probs.isSpeech < 
this.options.negativeSpeechThreshold &&\n this.speaking &&\n ++this.redemptionCounter >= this.options.redemptionFrames) {\n this.redemptionCounter = 0;\n this.speaking = false;\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { probs, msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { probs, msg: messages_1.Message.VADMisfire };\n }\n }\n if (!this.speaking) {\n while (this.audioBuffer.length > this.options.preSpeechPadFrames) {\n this.audioBuffer.shift();\n }\n }\n return { probs };\n };\n this.audioBuffer = [];\n this.reset();\n }\n}\nexports.FrameProcessor = FrameProcessor;\n//# sourceMappingURL=frame-processor.js.map\n\n//# sourceURL=webpack://vad/./dist/_common/frame-processor.js?");
eval("\n/*\nSome of this code, together with the default options found in index.ts,\nwere taken (or took inspiration) from https://github.com/snakers4/silero-vad\n*/\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.FrameProcessor = exports.validateOptions = exports.defaultFrameProcessorOptions = void 0;\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/_common/logging.js\");\nconst RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];\nexports.defaultFrameProcessorOptions = {\n positiveSpeechThreshold: 0.5,\n negativeSpeechThreshold: 0.5 - 0.15,\n preSpeechPadFrames: 1,\n redemptionFrames: 8,\n frameSamples: 1536,\n minSpeechFrames: 3,\n submitUserSpeechOnPause: false,\n};\nfunction validateOptions(options) {\n if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {\n logging_1.log.warn(\"You are using an unusual frame size\");\n }\n if (options.positiveSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > 1) {\n logging_1.log.error(\"postiveSpeechThreshold should be a number between 0 and 1\");\n }\n if (options.negativeSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > options.positiveSpeechThreshold) {\n logging_1.log.error(\"negativeSpeechThreshold should be between 0 and postiveSpeechThreshold\");\n }\n if (options.preSpeechPadFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n if (options.redemptionFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n}\nexports.validateOptions = validateOptions;\nconst concatArrays = (arrays) => {\n const sizes = arrays.reduce((out, next) => {\n out.push(out.at(-1) + next.length);\n return out;\n }, [0]);\n const outArray = new Float32Array(sizes.at(-1));\n arrays.forEach((arr, index) => {\n const place = sizes[index];\n outArray.set(arr, place);\n });\n return outArray;\n};\nclass FrameProcessor {\n constructor(modelProcessFunc, modelResetFunc, options) {\n this.modelProcessFunc = modelProcessFunc;\n this.modelResetFunc = modelResetFunc;\n this.options = options;\n this.speaking = false;\n this.redemptionCounter = 0;\n this.active = false;\n this.reset = () => {\n this.speaking = false;\n this.audioBuffer = [];\n this.modelResetFunc();\n this.redemptionCounter = 0;\n };\n this.pause = () => {\n this.active = false;\n if (this.options.submitUserSpeechOnPause) {\n return this.endSegment();\n }\n else {\n this.reset();\n return {};\n }\n };\n this.resume = () => {\n this.active = true;\n };\n this.endSegment = () => {\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speaking = this.speaking;\n this.reset();\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speaking) {\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { msg: messages_1.Message.VADMisfire };\n }\n }\n return {};\n };\n this.process = async (frame) => {\n if (!this.active) {\n return {};\n }\n const probs = await this.modelProcessFunc(frame);\n this.audioBuffer.push({\n frame,\n isSpeech: probs.isSpeech >= this.options.positiveSpeechThreshold,\n });\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n this.redemptionCounter) {\n this.redemptionCounter = 0;\n }\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n 
!this.speaking) {\n this.speaking = true;\n return { probs, msg: messages_1.Message.SpeechStart };\n }\n if (probs.isSpeech < this.options.negativeSpeechThreshold &&\n this.speaking &&\n ++this.redemptionCounter >= this.options.redemptionFrames) {\n this.redemptionCounter = 0;\n this.speaking = false;\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { probs, msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { probs, msg: messages_1.Message.VADMisfire };\n }\n }\n if (!this.speaking) {\n while (this.audioBuffer.length > this.options.preSpeechPadFrames) {\n this.audioBuffer.shift();\n }\n }\n return { probs };\n };\n this.audioBuffer = [];\n this.reset();\n }\n}\nexports.FrameProcessor = FrameProcessor;\n//# sourceMappingURL=frame-processor.js.map\n\n//# sourceURL=webpack://vad/./dist/_common/frame-processor.js?");

@@ -80,3 +80,3 @@ /***/ }),

eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.PlatformAgnosticNonRealTimeVAD = exports.defaultNonRealTimeVADOptions = void 0;\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/_common/frame-processor.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst models_1 = __webpack_require__(/*! ./models */ \"./dist/_common/models.js\");\nconst resampler_1 = __webpack_require__(/*! ./resampler */ \"./dist/_common/resampler.js\");\nexports.defaultNonRealTimeVADOptions = {\n ...frame_processor_1.defaultFrameProcessorOptions,\n};\nclass PlatformAgnosticNonRealTimeVAD {\n static async _new(modelFetcher, ort, options = {}) {\n const vad = new this(modelFetcher, ort, {\n ...exports.defaultNonRealTimeVADOptions,\n ...options,\n });\n await vad.init();\n return vad;\n }\n constructor(modelFetcher, ort, options) {\n this.modelFetcher = modelFetcher;\n this.ort = ort;\n this.options = options;\n this.init = async () => {\n const model = await models_1.Silero.new(this.ort, this.modelFetcher);\n this.frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: this.options.frameSamples,\n positiveSpeechThreshold: this.options.positiveSpeechThreshold,\n negativeSpeechThreshold: this.options.negativeSpeechThreshold,\n redemptionFrames: this.options.redemptionFrames,\n preSpeechPadFrames: this.options.preSpeechPadFrames,\n minSpeechFrames: this.options.minSpeechFrames,\n });\n this.frameProcessor.resume();\n };\n this.run = async function* (inputAudio, sampleRate) {\n const resamplerOptions = {\n nativeSampleRate: sampleRate,\n targetSampleRate: 16000,\n targetFrameSize: this.options.frameSamples,\n };\n const resampler = new resampler_1.Resampler(resamplerOptions);\n const frames = resampler.process(inputAudio);\n let start, end;\n for (const i of [...Array(frames.length)].keys()) {\n const f = frames[i];\n const { msg, audio } = await this.frameProcessor.process(f);\n switch (msg) {\n case messages_1.Message.SpeechStart:\n start = (i * this.options.frameSamples) / 16;\n break;\n case messages_1.Message.SpeechEnd:\n end = ((i + 1) * this.options.frameSamples) / 16;\n // @ts-ignore\n yield { audio, start, end };\n break;\n default:\n break;\n }\n }\n const { msg, audio } = this.frameProcessor.endSegment();\n if (msg == messages_1.Message.SpeechEnd) {\n yield {\n audio,\n // @ts-ignore\n start,\n end: (frames.length * this.options.frameSamples) / 16,\n };\n }\n };\n (0, frame_processor_1.validateOptions)(options);\n }\n}\nexports.PlatformAgnosticNonRealTimeVAD = PlatformAgnosticNonRealTimeVAD;\n//# sourceMappingURL=non-real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/_common/non-real-time-vad.js?");
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.PlatformAgnosticNonRealTimeVAD = exports.defaultNonRealTimeVADOptions = void 0;\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/_common/frame-processor.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst models_1 = __webpack_require__(/*! ./models */ \"./dist/_common/models.js\");\nconst resampler_1 = __webpack_require__(/*! ./resampler */ \"./dist/_common/resampler.js\");\nexports.defaultNonRealTimeVADOptions = {\n ...frame_processor_1.defaultFrameProcessorOptions,\n};\nclass PlatformAgnosticNonRealTimeVAD {\n static async _new(modelFetcher, ort, options = {}) {\n const vad = new this(modelFetcher, ort, {\n ...exports.defaultNonRealTimeVADOptions,\n ...options,\n });\n await vad.init();\n return vad;\n }\n constructor(modelFetcher, ort, options) {\n this.modelFetcher = modelFetcher;\n this.ort = ort;\n this.options = options;\n this.init = async () => {\n const model = await models_1.Silero.new(this.ort, this.modelFetcher);\n this.frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: this.options.frameSamples,\n positiveSpeechThreshold: this.options.positiveSpeechThreshold,\n negativeSpeechThreshold: this.options.negativeSpeechThreshold,\n redemptionFrames: this.options.redemptionFrames,\n preSpeechPadFrames: this.options.preSpeechPadFrames,\n minSpeechFrames: this.options.minSpeechFrames,\n submitUserSpeechOnPause: this.options.submitUserSpeechOnPause,\n });\n this.frameProcessor.resume();\n };\n this.run = async function* (inputAudio, sampleRate) {\n const resamplerOptions = {\n nativeSampleRate: sampleRate,\n targetSampleRate: 16000,\n targetFrameSize: this.options.frameSamples,\n };\n const resampler = new resampler_1.Resampler(resamplerOptions);\n const frames = resampler.process(inputAudio);\n let start, end;\n for (const i of [...Array(frames.length)].keys()) {\n const f = frames[i];\n const { msg, audio } = await this.frameProcessor.process(f);\n switch (msg) {\n case messages_1.Message.SpeechStart:\n start = (i * this.options.frameSamples) / 16;\n break;\n case messages_1.Message.SpeechEnd:\n end = ((i + 1) * this.options.frameSamples) / 16;\n // @ts-ignore\n yield { audio, start, end };\n break;\n default:\n break;\n }\n }\n const { msg, audio } = this.frameProcessor.endSegment();\n if (msg == messages_1.Message.SpeechEnd) {\n yield {\n audio,\n // @ts-ignore\n start,\n end: (frames.length * this.options.frameSamples) / 16,\n };\n }\n };\n (0, frame_processor_1.validateOptions)(options);\n }\n}\nexports.PlatformAgnosticNonRealTimeVAD = PlatformAgnosticNonRealTimeVAD;\n//# sourceMappingURL=non-real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/_common/non-real-time-vad.js?");

@@ -141,3 +141,3 @@ /***/ }),

eval("\nvar __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n}));\nvar __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n});\nvar __importStar = (this && this.__importStar) || function (mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n};\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.AudioNodeVAD = exports.MicVAD = exports.defaultRealTimeVADOptions = void 0;\nconst ort = __importStar(__webpack_require__(/*! onnxruntime-web */ \"onnxruntime-web\"));\nconst _common_1 = __webpack_require__(/*! ./_common */ \"./dist/_common/index.js\");\nconst asset_path_1 = __webpack_require__(/*! ./asset-path */ \"./dist/asset-path.js\");\nconst default_model_fetcher_1 = __webpack_require__(/*! ./default-model-fetcher */ \"./dist/default-model-fetcher.js\");\nexports.defaultRealTimeVADOptions = {\n ..._common_1.defaultFrameProcessorOptions,\n onFrameProcessed: (probabilities) => { },\n onVADMisfire: () => {\n _common_1.log.debug(\"VAD misfire\");\n },\n onSpeechStart: () => {\n _common_1.log.debug(\"Detected speech start\");\n },\n onSpeechEnd: () => {\n _common_1.log.debug(\"Detected speech end\");\n },\n workletURL: (0, asset_path_1.assetPath)(\"vad.worklet.bundle.min.js\"),\n modelURL: (0, asset_path_1.assetPath)(\"silero_vad.onnx\"),\n modelFetcher: default_model_fetcher_1.defaultModelFetcher,\n stream: undefined,\n};\nclass MicVAD {\n static async new(options = {}) {\n const fullOptions = {\n ...exports.defaultRealTimeVADOptions,\n ...options,\n };\n (0, _common_1.validateOptions)(fullOptions);\n let stream;\n if (fullOptions.stream === undefined)\n stream = await navigator.mediaDevices.getUserMedia({\n audio: {\n ...fullOptions.additionalAudioConstraints,\n channelCount: 1,\n echoCancellation: true,\n autoGainControl: true,\n noiseSuppression: true,\n },\n });\n else\n stream = fullOptions.stream;\n const audioContext = new AudioContext();\n const sourceNode = new MediaStreamAudioSourceNode(audioContext, {\n mediaStream: stream,\n });\n const audioNodeVAD = await AudioNodeVAD.new(audioContext, fullOptions);\n audioNodeVAD.receive(sourceNode);\n return new MicVAD(fullOptions, audioContext, stream, audioNodeVAD, sourceNode);\n }\n constructor(options, audioContext, stream, audioNodeVAD, sourceNode, listening = false) {\n this.options = options;\n this.audioContext = audioContext;\n this.stream = stream;\n this.audioNodeVAD = audioNodeVAD;\n this.sourceNode = sourceNode;\n this.listening = listening;\n this.pause = () => {\n this.audioNodeVAD.pause();\n this.listening = false;\n };\n this.start = () => {\n this.audioNodeVAD.start();\n this.listening = true;\n };\n this.destroy = () => {\n if (this.listening) {\n this.pause();\n }\n this.sourceNode.disconnect();\n this.audioNodeVAD.destroy();\n 
this.audioContext.close();\n };\n }\n}\nexports.MicVAD = MicVAD;\nclass AudioNodeVAD {\n static async new(ctx, options = {}) {\n const fullOptions = {\n ...exports.defaultRealTimeVADOptions,\n ...options,\n };\n (0, _common_1.validateOptions)(fullOptions);\n await ctx.audioWorklet.addModule(fullOptions.workletURL);\n const vadNode = new AudioWorkletNode(ctx, \"vad-helper-worklet\", {\n processorOptions: {\n frameSamples: fullOptions.frameSamples,\n },\n });\n const model = await _common_1.Silero.new(ort, () => fullOptions.modelFetcher(fullOptions.modelURL));\n const frameProcessor = new _common_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: fullOptions.frameSamples,\n positiveSpeechThreshold: fullOptions.positiveSpeechThreshold,\n negativeSpeechThreshold: fullOptions.negativeSpeechThreshold,\n redemptionFrames: fullOptions.redemptionFrames,\n preSpeechPadFrames: fullOptions.preSpeechPadFrames,\n minSpeechFrames: fullOptions.minSpeechFrames,\n });\n const audioNodeVAD = new AudioNodeVAD(ctx, fullOptions, frameProcessor, vadNode);\n vadNode.port.onmessage = async (ev) => {\n switch (ev.data?.message) {\n case _common_1.Message.AudioFrame:\n const buffer = ev.data.data;\n const frame = new Float32Array(buffer);\n await audioNodeVAD.processFrame(frame);\n break;\n default:\n break;\n }\n };\n return audioNodeVAD;\n }\n constructor(ctx, options, frameProcessor, entryNode) {\n this.ctx = ctx;\n this.options = options;\n this.frameProcessor = frameProcessor;\n this.entryNode = entryNode;\n this.pause = () => {\n this.frameProcessor.pause();\n };\n this.start = () => {\n this.frameProcessor.resume();\n };\n this.receive = (node) => {\n node.connect(this.entryNode);\n };\n this.processFrame = async (frame) => {\n const { probs, msg, audio } = await this.frameProcessor.process(frame);\n if (probs !== undefined) {\n this.options.onFrameProcessed(probs);\n }\n switch (msg) {\n case _common_1.Message.SpeechStart:\n this.options.onSpeechStart();\n break;\n case _common_1.Message.VADMisfire:\n this.options.onVADMisfire();\n break;\n case _common_1.Message.SpeechEnd:\n this.options.onSpeechEnd(audio);\n break;\n default:\n break;\n }\n };\n this.destroy = () => {\n this.entryNode.port.postMessage({\n message: _common_1.Message.SpeechStop,\n });\n this.entryNode.disconnect();\n };\n }\n}\nexports.AudioNodeVAD = AudioNodeVAD;\n//# sourceMappingURL=real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/real-time-vad.js?");
eval("\nvar __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n}));\nvar __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n});\nvar __importStar = (this && this.__importStar) || function (mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n};\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.AudioNodeVAD = exports.MicVAD = exports.defaultRealTimeVADOptions = void 0;\nconst ort = __importStar(__webpack_require__(/*! onnxruntime-web */ \"onnxruntime-web\"));\nconst _common_1 = __webpack_require__(/*! ./_common */ \"./dist/_common/index.js\");\nconst asset_path_1 = __webpack_require__(/*! ./asset-path */ \"./dist/asset-path.js\");\nconst default_model_fetcher_1 = __webpack_require__(/*! ./default-model-fetcher */ \"./dist/default-model-fetcher.js\");\nexports.defaultRealTimeVADOptions = {\n ..._common_1.defaultFrameProcessorOptions,\n onFrameProcessed: (probabilities) => { },\n onVADMisfire: () => {\n _common_1.log.debug(\"VAD misfire\");\n },\n onSpeechStart: () => {\n _common_1.log.debug(\"Detected speech start\");\n },\n onSpeechEnd: () => {\n _common_1.log.debug(\"Detected speech end\");\n },\n workletURL: (0, asset_path_1.assetPath)(\"vad.worklet.bundle.min.js\"),\n modelURL: (0, asset_path_1.assetPath)(\"silero_vad.onnx\"),\n modelFetcher: default_model_fetcher_1.defaultModelFetcher,\n stream: undefined,\n};\nclass MicVAD {\n static async new(options = {}) {\n const fullOptions = {\n ...exports.defaultRealTimeVADOptions,\n ...options,\n };\n (0, _common_1.validateOptions)(fullOptions);\n let stream;\n if (fullOptions.stream === undefined)\n stream = await navigator.mediaDevices.getUserMedia({\n audio: {\n ...fullOptions.additionalAudioConstraints,\n channelCount: 1,\n echoCancellation: true,\n autoGainControl: true,\n noiseSuppression: true,\n },\n });\n else\n stream = fullOptions.stream;\n const audioContext = new AudioContext();\n const sourceNode = new MediaStreamAudioSourceNode(audioContext, {\n mediaStream: stream,\n });\n const audioNodeVAD = await AudioNodeVAD.new(audioContext, fullOptions);\n audioNodeVAD.receive(sourceNode);\n return new MicVAD(fullOptions, audioContext, stream, audioNodeVAD, sourceNode);\n }\n constructor(options, audioContext, stream, audioNodeVAD, sourceNode, listening = false) {\n this.options = options;\n this.audioContext = audioContext;\n this.stream = stream;\n this.audioNodeVAD = audioNodeVAD;\n this.sourceNode = sourceNode;\n this.listening = listening;\n this.pause = () => {\n this.audioNodeVAD.pause();\n this.listening = false;\n };\n this.start = () => {\n this.audioNodeVAD.start();\n this.listening = true;\n };\n this.destroy = () => {\n if (this.listening) {\n this.pause();\n }\n this.sourceNode.disconnect();\n this.audioNodeVAD.destroy();\n 
this.audioContext.close();\n };\n }\n}\nexports.MicVAD = MicVAD;\nclass AudioNodeVAD {\n static async new(ctx, options = {}) {\n const fullOptions = {\n ...exports.defaultRealTimeVADOptions,\n ...options,\n };\n (0, _common_1.validateOptions)(fullOptions);\n await ctx.audioWorklet.addModule(fullOptions.workletURL);\n const vadNode = new AudioWorkletNode(ctx, \"vad-helper-worklet\", {\n processorOptions: {\n frameSamples: fullOptions.frameSamples,\n },\n });\n const model = await _common_1.Silero.new(ort, () => fullOptions.modelFetcher(fullOptions.modelURL));\n const frameProcessor = new _common_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: fullOptions.frameSamples,\n positiveSpeechThreshold: fullOptions.positiveSpeechThreshold,\n negativeSpeechThreshold: fullOptions.negativeSpeechThreshold,\n redemptionFrames: fullOptions.redemptionFrames,\n preSpeechPadFrames: fullOptions.preSpeechPadFrames,\n minSpeechFrames: fullOptions.minSpeechFrames,\n submitUserSpeechOnPause: fullOptions.submitUserSpeechOnPause,\n });\n const audioNodeVAD = new AudioNodeVAD(ctx, fullOptions, frameProcessor, vadNode);\n vadNode.port.onmessage = async (ev) => {\n switch (ev.data?.message) {\n case _common_1.Message.AudioFrame:\n const buffer = ev.data.data;\n const frame = new Float32Array(buffer);\n await audioNodeVAD.processFrame(frame);\n break;\n default:\n break;\n }\n };\n return audioNodeVAD;\n }\n constructor(ctx, options, frameProcessor, entryNode) {\n this.ctx = ctx;\n this.options = options;\n this.frameProcessor = frameProcessor;\n this.entryNode = entryNode;\n this.pause = () => {\n const ev = this.frameProcessor.pause();\n this.handleFrameProcessorEvent(ev);\n };\n this.start = () => {\n this.frameProcessor.resume();\n };\n this.receive = (node) => {\n node.connect(this.entryNode);\n };\n this.processFrame = async (frame) => {\n const ev = await this.frameProcessor.process(frame);\n this.handleFrameProcessorEvent(ev);\n };\n this.handleFrameProcessorEvent = (ev) => {\n if (ev.probs !== undefined) {\n this.options.onFrameProcessed(ev.probs);\n }\n switch (ev.msg) {\n case _common_1.Message.SpeechStart:\n this.options.onSpeechStart();\n break;\n case _common_1.Message.VADMisfire:\n this.options.onVADMisfire();\n break;\n case _common_1.Message.SpeechEnd:\n this.options.onSpeechEnd(ev.audio);\n break;\n default:\n break;\n }\n };\n this.destroy = () => {\n this.entryNode.port.postMessage({\n message: _common_1.Message.SpeechStop,\n });\n this.entryNode.disconnect();\n };\n }\n}\nexports.AudioNodeVAD = AudioNodeVAD;\n//# sourceMappingURL=real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/real-time-vad.js?");

@@ -144,0 +144,0 @@ /***/ }),

@@ -1,1 +0,1 @@

!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{"use strict";var t={428:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultFrameProcessorOptions=void 0;const o=s(294),r=s(842),i=[512,1024,1536];t.defaultFrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3},t.validateOptions=function(e){i.includes(e.frameSamples)||r.log.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&r.log.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&r.log.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&r.log.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&r.log.error("preSpeechPadFrames should be positive")};const n=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),s=new Float32Array(t.at(-1));return e.forEach(((e,o)=>{const r=t[o];s.set(e,r)})),s};t.FrameProcessor=class{constructor(e,t,s){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=s,this.speaking=!1,this.redemptionCounter=0,this.active=!1,this.reset=()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0},this.pause=()=>{this.active=!1,this.reset()},this.resume=()=>{this.active=!0},this.endSegment=()=>{const e=this.audioBuffer;this.audioBuffer=[];const t=this.speaking;this.reset();const s=e.reduce(((e,t)=>e+ +t.isSpeech),0);if(t){if(s>=this.options.minSpeechFrames){const t=n(e.map((e=>e.frame)));return{msg:o.Message.SpeechEnd,audio:t}}return{msg:o.Message.VADMisfire}}return{}},this.process=async e=>{if(!this.active)return{};const t=await this.modelProcessFunc(e);if(this.audioBuffer.push({frame:e,isSpeech:t.isSpeech>=this.options.positiveSpeechThreshold}),t.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),t.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:t,msg:o.Message.SpeechStart};if(t.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const e=this.audioBuffer;if(this.audioBuffer=[],e.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const s=n(e.map((e=>e.frame)));return{probs:t,msg:o.Message.SpeechEnd,audio:s}}return{probs:t,msg:o.Message.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:t}},this.audioBuffer=[],this.reset()}}},14:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in 
e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t},n=this&&this.__exportStar||function(e,t){for(var s in e)"default"===s||Object.prototype.hasOwnProperty.call(t,s)||o(t,e,s)};Object.defineProperty(t,"__esModule",{value:!0}),t.utils=void 0;const a=i(s(26));t.utils={minFramesForTargetMS:a.minFramesForTargetMS,arrayBufferToBase64:a.arrayBufferToBase64,encodeWAV:a.encodeWAV},n(s(405),t),n(s(428),t),n(s(294),t),n(s(842),t),n(s(260),t),n(s(724),t)},842:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=t.LOG_PREFIX=void 0,t.LOG_PREFIX="[VAD]";const s=["error","debug","warn"].reduce(((e,s)=>(e[s]=function(e){return(...s)=>{console[e](t.LOG_PREFIX,...s)}}(s),e)),{});t.log=s},294:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP"}(s||(t.Message=s={}))},260:(e,t,s)=>{var o;Object.defineProperty(t,"__esModule",{value:!0}),t.Silero=void 0;const r=s(842);class i{constructor(e,t){this.ort=e,this.modelFetcher=t,this.init=async()=>{r.log.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),r.log.debug("vad is initialized")},this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},s=await this._session.run(t);this._h=s.hn,this._c=s.cn;const[o]=s.output.data;return{notSpeech:1-o,isSpeech:o}}}}t.Silero=i,o=i,i.new=async(e,t)=>{const s=new o(e,t);return await s.init(),s}},405:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.PlatformAgnosticNonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const o=s(428),r=s(294),i=s(260),n=s(724);t.defaultNonRealTimeVADOptions={...o.defaultFrameProcessorOptions},t.PlatformAgnosticNonRealTimeVAD=class{static async _new(e,s,o={}){const r=new this(e,s,{...t.defaultNonRealTimeVADOptions,...o});return await r.init(),r}constructor(e,t,s){this.modelFetcher=e,this.ort=t,this.options=s,this.init=async()=>{const e=await i.Silero.new(this.ort,this.modelFetcher);this.frameProcessor=new o.FrameProcessor(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),this.frameProcessor.resume()},this.run=async function*(e,t){const s={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},o=new n.Resampler(s).process(e);let i,a;for(const e of[...Array(o.length)].keys()){const t=o[e],{msg:s,audio:n}=await this.frameProcessor.process(t);switch(s){case r.Message.SpeechStart:i=e*this.options.frameSamples/16;break;case r.Message.SpeechEnd:a=(e+1)*this.options.frameSamples/16,yield{audio:n,start:i,end:a}}}const{msg:c,audio:h}=this.frameProcessor.endSegment();c==r.Message.SpeechEnd&&(yield{audio:h,start:i,end:o.length*this.options.frameSamples/16})},(0,o.validateOptions)(s)}}},724:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const 
o=s(842);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const t of e)this.inputBuffer.push(t);for(;this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>this.options.targetFrameSize;){const e=new Float32Array(this.options.targetFrameSize);let s=0,o=0;for(;s<this.options.targetFrameSize;){let t=0,r=0;for(;o<Math.min(this.inputBuffer.length,(s+1)*this.options.nativeSampleRate/this.options.targetSampleRate);)t+=this.inputBuffer[o],r++,o++;e[s]=t/r,s++}this.inputBuffer=this.inputBuffer.slice(o),t.push(e)}return t},e.nativeSampleRate<16e3&&o.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}}},26:(e,t)=>{function s(e,t,s){for(var o=0;o<s.length;o++)e.setUint8(t+o,s.charCodeAt(o))}Object.defineProperty(t,"__esModule",{value:!0}),t.encodeWAV=t.arrayBufferToBase64=t.minFramesForTargetMS=void 0,t.minFramesForTargetMS=function(e,t,s=16e3){return Math.ceil(e*s/1e3/t)},t.arrayBufferToBase64=function(e){for(var t="",s=new Uint8Array(e),o=s.byteLength,r=0;r<o;r++)t+=String.fromCharCode(s[r]);return btoa(t)},t.encodeWAV=function(e,t=3,o=16e3,r=1,i=32){var n=i/8,a=r*n,c=new ArrayBuffer(44+e.length*n),h=new DataView(c);return s(h,0,"RIFF"),h.setUint32(4,36+e.length*n,!0),s(h,8,"WAVE"),s(h,12,"fmt "),h.setUint32(16,16,!0),h.setUint16(20,t,!0),h.setUint16(22,r,!0),h.setUint32(24,o,!0),h.setUint32(28,o*a,!0),h.setUint16(32,a,!0),h.setUint16(34,i,!0),s(h,36,"data"),h.setUint32(40,e.length*n,!0),1===t?function(e,t,s){for(var o=0;o<s.length;o++,t+=2){var r=Math.max(-1,Math.min(1,s[o]));e.setInt16(t,r<0?32768*r:32767*r,!0)}}(h,44,e):function(e,t,s){for(var o=0;o<s.length;o++,t+=4)e.setFloat32(t,s[o],!0)}(h,44,e),c}},485:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.assetPath=void 0;const s="undefined"!=typeof window&&void 0!==window.document?window.document.currentScript:null;let o="";s&&(o=s.src.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/")),t.assetPath=e=>o+e},973:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.defaultModelFetcher=void 0,t.defaultModelFetcher=e=>fetch(e).then((e=>e.arrayBuffer()))},590:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.defaultRealTimeVADOptions=t.AudioNodeVAD=t.MicVAD=t.NonRealTimeVAD=t.Message=t.FrameProcessor=t.utils=t.defaultNonRealTimeVADOptions=void 0;const n=i(s(656)),a=s(14);Object.defineProperty(t,"FrameProcessor",{enumerable:!0,get:function(){return a.FrameProcessor}}),Object.defineProperty(t,"Message",{enumerable:!0,get:function(){return a.Message}});const c=s(787),h=s(973),d=s(485);t.defaultNonRealTimeVADOptions={modelURL:(0,d.assetPath)("silero_vad.onnx"),modelFetcher:h.defaultModelFetcher};class l extends a.PlatformAgnosticNonRealTimeVAD{static async new(e={}){const{modelURL:s,modelFetcher:o}={...t.defaultNonRealTimeVADOptions,...e};return 
await this._new((()=>o(s)),n,e)}}t.NonRealTimeVAD=l,t.utils={audioFileToArray:c.audioFileToArray,...a.utils};var u=s(746);Object.defineProperty(t,"MicVAD",{enumerable:!0,get:function(){return u.MicVAD}}),Object.defineProperty(t,"AudioNodeVAD",{enumerable:!0,get:function(){return u.AudioNodeVAD}}),Object.defineProperty(t,"defaultRealTimeVADOptions",{enumerable:!0,get:function(){return u.defaultRealTimeVADOptions}})},746:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.AudioNodeVAD=t.MicVAD=t.defaultRealTimeVADOptions=void 0;const n=i(s(656)),a=s(14),c=s(485),h=s(973);t.defaultRealTimeVADOptions={...a.defaultFrameProcessorOptions,onFrameProcessed:e=>{},onVADMisfire:()=>{a.log.debug("VAD misfire")},onSpeechStart:()=>{a.log.debug("Detected speech start")},onSpeechEnd:()=>{a.log.debug("Detected speech end")},workletURL:(0,c.assetPath)("vad.worklet.bundle.min.js"),modelURL:(0,c.assetPath)("silero_vad.onnx"),modelFetcher:h.defaultModelFetcher,stream:void 0};class d{static async new(e={}){const s={...t.defaultRealTimeVADOptions,...e};let o;(0,a.validateOptions)(s),o=void 0===s.stream?await navigator.mediaDevices.getUserMedia({audio:{...s.additionalAudioConstraints,channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}):s.stream;const r=new AudioContext,i=new MediaStreamAudioSourceNode(r,{mediaStream:o}),n=await l.new(r,s);return n.receive(i),new d(s,r,o,n,i)}constructor(e,t,s,o,r,i=!1){this.options=e,this.audioContext=t,this.stream=s,this.audioNodeVAD=o,this.sourceNode=r,this.listening=i,this.pause=()=>{this.audioNodeVAD.pause(),this.listening=!1},this.start=()=>{this.audioNodeVAD.start(),this.listening=!0},this.destroy=()=>{this.listening&&this.pause(),this.sourceNode.disconnect(),this.audioNodeVAD.destroy(),this.audioContext.close()}}}t.MicVAD=d;class l{static async new(e,s={}){const o={...t.defaultRealTimeVADOptions,...s};(0,a.validateOptions)(o),await e.audioWorklet.addModule(o.workletURL);const r=new AudioWorkletNode(e,"vad-helper-worklet",{processorOptions:{frameSamples:o.frameSamples}}),i=await a.Silero.new(n,(()=>o.modelFetcher(o.modelURL))),c=new a.FrameProcessor(i.process,i.reset_state,{frameSamples:o.frameSamples,positiveSpeechThreshold:o.positiveSpeechThreshold,negativeSpeechThreshold:o.negativeSpeechThreshold,redemptionFrames:o.redemptionFrames,preSpeechPadFrames:o.preSpeechPadFrames,minSpeechFrames:o.minSpeechFrames}),h=new l(e,o,c,r);return r.port.onmessage=async e=>{if(e.data?.message===a.Message.AudioFrame){const t=e.data.data,s=new Float32Array(t);await h.processFrame(s)}},h}constructor(e,t,s,o){this.ctx=e,this.options=t,this.frameProcessor=s,this.entryNode=o,this.pause=()=>{this.frameProcessor.pause()},this.start=()=>{this.frameProcessor.resume()},this.receive=e=>{e.connect(this.entryNode)},this.processFrame=async e=>{const{probs:t,msg:s,audio:o}=await this.frameProcessor.process(e);switch(void 
0!==t&&this.options.onFrameProcessed(t),s){case a.Message.SpeechStart:this.options.onSpeechStart();break;case a.Message.VADMisfire:this.options.onVADMisfire();break;case a.Message.SpeechEnd:this.options.onSpeechEnd(o)}},this.destroy=()=>{this.entryNode.port.postMessage({message:a.Message.SpeechStop}),this.entryNode.disconnect()}}}t.AudioNodeVAD=l},787:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.audioFileToArray=void 0,t.audioFileToArray=async function(e){const t=new OfflineAudioContext(1,1,44100),s=new FileReader;let o=null;if(await new Promise((r=>{s.addEventListener("loadend",(e=>{const i=s.result;t.decodeAudioData(i,(e=>{o=e,t.startRendering().then((e=>{console.log("Rendering completed successfully"),r()})).catch((e=>{console.error(`Rendering failed: ${e}`)}))}),(e=>{console.log(`Error with decoding audio data: ${e}`)}))})),s.readAsArrayBuffer(e)})),null===o)throw Error("some shit");let r=o,i=new Float32Array(r.length);for(let e=0;e<r.length;e++)for(let t=0;t<r.numberOfChannels;t++)i[e]+=r.getChannelData(t)[e];return{audio:i,sampleRate:r.sampleRate}}},656:t=>{t.exports=e}},s={};return function e(o){var r=s[o];if(void 0!==r)return r.exports;var i=s[o]={exports:{}};return t[o].call(i.exports,i,i.exports,e),i.exports}(590)})()));
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{"use strict";var t={428:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultFrameProcessorOptions=void 0;const o=s(294),r=s(842),i=[512,1024,1536];t.defaultFrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3,submitUserSpeechOnPause:!1},t.validateOptions=function(e){i.includes(e.frameSamples)||r.log.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&r.log.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&r.log.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&r.log.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&r.log.error("preSpeechPadFrames should be positive")};const n=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),s=new Float32Array(t.at(-1));return e.forEach(((e,o)=>{const r=t[o];s.set(e,r)})),s};t.FrameProcessor=class{constructor(e,t,s){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=s,this.speaking=!1,this.redemptionCounter=0,this.active=!1,this.reset=()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0},this.pause=()=>(this.active=!1,this.options.submitUserSpeechOnPause?this.endSegment():(this.reset(),{})),this.resume=()=>{this.active=!0},this.endSegment=()=>{const e=this.audioBuffer;this.audioBuffer=[];const t=this.speaking;this.reset();const s=e.reduce(((e,t)=>e+ +t.isSpeech),0);if(t){if(s>=this.options.minSpeechFrames){const t=n(e.map((e=>e.frame)));return{msg:o.Message.SpeechEnd,audio:t}}return{msg:o.Message.VADMisfire}}return{}},this.process=async e=>{if(!this.active)return{};const t=await this.modelProcessFunc(e);if(this.audioBuffer.push({frame:e,isSpeech:t.isSpeech>=this.options.positiveSpeechThreshold}),t.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),t.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:t,msg:o.Message.SpeechStart};if(t.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const e=this.audioBuffer;if(this.audioBuffer=[],e.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const s=n(e.map((e=>e.frame)));return{probs:t,msg:o.Message.SpeechEnd,audio:s}}return{probs:t,msg:o.Message.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:t}},this.audioBuffer=[],this.reset()}}},14:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 
0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t},n=this&&this.__exportStar||function(e,t){for(var s in e)"default"===s||Object.prototype.hasOwnProperty.call(t,s)||o(t,e,s)};Object.defineProperty(t,"__esModule",{value:!0}),t.utils=void 0;const a=i(s(26));t.utils={minFramesForTargetMS:a.minFramesForTargetMS,arrayBufferToBase64:a.arrayBufferToBase64,encodeWAV:a.encodeWAV},n(s(405),t),n(s(428),t),n(s(294),t),n(s(842),t),n(s(260),t),n(s(724),t)},842:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=t.LOG_PREFIX=void 0,t.LOG_PREFIX="[VAD]";const s=["error","debug","warn"].reduce(((e,s)=>(e[s]=function(e){return(...s)=>{console[e](t.LOG_PREFIX,...s)}}(s),e)),{});t.log=s},294:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP"}(s||(t.Message=s={}))},260:(e,t,s)=>{var o;Object.defineProperty(t,"__esModule",{value:!0}),t.Silero=void 0;const r=s(842);class i{constructor(e,t){this.ort=e,this.modelFetcher=t,this.init=async()=>{r.log.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),r.log.debug("vad is initialized")},this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},s=await this._session.run(t);this._h=s.hn,this._c=s.cn;const[o]=s.output.data;return{notSpeech:1-o,isSpeech:o}}}}t.Silero=i,o=i,i.new=async(e,t)=>{const s=new o(e,t);return await s.init(),s}},405:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.PlatformAgnosticNonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const o=s(428),r=s(294),i=s(260),n=s(724);t.defaultNonRealTimeVADOptions={...o.defaultFrameProcessorOptions},t.PlatformAgnosticNonRealTimeVAD=class{static async _new(e,s,o={}){const r=new this(e,s,{...t.defaultNonRealTimeVADOptions,...o});return await r.init(),r}constructor(e,t,s){this.modelFetcher=e,this.ort=t,this.options=s,this.init=async()=>{const e=await i.Silero.new(this.ort,this.modelFetcher);this.frameProcessor=new o.FrameProcessor(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames,submitUserSpeechOnPause:this.options.submitUserSpeechOnPause}),this.frameProcessor.resume()},this.run=async function*(e,t){const s={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},o=new n.Resampler(s).process(e);let i,a;for(const e of[...Array(o.length)].keys()){const t=o[e],{msg:s,audio:n}=await this.frameProcessor.process(t);switch(s){case r.Message.SpeechStart:i=e*this.options.frameSamples/16;break;case 
r.Message.SpeechEnd:a=(e+1)*this.options.frameSamples/16,yield{audio:n,start:i,end:a}}}const{msg:c,audio:h}=this.frameProcessor.endSegment();c==r.Message.SpeechEnd&&(yield{audio:h,start:i,end:o.length*this.options.frameSamples/16})},(0,o.validateOptions)(s)}}},724:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const o=s(842);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const t of e)this.inputBuffer.push(t);for(;this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>this.options.targetFrameSize;){const e=new Float32Array(this.options.targetFrameSize);let s=0,o=0;for(;s<this.options.targetFrameSize;){let t=0,r=0;for(;o<Math.min(this.inputBuffer.length,(s+1)*this.options.nativeSampleRate/this.options.targetSampleRate);)t+=this.inputBuffer[o],r++,o++;e[s]=t/r,s++}this.inputBuffer=this.inputBuffer.slice(o),t.push(e)}return t},e.nativeSampleRate<16e3&&o.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}}},26:(e,t)=>{function s(e,t,s){for(var o=0;o<s.length;o++)e.setUint8(t+o,s.charCodeAt(o))}Object.defineProperty(t,"__esModule",{value:!0}),t.encodeWAV=t.arrayBufferToBase64=t.minFramesForTargetMS=void 0,t.minFramesForTargetMS=function(e,t,s=16e3){return Math.ceil(e*s/1e3/t)},t.arrayBufferToBase64=function(e){for(var t="",s=new Uint8Array(e),o=s.byteLength,r=0;r<o;r++)t+=String.fromCharCode(s[r]);return btoa(t)},t.encodeWAV=function(e,t=3,o=16e3,r=1,i=32){var n=i/8,a=r*n,c=new ArrayBuffer(44+e.length*n),h=new DataView(c);return s(h,0,"RIFF"),h.setUint32(4,36+e.length*n,!0),s(h,8,"WAVE"),s(h,12,"fmt "),h.setUint32(16,16,!0),h.setUint16(20,t,!0),h.setUint16(22,r,!0),h.setUint32(24,o,!0),h.setUint32(28,o*a,!0),h.setUint16(32,a,!0),h.setUint16(34,i,!0),s(h,36,"data"),h.setUint32(40,e.length*n,!0),1===t?function(e,t,s){for(var o=0;o<s.length;o++,t+=2){var r=Math.max(-1,Math.min(1,s[o]));e.setInt16(t,r<0?32768*r:32767*r,!0)}}(h,44,e):function(e,t,s){for(var o=0;o<s.length;o++,t+=4)e.setFloat32(t,s[o],!0)}(h,44,e),c}},485:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.assetPath=void 0;const s="undefined"!=typeof window&&void 0!==window.document?window.document.currentScript:null;let o="";s&&(o=s.src.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/")),t.assetPath=e=>o+e},973:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.defaultModelFetcher=void 0,t.defaultModelFetcher=e=>fetch(e).then((e=>e.arrayBuffer()))},590:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.defaultRealTimeVADOptions=t.AudioNodeVAD=t.MicVAD=t.NonRealTimeVAD=t.Message=t.FrameProcessor=t.utils=t.defaultNonRealTimeVADOptions=void 0;const n=i(s(656)),a=s(14);Object.defineProperty(t,"FrameProcessor",{enumerable:!0,get:function(){return 
a.FrameProcessor}}),Object.defineProperty(t,"Message",{enumerable:!0,get:function(){return a.Message}});const c=s(787),h=s(973),u=s(485);t.defaultNonRealTimeVADOptions={modelURL:(0,u.assetPath)("silero_vad.onnx"),modelFetcher:h.defaultModelFetcher};class d extends a.PlatformAgnosticNonRealTimeVAD{static async new(e={}){const{modelURL:s,modelFetcher:o}={...t.defaultNonRealTimeVADOptions,...e};return await this._new((()=>o(s)),n,e)}}t.NonRealTimeVAD=d,t.utils={audioFileToArray:c.audioFileToArray,...a.utils};var l=s(746);Object.defineProperty(t,"MicVAD",{enumerable:!0,get:function(){return l.MicVAD}}),Object.defineProperty(t,"AudioNodeVAD",{enumerable:!0,get:function(){return l.AudioNodeVAD}}),Object.defineProperty(t,"defaultRealTimeVADOptions",{enumerable:!0,get:function(){return l.defaultRealTimeVADOptions}})},746:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.AudioNodeVAD=t.MicVAD=t.defaultRealTimeVADOptions=void 0;const n=i(s(656)),a=s(14),c=s(485),h=s(973);t.defaultRealTimeVADOptions={...a.defaultFrameProcessorOptions,onFrameProcessed:e=>{},onVADMisfire:()=>{a.log.debug("VAD misfire")},onSpeechStart:()=>{a.log.debug("Detected speech start")},onSpeechEnd:()=>{a.log.debug("Detected speech end")},workletURL:(0,c.assetPath)("vad.worklet.bundle.min.js"),modelURL:(0,c.assetPath)("silero_vad.onnx"),modelFetcher:h.defaultModelFetcher,stream:void 0};class u{static async new(e={}){const s={...t.defaultRealTimeVADOptions,...e};let o;(0,a.validateOptions)(s),o=void 0===s.stream?await navigator.mediaDevices.getUserMedia({audio:{...s.additionalAudioConstraints,channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}):s.stream;const r=new AudioContext,i=new MediaStreamAudioSourceNode(r,{mediaStream:o}),n=await d.new(r,s);return n.receive(i),new u(s,r,o,n,i)}constructor(e,t,s,o,r,i=!1){this.options=e,this.audioContext=t,this.stream=s,this.audioNodeVAD=o,this.sourceNode=r,this.listening=i,this.pause=()=>{this.audioNodeVAD.pause(),this.listening=!1},this.start=()=>{this.audioNodeVAD.start(),this.listening=!0},this.destroy=()=>{this.listening&&this.pause(),this.sourceNode.disconnect(),this.audioNodeVAD.destroy(),this.audioContext.close()}}}t.MicVAD=u;class d{static async new(e,s={}){const o={...t.defaultRealTimeVADOptions,...s};(0,a.validateOptions)(o),await e.audioWorklet.addModule(o.workletURL);const r=new AudioWorkletNode(e,"vad-helper-worklet",{processorOptions:{frameSamples:o.frameSamples}}),i=await a.Silero.new(n,(()=>o.modelFetcher(o.modelURL))),c=new a.FrameProcessor(i.process,i.reset_state,{frameSamples:o.frameSamples,positiveSpeechThreshold:o.positiveSpeechThreshold,negativeSpeechThreshold:o.negativeSpeechThreshold,redemptionFrames:o.redemptionFrames,preSpeechPadFrames:o.preSpeechPadFrames,minSpeechFrames:o.minSpeechFrames,submitUserSpeechOnPause:o.submitUserSpeechOnPause}),h=new d(e,o,c,r);return r.port.onmessage=async 
e=>{if(e.data?.message===a.Message.AudioFrame){const t=e.data.data,s=new Float32Array(t);await h.processFrame(s)}},h}constructor(e,t,s,o){this.ctx=e,this.options=t,this.frameProcessor=s,this.entryNode=o,this.pause=()=>{const e=this.frameProcessor.pause();this.handleFrameProcessorEvent(e)},this.start=()=>{this.frameProcessor.resume()},this.receive=e=>{e.connect(this.entryNode)},this.processFrame=async e=>{const t=await this.frameProcessor.process(e);this.handleFrameProcessorEvent(t)},this.handleFrameProcessorEvent=e=>{switch(void 0!==e.probs&&this.options.onFrameProcessed(e.probs),e.msg){case a.Message.SpeechStart:this.options.onSpeechStart();break;case a.Message.VADMisfire:this.options.onVADMisfire();break;case a.Message.SpeechEnd:this.options.onSpeechEnd(e.audio)}},this.destroy=()=>{this.entryNode.port.postMessage({message:a.Message.SpeechStop}),this.entryNode.disconnect()}}}t.AudioNodeVAD=d},787:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.audioFileToArray=void 0,t.audioFileToArray=async function(e){const t=new OfflineAudioContext(1,1,44100),s=new FileReader;let o=null;if(await new Promise((r=>{s.addEventListener("loadend",(e=>{const i=s.result;t.decodeAudioData(i,(e=>{o=e,t.startRendering().then((e=>{console.log("Rendering completed successfully"),r()})).catch((e=>{console.error(`Rendering failed: ${e}`)}))}),(e=>{console.log(`Error with decoding audio data: ${e}`)}))})),s.readAsArrayBuffer(e)})),null===o)throw Error("some shit");let r=o,i=new Float32Array(r.length);for(let e=0;e<r.length;e++)for(let t=0;t<r.numberOfChannels;t++)i[e]+=r.getChannelData(t)[e];return{audio:i,sampleRate:r.sampleRate}}},656:t=>{t.exports=e}},s={};return function e(o){var r=s[o];if(void 0!==r)return r.exports;var i=s[o]={exports:{}};return t[o].call(i.exports,i,i.exports,e),i.exports}(590)})()));
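
The bundle diffs above boil down to one headline change in 0.0.15: a new `submitUserSpeechOnPause` frame processor option (default `false`). When enabled, pausing the VAD flushes any in-progress speech segment instead of silently discarding it. A minimal usage sketch, assuming an async context; the option and callback names are taken from the diff, but the surrounding setup is illustrative:

import { MicVAD } from "@ricky0123/vad-web"

const vad = await MicVAD.new({
  // New in 0.0.15: pause() may now submit the current segment.
  submitUserSpeechOnPause: true,
  onSpeechEnd: (audio) => {
    // audio is a Float32Array of 16 kHz samples; with the flag set, this
    // can also fire when vad.pause() is called mid-utterance.
    console.log(`captured ${audio.length} samples`)
  },
  onVADMisfire: () => {
    // Fires instead of onSpeechEnd when the flushed segment is shorter
    // than minSpeechFrames.
  },
})
vad.start()
// ... later, e.g. when the user clicks "stop":
vad.pause()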

@@ -1,2 +0,2 @@

import { SpeechProbabilities, FrameProcessor, FrameProcessorOptions } from "./_common";
import { Message, SpeechProbabilities, FrameProcessor, FrameProcessorOptions } from "./_common";
interface RealTimeVADCallbacks {

@@ -61,2 +61,7 @@ /** Callback to run after each frame. The size (number of samples) of a frame is given by `frameSamples`. */

processFrame: (frame: Float32Array) => Promise<void>;
handleFrameProcessorEvent: (ev: Partial<{
probs: SpeechProbabilities;
msg: Message;
audio: Float32Array;
}>) => void;
destroy: () => void;

@@ -63,0 +68,0 @@ }
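
The typings above show that the new `handleFrameProcessorEvent` method accepts a `Partial<{probs, msg, audio}>`, so any combination of fields may be absent. A sketch of consuming such an event defensively; the local `FrameProcessorEvent` type restates the declared shape for illustration rather than importing it, and the string literals match the `Message` enum values visible in the bundle:

type FrameProcessorEvent = Partial<{
  probs: { isSpeech: number; notSpeech: number } // shape of SpeechProbabilities
  msg: "SPEECH_START" | "SPEECH_END" | "VAD_MISFIRE"
  audio: Float32Array
}>

function describeEvent(ev: FrameProcessorEvent): string {
  if (ev.msg === "SPEECH_END" && ev.audio !== undefined) {
    return `speech ended with ${ev.audio.length} samples`
  }
  if (ev.msg === "SPEECH_START") return "speech started"
  if (ev.msg === "VAD_MISFIRE") return "misfire: segment too short"
  // A bare per-frame result carries only probabilities.
  return ev.probs ? `p(speech) = ${ev.probs.isSpeech.toFixed(2)}` : "no-op"
}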

@@ -123,2 +123,3 @@ "use strict";

minSpeechFrames: fullOptions.minSpeechFrames,
submitUserSpeechOnPause: fullOptions.submitUserSpeechOnPause,
});

@@ -145,3 +146,4 @@ const audioNodeVAD = new AudioNodeVAD(ctx, fullOptions, frameProcessor, vadNode);

this.pause = () => {
this.frameProcessor.pause();
const ev = this.frameProcessor.pause();
this.handleFrameProcessorEvent(ev);
};

@@ -155,7 +157,10 @@ this.start = () => {

this.processFrame = async (frame) => {
const { probs, msg, audio } = await this.frameProcessor.process(frame);
if (probs !== undefined) {
this.options.onFrameProcessed(probs);
const ev = await this.frameProcessor.process(frame);
this.handleFrameProcessorEvent(ev);
};
this.handleFrameProcessorEvent = (ev) => {
if (ev.probs !== undefined) {
this.options.onFrameProcessed(ev.probs);
}
switch (msg) {
switch (ev.msg) {
case _common_1.Message.SpeechStart:

@@ -168,3 +173,3 @@ this.options.onSpeechStart();

case _common_1.Message.SpeechEnd:
this.options.onSpeechEnd(audio);
this.options.onSpeechEnd(ev.audio);
break;

@@ -171,0 +176,0 @@ default:
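
The change above is mechanical but meaningful: both `processFrame` and `pause` now route their results through a single `handleFrameProcessorEvent` dispatcher, which is why `onSpeechEnd` and `onVADMisfire` can now fire on pause. A self-contained sketch of that routing pattern (not the library's code; names are simplified):

type VADEvent = Partial<{
  probs: unknown
  msg: "SPEECH_START" | "SPEECH_END" | "VAD_MISFIRE"
  audio: Float32Array
}>

interface Processor {
  pause(): VADEvent
  process(frame: Float32Array): Promise<VADEvent>
}

interface Callbacks {
  onSpeechStart(): void
  onSpeechEnd(audio: Float32Array): void
  onVADMisfire(): void
}

class EventRouter {
  constructor(private processor: Processor, private cb: Callbacks) {}

  // pause() results now flow through the same dispatcher as frame results.
  pause() {
    this.dispatch(this.processor.pause())
  }

  async processFrame(frame: Float32Array) {
    this.dispatch(await this.processor.process(frame))
  }

  private dispatch(ev: VADEvent) {
    switch (ev.msg) {
      case "SPEECH_START":
        this.cb.onSpeechStart()
        break
      case "VAD_MISFIRE":
        this.cb.onVADMisfire()
        break
      case "SPEECH_END":
        if (ev.audio !== undefined) this.cb.onSpeechEnd(ev.audio)
        break
    }
  }
}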

@@ -19,3 +19,3 @@ /*

eval("\n/*\nSome of this code, together with the default options found in index.ts,\nwere taken (or took inspiration) from https://github.com/snakers4/silero-vad\n*/\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.FrameProcessor = exports.validateOptions = exports.defaultFrameProcessorOptions = void 0;\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/_common/logging.js\");\nconst RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];\nexports.defaultFrameProcessorOptions = {\n positiveSpeechThreshold: 0.5,\n negativeSpeechThreshold: 0.5 - 0.15,\n preSpeechPadFrames: 1,\n redemptionFrames: 8,\n frameSamples: 1536,\n minSpeechFrames: 3,\n};\nfunction validateOptions(options) {\n if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {\n logging_1.log.warn(\"You are using an unusual frame size\");\n }\n if (options.positiveSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > 1) {\n logging_1.log.error(\"postiveSpeechThreshold should be a number between 0 and 1\");\n }\n if (options.negativeSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > options.positiveSpeechThreshold) {\n logging_1.log.error(\"negativeSpeechThreshold should be between 0 and postiveSpeechThreshold\");\n }\n if (options.preSpeechPadFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n if (options.redemptionFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n}\nexports.validateOptions = validateOptions;\nconst concatArrays = (arrays) => {\n const sizes = arrays.reduce((out, next) => {\n out.push(out.at(-1) + next.length);\n return out;\n }, [0]);\n const outArray = new Float32Array(sizes.at(-1));\n arrays.forEach((arr, index) => {\n const place = sizes[index];\n outArray.set(arr, place);\n });\n return outArray;\n};\nclass FrameProcessor {\n constructor(modelProcessFunc, modelResetFunc, options) {\n this.modelProcessFunc = modelProcessFunc;\n this.modelResetFunc = modelResetFunc;\n this.options = options;\n this.speaking = false;\n this.redemptionCounter = 0;\n this.active = false;\n this.reset = () => {\n this.speaking = false;\n this.audioBuffer = [];\n this.modelResetFunc();\n this.redemptionCounter = 0;\n };\n this.pause = () => {\n this.active = false;\n this.reset();\n };\n this.resume = () => {\n this.active = true;\n };\n this.endSegment = () => {\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speaking = this.speaking;\n this.reset();\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speaking) {\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { msg: messages_1.Message.VADMisfire };\n }\n }\n return {};\n };\n this.process = async (frame) => {\n if (!this.active) {\n return {};\n }\n const probs = await this.modelProcessFunc(frame);\n this.audioBuffer.push({\n frame,\n isSpeech: probs.isSpeech >= this.options.positiveSpeechThreshold,\n });\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n this.redemptionCounter) {\n this.redemptionCounter = 0;\n }\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n !this.speaking) {\n this.speaking = true;\n return { probs, msg: messages_1.Message.SpeechStart };\n }\n if (probs.isSpeech < 
this.options.negativeSpeechThreshold &&\n this.speaking &&\n ++this.redemptionCounter >= this.options.redemptionFrames) {\n this.redemptionCounter = 0;\n this.speaking = false;\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { probs, msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { probs, msg: messages_1.Message.VADMisfire };\n }\n }\n if (!this.speaking) {\n while (this.audioBuffer.length > this.options.preSpeechPadFrames) {\n this.audioBuffer.shift();\n }\n }\n return { probs };\n };\n this.audioBuffer = [];\n this.reset();\n }\n}\nexports.FrameProcessor = FrameProcessor;\n//# sourceMappingURL=frame-processor.js.map\n\n//# sourceURL=webpack://@ricky0123/vad-web/./dist/_common/frame-processor.js?");
eval("\n/*\nSome of this code, together with the default options found in index.ts,\nwere taken (or took inspiration) from https://github.com/snakers4/silero-vad\n*/\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.FrameProcessor = exports.validateOptions = exports.defaultFrameProcessorOptions = void 0;\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/_common/logging.js\");\nconst RECOMMENDED_FRAME_SAMPLES = [512, 1024, 1536];\nexports.defaultFrameProcessorOptions = {\n positiveSpeechThreshold: 0.5,\n negativeSpeechThreshold: 0.5 - 0.15,\n preSpeechPadFrames: 1,\n redemptionFrames: 8,\n frameSamples: 1536,\n minSpeechFrames: 3,\n submitUserSpeechOnPause: false,\n};\nfunction validateOptions(options) {\n if (!RECOMMENDED_FRAME_SAMPLES.includes(options.frameSamples)) {\n logging_1.log.warn(\"You are using an unusual frame size\");\n }\n if (options.positiveSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > 1) {\n logging_1.log.error(\"postiveSpeechThreshold should be a number between 0 and 1\");\n }\n if (options.negativeSpeechThreshold < 0 ||\n options.negativeSpeechThreshold > options.positiveSpeechThreshold) {\n logging_1.log.error(\"negativeSpeechThreshold should be between 0 and postiveSpeechThreshold\");\n }\n if (options.preSpeechPadFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n if (options.redemptionFrames < 0) {\n logging_1.log.error(\"preSpeechPadFrames should be positive\");\n }\n}\nexports.validateOptions = validateOptions;\nconst concatArrays = (arrays) => {\n const sizes = arrays.reduce((out, next) => {\n out.push(out.at(-1) + next.length);\n return out;\n }, [0]);\n const outArray = new Float32Array(sizes.at(-1));\n arrays.forEach((arr, index) => {\n const place = sizes[index];\n outArray.set(arr, place);\n });\n return outArray;\n};\nclass FrameProcessor {\n constructor(modelProcessFunc, modelResetFunc, options) {\n this.modelProcessFunc = modelProcessFunc;\n this.modelResetFunc = modelResetFunc;\n this.options = options;\n this.speaking = false;\n this.redemptionCounter = 0;\n this.active = false;\n this.reset = () => {\n this.speaking = false;\n this.audioBuffer = [];\n this.modelResetFunc();\n this.redemptionCounter = 0;\n };\n this.pause = () => {\n this.active = false;\n if (this.options.submitUserSpeechOnPause) {\n return this.endSegment();\n }\n else {\n this.reset();\n return {};\n }\n };\n this.resume = () => {\n this.active = true;\n };\n this.endSegment = () => {\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speaking = this.speaking;\n this.reset();\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speaking) {\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { msg: messages_1.Message.VADMisfire };\n }\n }\n return {};\n };\n this.process = async (frame) => {\n if (!this.active) {\n return {};\n }\n const probs = await this.modelProcessFunc(frame);\n this.audioBuffer.push({\n frame,\n isSpeech: probs.isSpeech >= this.options.positiveSpeechThreshold,\n });\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n this.redemptionCounter) {\n this.redemptionCounter = 0;\n }\n if (probs.isSpeech >= this.options.positiveSpeechThreshold &&\n 
!this.speaking) {\n this.speaking = true;\n return { probs, msg: messages_1.Message.SpeechStart };\n }\n if (probs.isSpeech < this.options.negativeSpeechThreshold &&\n this.speaking &&\n ++this.redemptionCounter >= this.options.redemptionFrames) {\n this.redemptionCounter = 0;\n this.speaking = false;\n const audioBuffer = this.audioBuffer;\n this.audioBuffer = [];\n const speechFrameCount = audioBuffer.reduce((acc, item) => {\n return acc + +item.isSpeech;\n }, 0);\n if (speechFrameCount >= this.options.minSpeechFrames) {\n const audio = concatArrays(audioBuffer.map((item) => item.frame));\n return { probs, msg: messages_1.Message.SpeechEnd, audio };\n }\n else {\n return { probs, msg: messages_1.Message.VADMisfire };\n }\n }\n if (!this.speaking) {\n while (this.audioBuffer.length > this.options.preSpeechPadFrames) {\n this.audioBuffer.shift();\n }\n }\n return { probs };\n };\n this.audioBuffer = [];\n this.reset();\n }\n}\nexports.FrameProcessor = FrameProcessor;\n//# sourceMappingURL=frame-processor.js.map\n\n//# sourceURL=webpack://@ricky0123/vad-web/./dist/_common/frame-processor.js?");
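
For readability, here is a de-minified sketch of the new `FrameProcessor.pause()` path shown in the bundle above; the state shape is abbreviated, but the branch structure mirrors the diff:

type SegmentResult = { msg?: string; audio?: Float32Array }

interface ProcessorState {
  active: boolean
  options: { submitUserSpeechOnPause: boolean }
  endSegment(): SegmentResult // SpeechEnd + audio, VADMisfire, or {}
  reset(): void
}

function pause(state: ProcessorState): SegmentResult {
  state.active = false
  if (state.options.submitUserSpeechOnPause) {
    // Flush the buffer: SpeechEnd (with audio) if the segment contained at
    // least minSpeechFrames speech frames, otherwise VADMisfire.
    return state.endSegment()
  }
  // Pre-0.0.15 behavior: drop buffered audio and report nothing.
  state.reset()
  return {}
}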

@@ -70,3 +70,3 @@ /***/ }),

eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.PlatformAgnosticNonRealTimeVAD = exports.defaultNonRealTimeVADOptions = void 0;\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/_common/frame-processor.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst models_1 = __webpack_require__(/*! ./models */ \"./dist/_common/models.js\");\nconst resampler_1 = __webpack_require__(/*! ./resampler */ \"./dist/_common/resampler.js\");\nexports.defaultNonRealTimeVADOptions = {\n ...frame_processor_1.defaultFrameProcessorOptions,\n};\nclass PlatformAgnosticNonRealTimeVAD {\n static async _new(modelFetcher, ort, options = {}) {\n const vad = new this(modelFetcher, ort, {\n ...exports.defaultNonRealTimeVADOptions,\n ...options,\n });\n await vad.init();\n return vad;\n }\n constructor(modelFetcher, ort, options) {\n this.modelFetcher = modelFetcher;\n this.ort = ort;\n this.options = options;\n this.init = async () => {\n const model = await models_1.Silero.new(this.ort, this.modelFetcher);\n this.frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: this.options.frameSamples,\n positiveSpeechThreshold: this.options.positiveSpeechThreshold,\n negativeSpeechThreshold: this.options.negativeSpeechThreshold,\n redemptionFrames: this.options.redemptionFrames,\n preSpeechPadFrames: this.options.preSpeechPadFrames,\n minSpeechFrames: this.options.minSpeechFrames,\n });\n this.frameProcessor.resume();\n };\n this.run = async function* (inputAudio, sampleRate) {\n const resamplerOptions = {\n nativeSampleRate: sampleRate,\n targetSampleRate: 16000,\n targetFrameSize: this.options.frameSamples,\n };\n const resampler = new resampler_1.Resampler(resamplerOptions);\n const frames = resampler.process(inputAudio);\n let start, end;\n for (const i of [...Array(frames.length)].keys()) {\n const f = frames[i];\n const { msg, audio } = await this.frameProcessor.process(f);\n switch (msg) {\n case messages_1.Message.SpeechStart:\n start = (i * this.options.frameSamples) / 16;\n break;\n case messages_1.Message.SpeechEnd:\n end = ((i + 1) * this.options.frameSamples) / 16;\n // @ts-ignore\n yield { audio, start, end };\n break;\n default:\n break;\n }\n }\n const { msg, audio } = this.frameProcessor.endSegment();\n if (msg == messages_1.Message.SpeechEnd) {\n yield {\n audio,\n // @ts-ignore\n start,\n end: (frames.length * this.options.frameSamples) / 16,\n };\n }\n };\n (0, frame_processor_1.validateOptions)(options);\n }\n}\nexports.PlatformAgnosticNonRealTimeVAD = PlatformAgnosticNonRealTimeVAD;\n//# sourceMappingURL=non-real-time-vad.js.map\n\n//# sourceURL=webpack://@ricky0123/vad-web/./dist/_common/non-real-time-vad.js?");
eval("\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.PlatformAgnosticNonRealTimeVAD = exports.defaultNonRealTimeVADOptions = void 0;\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/_common/frame-processor.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/_common/messages.js\");\nconst models_1 = __webpack_require__(/*! ./models */ \"./dist/_common/models.js\");\nconst resampler_1 = __webpack_require__(/*! ./resampler */ \"./dist/_common/resampler.js\");\nexports.defaultNonRealTimeVADOptions = {\n ...frame_processor_1.defaultFrameProcessorOptions,\n};\nclass PlatformAgnosticNonRealTimeVAD {\n static async _new(modelFetcher, ort, options = {}) {\n const vad = new this(modelFetcher, ort, {\n ...exports.defaultNonRealTimeVADOptions,\n ...options,\n });\n await vad.init();\n return vad;\n }\n constructor(modelFetcher, ort, options) {\n this.modelFetcher = modelFetcher;\n this.ort = ort;\n this.options = options;\n this.init = async () => {\n const model = await models_1.Silero.new(this.ort, this.modelFetcher);\n this.frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: this.options.frameSamples,\n positiveSpeechThreshold: this.options.positiveSpeechThreshold,\n negativeSpeechThreshold: this.options.negativeSpeechThreshold,\n redemptionFrames: this.options.redemptionFrames,\n preSpeechPadFrames: this.options.preSpeechPadFrames,\n minSpeechFrames: this.options.minSpeechFrames,\n submitUserSpeechOnPause: this.options.submitUserSpeechOnPause,\n });\n this.frameProcessor.resume();\n };\n this.run = async function* (inputAudio, sampleRate) {\n const resamplerOptions = {\n nativeSampleRate: sampleRate,\n targetSampleRate: 16000,\n targetFrameSize: this.options.frameSamples,\n };\n const resampler = new resampler_1.Resampler(resamplerOptions);\n const frames = resampler.process(inputAudio);\n let start, end;\n for (const i of [...Array(frames.length)].keys()) {\n const f = frames[i];\n const { msg, audio } = await this.frameProcessor.process(f);\n switch (msg) {\n case messages_1.Message.SpeechStart:\n start = (i * this.options.frameSamples) / 16;\n break;\n case messages_1.Message.SpeechEnd:\n end = ((i + 1) * this.options.frameSamples) / 16;\n // @ts-ignore\n yield { audio, start, end };\n break;\n default:\n break;\n }\n }\n const { msg, audio } = this.frameProcessor.endSegment();\n if (msg == messages_1.Message.SpeechEnd) {\n yield {\n audio,\n // @ts-ignore\n start,\n end: (frames.length * this.options.frameSamples) / 16,\n };\n }\n };\n (0, frame_processor_1.validateOptions)(options);\n }\n}\nexports.PlatformAgnosticNonRealTimeVAD = PlatformAgnosticNonRealTimeVAD;\n//# sourceMappingURL=non-real-time-vad.js.map\n\n//# sourceURL=webpack://@ricky0123/vad-web/./dist/_common/non-real-time-vad.js?");

@@ -73,0 +73,0 @@ /***/ }),

@@ -1,1 +0,1 @@

(()=>{"use strict";var e={428:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultFrameProcessorOptions=void 0;const i=s(294),r=s(842),o=[512,1024,1536];t.defaultFrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3},t.validateOptions=function(e){o.includes(e.frameSamples)||r.log.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&r.log.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&r.log.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&r.log.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&r.log.error("preSpeechPadFrames should be positive")};const n=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),s=new Float32Array(t.at(-1));return e.forEach(((e,i)=>{const r=t[i];s.set(e,r)})),s};t.FrameProcessor=class{constructor(e,t,s){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=s,this.speaking=!1,this.redemptionCounter=0,this.active=!1,this.reset=()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0},this.pause=()=>{this.active=!1,this.reset()},this.resume=()=>{this.active=!0},this.endSegment=()=>{const e=this.audioBuffer;this.audioBuffer=[];const t=this.speaking;this.reset();const s=e.reduce(((e,t)=>e+ +t.isSpeech),0);if(t){if(s>=this.options.minSpeechFrames){const t=n(e.map((e=>e.frame)));return{msg:i.Message.SpeechEnd,audio:t}}return{msg:i.Message.VADMisfire}}return{}},this.process=async e=>{if(!this.active)return{};const t=await this.modelProcessFunc(e);if(this.audioBuffer.push({frame:e,isSpeech:t.isSpeech>=this.options.positiveSpeechThreshold}),t.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),t.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:t,msg:i.Message.SpeechStart};if(t.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const e=this.audioBuffer;if(this.audioBuffer=[],e.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const s=n(e.map((e=>e.frame)));return{probs:t,msg:i.Message.SpeechEnd,audio:s}}return{probs:t,msg:i.Message.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:t}},this.audioBuffer=[],this.reset()}}},14:function(e,t,s){var i=this&&this.__createBinding||(Object.create?function(e,t,s,i){void 0===i&&(i=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,i,r)}:function(e,t,s,i){void 0===i&&(i=s),e[i]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),o=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&i(t,e,s);return r(t,e),t},n=this&&this.__exportStar||function(e,t){for(var s in e)"default"===s||Object.prototype.hasOwnProperty.call(t,s)||i(t,e,s)};Object.defineProperty(t,"__esModule",{value:!0}),t.utils=void 
0;const a=o(s(26));t.utils={minFramesForTargetMS:a.minFramesForTargetMS,arrayBufferToBase64:a.arrayBufferToBase64,encodeWAV:a.encodeWAV},n(s(405),t),n(s(428),t),n(s(294),t),n(s(842),t),n(s(260),t),n(s(724),t)},842:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=t.LOG_PREFIX=void 0,t.LOG_PREFIX="[VAD]";const s=["error","debug","warn"].reduce(((e,s)=>(e[s]=function(e){return(...s)=>{console[e](t.LOG_PREFIX,...s)}}(s),e)),{});t.log=s},294:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP"}(s||(t.Message=s={}))},260:(e,t,s)=>{var i;Object.defineProperty(t,"__esModule",{value:!0}),t.Silero=void 0;const r=s(842);class o{constructor(e,t){this.ort=e,this.modelFetcher=t,this.init=async()=>{r.log.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),r.log.debug("vad is initialized")},this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},s=await this._session.run(t);this._h=s.hn,this._c=s.cn;const[i]=s.output.data;return{notSpeech:1-i,isSpeech:i}}}}t.Silero=o,i=o,o.new=async(e,t)=>{const s=new i(e,t);return await s.init(),s}},405:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.PlatformAgnosticNonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const i=s(428),r=s(294),o=s(260),n=s(724);t.defaultNonRealTimeVADOptions={...i.defaultFrameProcessorOptions},t.PlatformAgnosticNonRealTimeVAD=class{static async _new(e,s,i={}){const r=new this(e,s,{...t.defaultNonRealTimeVADOptions,...i});return await r.init(),r}constructor(e,t,s){this.modelFetcher=e,this.ort=t,this.options=s,this.init=async()=>{const e=await o.Silero.new(this.ort,this.modelFetcher);this.frameProcessor=new i.FrameProcessor(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),this.frameProcessor.resume()},this.run=async function*(e,t){const s={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},i=new n.Resampler(s).process(e);let o,a;for(const e of[...Array(i.length)].keys()){const t=i[e],{msg:s,audio:n}=await this.frameProcessor.process(t);switch(s){case r.Message.SpeechStart:o=e*this.options.frameSamples/16;break;case r.Message.SpeechEnd:a=(e+1)*this.options.frameSamples/16,yield{audio:n,start:o,end:a}}}const{msg:h,audio:p}=this.frameProcessor.endSegment();h==r.Message.SpeechEnd&&(yield{audio:p,start:o,end:i.length*this.options.frameSamples/16})},(0,i.validateOptions)(s)}}},724:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const i=s(842);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const t of e)this.inputBuffer.push(t);for(;this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>this.options.targetFrameSize;){const e=new Float32Array(this.options.targetFrameSize);let 
s=0,i=0;for(;s<this.options.targetFrameSize;){let t=0,r=0;for(;i<Math.min(this.inputBuffer.length,(s+1)*this.options.nativeSampleRate/this.options.targetSampleRate);)t+=this.inputBuffer[i],r++,i++;e[s]=t/r,s++}this.inputBuffer=this.inputBuffer.slice(i),t.push(e)}return t},e.nativeSampleRate<16e3&&i.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}}},26:(e,t)=>{function s(e,t,s){for(var i=0;i<s.length;i++)e.setUint8(t+i,s.charCodeAt(i))}Object.defineProperty(t,"__esModule",{value:!0}),t.encodeWAV=t.arrayBufferToBase64=t.minFramesForTargetMS=void 0,t.minFramesForTargetMS=function(e,t,s=16e3){return Math.ceil(e*s/1e3/t)},t.arrayBufferToBase64=function(e){for(var t="",s=new Uint8Array(e),i=s.byteLength,r=0;r<i;r++)t+=String.fromCharCode(s[r]);return btoa(t)},t.encodeWAV=function(e,t=3,i=16e3,r=1,o=32){var n=o/8,a=r*n,h=new ArrayBuffer(44+e.length*n),p=new DataView(h);return s(p,0,"RIFF"),p.setUint32(4,36+e.length*n,!0),s(p,8,"WAVE"),s(p,12,"fmt "),p.setUint32(16,16,!0),p.setUint16(20,t,!0),p.setUint16(22,r,!0),p.setUint32(24,i,!0),p.setUint32(28,i*a,!0),p.setUint16(32,a,!0),p.setUint16(34,o,!0),s(p,36,"data"),p.setUint32(40,e.length*n,!0),1===t?function(e,t,s){for(var i=0;i<s.length;i++,t+=2){var r=Math.max(-1,Math.min(1,s[i]));e.setInt16(t,r<0?32768*r:32767*r,!0)}}(p,44,e):function(e,t,s){for(var i=0;i<s.length;i++,t+=4)e.setFloat32(t,s[i],!0)}(p,44,e),h}}},t={};function s(i){var r=t[i];if(void 0!==r)return r.exports;var o=t[i]={exports:{}};return e[i].call(o.exports,o,o.exports,s),o.exports}(()=>{const e=s(14);class t extends AudioWorkletProcessor{constructor(t){super(),this._initialized=!1,this._stopProcessing=!1,this.init=async()=>{e.log.debug("initializing worklet"),this.resampler=new e.Resampler({nativeSampleRate:sampleRate,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples}),this._initialized=!0,e.log.debug("initialized worklet")},this.options=t.processorOptions,this.port.onmessage=t=>{t.data.message===e.Message.SpeechStop&&(this._stopProcessing=!0)},this.init()}process(t,s,i){if(this._stopProcessing)return!1;const r=t[0][0];if(this._initialized&&r instanceof Float32Array){const t=this.resampler.process(r);for(const s of t)this.port.postMessage({message:e.Message.AudioFrame,data:s.buffer},[s.buffer])}return!0}}registerProcessor("vad-helper-worklet",t)})()})();
(()=>{"use strict";var e={428:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultFrameProcessorOptions=void 0;const i=s(294),r=s(842),o=[512,1024,1536];t.defaultFrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3,submitUserSpeechOnPause:!1},t.validateOptions=function(e){o.includes(e.frameSamples)||r.log.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&r.log.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&r.log.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&r.log.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&r.log.error("preSpeechPadFrames should be positive")};const n=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),s=new Float32Array(t.at(-1));return e.forEach(((e,i)=>{const r=t[i];s.set(e,r)})),s};t.FrameProcessor=class{constructor(e,t,s){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=s,this.speaking=!1,this.redemptionCounter=0,this.active=!1,this.reset=()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0},this.pause=()=>(this.active=!1,this.options.submitUserSpeechOnPause?this.endSegment():(this.reset(),{})),this.resume=()=>{this.active=!0},this.endSegment=()=>{const e=this.audioBuffer;this.audioBuffer=[];const t=this.speaking;this.reset();const s=e.reduce(((e,t)=>e+ +t.isSpeech),0);if(t){if(s>=this.options.minSpeechFrames){const t=n(e.map((e=>e.frame)));return{msg:i.Message.SpeechEnd,audio:t}}return{msg:i.Message.VADMisfire}}return{}},this.process=async e=>{if(!this.active)return{};const t=await this.modelProcessFunc(e);if(this.audioBuffer.push({frame:e,isSpeech:t.isSpeech>=this.options.positiveSpeechThreshold}),t.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),t.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:t,msg:i.Message.SpeechStart};if(t.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const e=this.audioBuffer;if(this.audioBuffer=[],e.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const s=n(e.map((e=>e.frame)));return{probs:t,msg:i.Message.SpeechEnd,audio:s}}return{probs:t,msg:i.Message.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:t}},this.audioBuffer=[],this.reset()}}},14:function(e,t,s){var i=this&&this.__createBinding||(Object.create?function(e,t,s,i){void 0===i&&(i=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,i,r)}:function(e,t,s,i){void 0===i&&(i=s),e[i]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),o=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&i(t,e,s);return r(t,e),t},n=this&&this.__exportStar||function(e,t){for(var s in 
e)"default"===s||Object.prototype.hasOwnProperty.call(t,s)||i(t,e,s)};Object.defineProperty(t,"__esModule",{value:!0}),t.utils=void 0;const a=o(s(26));t.utils={minFramesForTargetMS:a.minFramesForTargetMS,arrayBufferToBase64:a.arrayBufferToBase64,encodeWAV:a.encodeWAV},n(s(405),t),n(s(428),t),n(s(294),t),n(s(842),t),n(s(260),t),n(s(724),t)},842:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=t.LOG_PREFIX=void 0,t.LOG_PREFIX="[VAD]";const s=["error","debug","warn"].reduce(((e,s)=>(e[s]=function(e){return(...s)=>{console[e](t.LOG_PREFIX,...s)}}(s),e)),{});t.log=s},294:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP"}(s||(t.Message=s={}))},260:(e,t,s)=>{var i;Object.defineProperty(t,"__esModule",{value:!0}),t.Silero=void 0;const r=s(842);class o{constructor(e,t){this.ort=e,this.modelFetcher=t,this.init=async()=>{r.log.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),r.log.debug("vad is initialized")},this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},s=await this._session.run(t);this._h=s.hn,this._c=s.cn;const[i]=s.output.data;return{notSpeech:1-i,isSpeech:i}}}}t.Silero=o,i=o,o.new=async(e,t)=>{const s=new i(e,t);return await s.init(),s}},405:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.PlatformAgnosticNonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const i=s(428),r=s(294),o=s(260),n=s(724);t.defaultNonRealTimeVADOptions={...i.defaultFrameProcessorOptions},t.PlatformAgnosticNonRealTimeVAD=class{static async _new(e,s,i={}){const r=new this(e,s,{...t.defaultNonRealTimeVADOptions,...i});return await r.init(),r}constructor(e,t,s){this.modelFetcher=e,this.ort=t,this.options=s,this.init=async()=>{const e=await o.Silero.new(this.ort,this.modelFetcher);this.frameProcessor=new i.FrameProcessor(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames,submitUserSpeechOnPause:this.options.submitUserSpeechOnPause}),this.frameProcessor.resume()},this.run=async function*(e,t){const s={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},i=new n.Resampler(s).process(e);let o,a;for(const e of[...Array(i.length)].keys()){const t=i[e],{msg:s,audio:n}=await this.frameProcessor.process(t);switch(s){case r.Message.SpeechStart:o=e*this.options.frameSamples/16;break;case r.Message.SpeechEnd:a=(e+1)*this.options.frameSamples/16,yield{audio:n,start:o,end:a}}}const{msg:h,audio:p}=this.frameProcessor.endSegment();h==r.Message.SpeechEnd&&(yield{audio:p,start:o,end:i.length*this.options.frameSamples/16})},(0,i.validateOptions)(s)}}},724:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const i=s(842);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const t of 
e)this.inputBuffer.push(t);for(;this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>this.options.targetFrameSize;){const e=new Float32Array(this.options.targetFrameSize);let s=0,i=0;for(;s<this.options.targetFrameSize;){let t=0,r=0;for(;i<Math.min(this.inputBuffer.length,(s+1)*this.options.nativeSampleRate/this.options.targetSampleRate);)t+=this.inputBuffer[i],r++,i++;e[s]=t/r,s++}this.inputBuffer=this.inputBuffer.slice(i),t.push(e)}return t},e.nativeSampleRate<16e3&&i.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}}},26:(e,t)=>{function s(e,t,s){for(var i=0;i<s.length;i++)e.setUint8(t+i,s.charCodeAt(i))}Object.defineProperty(t,"__esModule",{value:!0}),t.encodeWAV=t.arrayBufferToBase64=t.minFramesForTargetMS=void 0,t.minFramesForTargetMS=function(e,t,s=16e3){return Math.ceil(e*s/1e3/t)},t.arrayBufferToBase64=function(e){for(var t="",s=new Uint8Array(e),i=s.byteLength,r=0;r<i;r++)t+=String.fromCharCode(s[r]);return btoa(t)},t.encodeWAV=function(e,t=3,i=16e3,r=1,o=32){var n=o/8,a=r*n,h=new ArrayBuffer(44+e.length*n),p=new DataView(h);return s(p,0,"RIFF"),p.setUint32(4,36+e.length*n,!0),s(p,8,"WAVE"),s(p,12,"fmt "),p.setUint32(16,16,!0),p.setUint16(20,t,!0),p.setUint16(22,r,!0),p.setUint32(24,i,!0),p.setUint32(28,i*a,!0),p.setUint16(32,a,!0),p.setUint16(34,o,!0),s(p,36,"data"),p.setUint32(40,e.length*n,!0),1===t?function(e,t,s){for(var i=0;i<s.length;i++,t+=2){var r=Math.max(-1,Math.min(1,s[i]));e.setInt16(t,r<0?32768*r:32767*r,!0)}}(p,44,e):function(e,t,s){for(var i=0;i<s.length;i++,t+=4)e.setFloat32(t,s[i],!0)}(p,44,e),h}}},t={};function s(i){var r=t[i];if(void 0!==r)return r.exports;var o=t[i]={exports:{}};return e[i].call(o.exports,o,o.exports,s),o.exports}(()=>{const e=s(14);class t extends AudioWorkletProcessor{constructor(t){super(),this._initialized=!1,this._stopProcessing=!1,this.init=async()=>{e.log.debug("initializing worklet"),this.resampler=new e.Resampler({nativeSampleRate:sampleRate,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples}),this._initialized=!0,e.log.debug("initialized worklet")},this.options=t.processorOptions,this.port.onmessage=t=>{t.data.message===e.Message.SpeechStop&&(this._stopProcessing=!0)},this.init()}process(t,s,i){if(this._stopProcessing)return!1;const r=t[0][0];if(this._initialized&&r instanceof Float32Array){const t=this.resampler.process(r);for(const s of t)this.port.postMessage({message:e.Message.AudioFrame,data:s.buffer},[s.buffer])}return!0}}registerProcessor("vad-helper-worklet",t)})()})();
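
The worklet bundle also carries the package's `Resampler`, which downsamples to 16 kHz by averaging the native-rate samples that map onto each output sample. A standalone sketch of that scheme (simplified: the real class buffers leftover input between calls and emits fixed-size frames):

function downsampleByAveraging(
  input: Float32Array,
  nativeSampleRate: number,
  targetSampleRate = 16000,
): Float32Array {
  const outLength = Math.floor((input.length * targetSampleRate) / nativeSampleRate)
  const out = new Float32Array(outLength)
  let read = 0
  for (let i = 0; i < outLength; i++) {
    let sum = 0
    let count = 0
    // Consume every input sample that falls before the position of the
    // (i+1)-th output sample in the input stream.
    const end = Math.min(input.length, ((i + 1) * nativeSampleRate) / targetSampleRate)
    while (read < end) {
      sum += input[read]
      count++
      read++
    }
    out[i] = count > 0 ? sum / count : 0
  }
  return out
}

// e.g. 48 kHz -> 16 kHz: each output sample is the mean of 3 input samples.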

@@ -15,3 +15,3 @@ {

"homepage": "https://github.com/ricky0123/vad",
"version": "0.0.14",
"version": "0.0.15",
"license": "ISC",

@@ -18,0 +18,0 @@ "main": "dist/index.js",

