@ricky0123/vad-web
Advanced tools
Comparing version 0.0.20 to 0.0.21
@@ -139,3 +139,3 @@ /* | ||
eval("\nvar __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n}));\nvar __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n});\nvar __importStar = (this && this.__importStar) || function (mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n};\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.AudioNodeVAD = exports.MicVAD = exports.getDefaultRealTimeVADOptions = exports.ort = exports.DEFAULT_MODEL = void 0;\nconst ortInstance = __importStar(__webpack_require__(/*! onnxruntime-web */ \"onnxruntime-web\"));\nconst default_model_fetcher_1 = __webpack_require__(/*! ./default-model-fetcher */ \"./dist/default-model-fetcher.js\");\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/frame-processor.js\");\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/logging.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/messages.js\");\nconst models_1 = __webpack_require__(/*! 
./models */ \"./dist/models/index.js\");\nexports.DEFAULT_MODEL = \"legacy\";\nexports.ort = ortInstance;\nconst workletFile = \"vad.worklet.bundle.min.js\";\nconst sileroV5File = \"silero_vad_v5.onnx\";\nconst sileroLegacyFile = \"silero_vad_legacy.onnx\";\nconst getDefaultRealTimeVADOptions = (model) => {\n const frameProcessorOptions = model === \"v5\"\n ? frame_processor_1.defaultV5FrameProcessorOptions\n : frame_processor_1.defaultLegacyFrameProcessorOptions;\n return {\n ...frameProcessorOptions,\n onFrameProcessed: (probabilities) => { },\n onVADMisfire: () => {\n logging_1.log.debug(\"VAD misfire\");\n },\n onSpeechStart: () => {\n logging_1.log.debug(\"Detected speech start\");\n },\n onSpeechEnd: () => {\n logging_1.log.debug(\"Detected speech end\");\n },\n baseAssetPath: \"https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.19/dist/\",\n onnxWASMBasePath: \"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/\",\n stream: undefined,\n ortConfig: undefined,\n model: exports.DEFAULT_MODEL,\n workletOptions: {},\n };\n};\nexports.getDefaultRealTimeVADOptions = getDefaultRealTimeVADOptions;\nclass MicVAD {\n static async new(options = {}) {\n const fullOptions = {\n ...(0, exports.getDefaultRealTimeVADOptions)(options.model ?? 
exports.DEFAULT_MODEL),\n ...options,\n };\n (0, frame_processor_1.validateOptions)(fullOptions);\n let stream;\n if (fullOptions.stream === undefined)\n stream = await navigator.mediaDevices.getUserMedia({\n audio: {\n ...fullOptions.additionalAudioConstraints,\n channelCount: 1,\n echoCancellation: true,\n autoGainControl: true,\n noiseSuppression: true,\n },\n });\n else\n stream = fullOptions.stream;\n const audioContext = new AudioContext();\n const sourceNode = new MediaStreamAudioSourceNode(audioContext, {\n mediaStream: stream,\n });\n const audioNodeVAD = await AudioNodeVAD.new(audioContext, fullOptions);\n audioNodeVAD.receive(sourceNode);\n return new MicVAD(fullOptions, audioContext, stream, audioNodeVAD, sourceNode);\n }\n constructor(options, audioContext, stream, audioNodeVAD, sourceNode, listening = false) {\n this.options = options;\n this.audioContext = audioContext;\n this.stream = stream;\n this.audioNodeVAD = audioNodeVAD;\n this.sourceNode = sourceNode;\n this.listening = listening;\n this.pause = () => {\n this.audioNodeVAD.pause();\n this.listening = false;\n };\n this.start = () => {\n this.audioNodeVAD.start();\n this.listening = true;\n };\n this.destroy = () => {\n if (this.listening) {\n this.pause();\n }\n if (this.options.stream === undefined) {\n this.stream.getTracks().forEach((track) => track.stop());\n }\n this.sourceNode.disconnect();\n this.audioNodeVAD.destroy();\n this.audioContext.close();\n };\n }\n}\nexports.MicVAD = MicVAD;\nclass AudioNodeVAD {\n static async new(ctx, options = {}) {\n const fullOptions = {\n ...(0, exports.getDefaultRealTimeVADOptions)(options.model ?? 
exports.DEFAULT_MODEL),\n ...options,\n };\n (0, frame_processor_1.validateOptions)(fullOptions);\n exports.ort.env.wasm.wasmPaths = fullOptions.onnxWASMBasePath;\n if (fullOptions.ortConfig !== undefined) {\n fullOptions.ortConfig(exports.ort);\n }\n const workletURL = fullOptions.baseAssetPath + workletFile;\n try {\n await ctx.audioWorklet.addModule(workletURL);\n }\n catch (e) {\n console.error(`Encountered an error while loading worklet ${workletURL}`);\n throw e;\n }\n let workletOptions = fullOptions.workletOptions;\n workletOptions.processorOptions = {\n ...(fullOptions.workletOptions.processorOptions ?? {}),\n frameSamples: fullOptions.frameSamples,\n };\n const vadNode = new AudioWorkletNode(ctx, \"vad-helper-worklet\", workletOptions);\n const modelFile = fullOptions.model === \"v5\" ? sileroV5File : sileroLegacyFile;\n const modelURL = fullOptions.baseAssetPath + modelFile;\n const modelFactory = fullOptions.model === \"v5\" ? models_1.SileroV5.new : models_1.SileroLegacy.new;\n let model;\n try {\n model = await modelFactory(exports.ort, () => (0, default_model_fetcher_1.defaultModelFetcher)(modelURL));\n }\n catch (e) {\n console.error(`Encountered an error while loading model file ${modelURL}`);\n throw e;\n }\n const frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: fullOptions.frameSamples,\n positiveSpeechThreshold: fullOptions.positiveSpeechThreshold,\n negativeSpeechThreshold: fullOptions.negativeSpeechThreshold,\n redemptionFrames: fullOptions.redemptionFrames,\n preSpeechPadFrames: fullOptions.preSpeechPadFrames,\n minSpeechFrames: fullOptions.minSpeechFrames,\n submitUserSpeechOnPause: fullOptions.submitUserSpeechOnPause,\n });\n const audioNodeVAD = new AudioNodeVAD(ctx, fullOptions, frameProcessor, vadNode);\n vadNode.port.onmessage = async (ev) => {\n switch (ev.data?.message) {\n case messages_1.Message.AudioFrame:\n let buffer = ev.data.data;\n if (!(buffer instanceof ArrayBuffer)) 
{\n buffer = new ArrayBuffer(ev.data.data.byteLength);\n new Uint8Array(buffer).set(new Uint8Array(ev.data.data));\n }\n const frame = new Float32Array(buffer);\n await audioNodeVAD.processFrame(frame);\n break;\n default:\n break;\n }\n };\n return audioNodeVAD;\n }\n constructor(ctx, options, frameProcessor, entryNode) {\n this.ctx = ctx;\n this.options = options;\n this.frameProcessor = frameProcessor;\n this.entryNode = entryNode;\n this.pause = () => {\n const ev = this.frameProcessor.pause();\n this.handleFrameProcessorEvent(ev);\n };\n this.start = () => {\n this.frameProcessor.resume();\n };\n this.receive = (node) => {\n node.connect(this.entryNode);\n };\n this.processFrame = async (frame) => {\n const ev = await this.frameProcessor.process(frame);\n this.handleFrameProcessorEvent(ev);\n };\n this.handleFrameProcessorEvent = (ev) => {\n if (ev.probs !== undefined) {\n this.options.onFrameProcessed(ev.probs, ev.frame);\n }\n switch (ev.msg) {\n case messages_1.Message.SpeechStart:\n this.options.onSpeechStart();\n break;\n case messages_1.Message.VADMisfire:\n this.options.onVADMisfire();\n break;\n case messages_1.Message.SpeechEnd:\n this.options.onSpeechEnd(ev.audio);\n break;\n default:\n break;\n }\n };\n this.destroy = () => {\n this.entryNode.port.postMessage({\n message: messages_1.Message.SpeechStop,\n });\n this.entryNode.disconnect();\n };\n }\n}\nexports.AudioNodeVAD = AudioNodeVAD;\n//# sourceMappingURL=real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/real-time-vad.js?"); | ||
eval("\nvar __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n}));\nvar __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n});\nvar __importStar = (this && this.__importStar) || function (mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n};\nObject.defineProperty(exports, \"__esModule\", ({ value: true }));\nexports.AudioNodeVAD = exports.MicVAD = exports.getDefaultRealTimeVADOptions = exports.ort = exports.DEFAULT_MODEL = void 0;\nconst ortInstance = __importStar(__webpack_require__(/*! onnxruntime-web */ \"onnxruntime-web\"));\nconst default_model_fetcher_1 = __webpack_require__(/*! ./default-model-fetcher */ \"./dist/default-model-fetcher.js\");\nconst frame_processor_1 = __webpack_require__(/*! ./frame-processor */ \"./dist/frame-processor.js\");\nconst logging_1 = __webpack_require__(/*! ./logging */ \"./dist/logging.js\");\nconst messages_1 = __webpack_require__(/*! ./messages */ \"./dist/messages.js\");\nconst models_1 = __webpack_require__(/*! 
./models */ \"./dist/models/index.js\");\nexports.DEFAULT_MODEL = \"legacy\";\nexports.ort = ortInstance;\nconst workletFile = \"vad.worklet.bundle.min.js\";\nconst sileroV5File = \"silero_vad_v5.onnx\";\nconst sileroLegacyFile = \"silero_vad_legacy.onnx\";\nconst getDefaultRealTimeVADOptions = (model) => {\n const frameProcessorOptions = model === \"v5\"\n ? frame_processor_1.defaultV5FrameProcessorOptions\n : frame_processor_1.defaultLegacyFrameProcessorOptions;\n return {\n ...frameProcessorOptions,\n onFrameProcessed: (probabilities) => { },\n onVADMisfire: () => {\n logging_1.log.debug(\"VAD misfire\");\n },\n onSpeechStart: () => {\n logging_1.log.debug(\"Detected speech start\");\n },\n onSpeechEnd: () => {\n logging_1.log.debug(\"Detected speech end\");\n },\n baseAssetPath: \"https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/\",\n onnxWASMBasePath: \"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/\",\n stream: undefined,\n ortConfig: undefined,\n model: exports.DEFAULT_MODEL,\n workletOptions: {},\n };\n};\nexports.getDefaultRealTimeVADOptions = getDefaultRealTimeVADOptions;\nclass MicVAD {\n static async new(options = {}) {\n const fullOptions = {\n ...(0, exports.getDefaultRealTimeVADOptions)(options.model ?? 
exports.DEFAULT_MODEL),\n ...options,\n };\n (0, frame_processor_1.validateOptions)(fullOptions);\n let stream;\n if (fullOptions.stream === undefined)\n stream = await navigator.mediaDevices.getUserMedia({\n audio: {\n ...fullOptions.additionalAudioConstraints,\n channelCount: 1,\n echoCancellation: true,\n autoGainControl: true,\n noiseSuppression: true,\n },\n });\n else\n stream = fullOptions.stream;\n const audioContext = new AudioContext();\n const sourceNode = new MediaStreamAudioSourceNode(audioContext, {\n mediaStream: stream,\n });\n const audioNodeVAD = await AudioNodeVAD.new(audioContext, fullOptions);\n audioNodeVAD.receive(sourceNode);\n return new MicVAD(fullOptions, audioContext, stream, audioNodeVAD, sourceNode);\n }\n constructor(options, audioContext, stream, audioNodeVAD, sourceNode, listening = false) {\n this.options = options;\n this.audioContext = audioContext;\n this.stream = stream;\n this.audioNodeVAD = audioNodeVAD;\n this.sourceNode = sourceNode;\n this.listening = listening;\n this.pause = () => {\n this.audioNodeVAD.pause();\n this.listening = false;\n };\n this.start = () => {\n this.audioNodeVAD.start();\n this.listening = true;\n };\n this.destroy = () => {\n if (this.listening) {\n this.pause();\n }\n if (this.options.stream === undefined) {\n this.stream.getTracks().forEach((track) => track.stop());\n }\n this.sourceNode.disconnect();\n this.audioNodeVAD.destroy();\n this.audioContext.close();\n };\n }\n}\nexports.MicVAD = MicVAD;\nclass AudioNodeVAD {\n static async new(ctx, options = {}) {\n const fullOptions = {\n ...(0, exports.getDefaultRealTimeVADOptions)(options.model ?? 
exports.DEFAULT_MODEL),\n ...options,\n };\n (0, frame_processor_1.validateOptions)(fullOptions);\n exports.ort.env.wasm.wasmPaths = fullOptions.onnxWASMBasePath;\n if (fullOptions.ortConfig !== undefined) {\n fullOptions.ortConfig(exports.ort);\n }\n const workletURL = fullOptions.baseAssetPath + workletFile;\n try {\n await ctx.audioWorklet.addModule(workletURL);\n }\n catch (e) {\n console.error(`Encountered an error while loading worklet ${workletURL}`);\n throw e;\n }\n let workletOptions = fullOptions.workletOptions;\n workletOptions.processorOptions = {\n ...(fullOptions.workletOptions.processorOptions ?? {}),\n frameSamples: fullOptions.frameSamples,\n };\n const vadNode = new AudioWorkletNode(ctx, \"vad-helper-worklet\", workletOptions);\n const modelFile = fullOptions.model === \"v5\" ? sileroV5File : sileroLegacyFile;\n const modelURL = fullOptions.baseAssetPath + modelFile;\n const modelFactory = fullOptions.model === \"v5\" ? models_1.SileroV5.new : models_1.SileroLegacy.new;\n let model;\n try {\n model = await modelFactory(exports.ort, () => (0, default_model_fetcher_1.defaultModelFetcher)(modelURL));\n }\n catch (e) {\n console.error(`Encountered an error while loading model file ${modelURL}`);\n throw e;\n }\n const frameProcessor = new frame_processor_1.FrameProcessor(model.process, model.reset_state, {\n frameSamples: fullOptions.frameSamples,\n positiveSpeechThreshold: fullOptions.positiveSpeechThreshold,\n negativeSpeechThreshold: fullOptions.negativeSpeechThreshold,\n redemptionFrames: fullOptions.redemptionFrames,\n preSpeechPadFrames: fullOptions.preSpeechPadFrames,\n minSpeechFrames: fullOptions.minSpeechFrames,\n submitUserSpeechOnPause: fullOptions.submitUserSpeechOnPause,\n });\n const audioNodeVAD = new AudioNodeVAD(ctx, fullOptions, frameProcessor, vadNode);\n vadNode.port.onmessage = async (ev) => {\n switch (ev.data?.message) {\n case messages_1.Message.AudioFrame:\n let buffer = ev.data.data;\n if (!(buffer instanceof ArrayBuffer)) 
{\n buffer = new ArrayBuffer(ev.data.data.byteLength);\n new Uint8Array(buffer).set(new Uint8Array(ev.data.data));\n }\n const frame = new Float32Array(buffer);\n await audioNodeVAD.processFrame(frame);\n break;\n default:\n break;\n }\n };\n return audioNodeVAD;\n }\n constructor(ctx, options, frameProcessor, entryNode) {\n this.ctx = ctx;\n this.options = options;\n this.frameProcessor = frameProcessor;\n this.entryNode = entryNode;\n this.pause = () => {\n const ev = this.frameProcessor.pause();\n this.handleFrameProcessorEvent(ev);\n };\n this.start = () => {\n this.frameProcessor.resume();\n };\n this.receive = (node) => {\n node.connect(this.entryNode);\n };\n this.processFrame = async (frame) => {\n const ev = await this.frameProcessor.process(frame);\n this.handleFrameProcessorEvent(ev);\n };\n this.handleFrameProcessorEvent = (ev) => {\n if (ev.probs !== undefined) {\n this.options.onFrameProcessed(ev.probs, ev.frame);\n }\n switch (ev.msg) {\n case messages_1.Message.SpeechStart:\n this.options.onSpeechStart();\n break;\n case messages_1.Message.VADMisfire:\n this.options.onVADMisfire();\n break;\n case messages_1.Message.SpeechEnd:\n this.options.onSpeechEnd(ev.audio);\n break;\n default:\n break;\n }\n };\n this.destroy = () => {\n this.entryNode.port.postMessage({\n message: messages_1.Message.SpeechStop,\n });\n this.entryNode.disconnect();\n };\n }\n}\nexports.AudioNodeVAD = AudioNodeVAD;\n//# sourceMappingURL=real-time-vad.js.map\n\n//# sourceURL=webpack://vad/./dist/real-time-vad.js?"); | ||
@@ -142,0 +142,0 @@ /***/ }), |
@@ -1,1 +0,1 @@ | ||
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{"use strict";var t={485:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.baseAssetPath=void 0;const s="undefined"!=typeof window&&void 0!==window.document?window.document.currentScript:null;let o="/";s&&(o=s.src.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/")),t.baseAssetPath=o},973:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.defaultModelFetcher=void 0,t.defaultModelFetcher=e=>fetch(e).then((e=>e.arrayBuffer()))},362:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultV5FrameProcessorOptions=t.defaultLegacyFrameProcessorOptions=void 0;const o=s(710),r=s(954),i=[512,1024,1536];t.defaultLegacyFrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3,submitUserSpeechOnPause:!1},t.defaultV5FrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:3,redemptionFrames:24,frameSamples:512,minSpeechFrames:9,submitUserSpeechOnPause:!1},t.validateOptions=function(e){i.includes(e.frameSamples)||o.log.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.positiveSpeechThreshold>1)&&o.log.error("positiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&o.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold"),e.preSpeechPadFrames<0&&o.log.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&o.log.error("redemptionFrames should be positive")};const n=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),s=new 
Float32Array(t.at(-1));return e.forEach(((e,o)=>{const r=t[o];s.set(e,r)})),s};t.FrameProcessor=class{constructor(e,t,s){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=s,this.speaking=!1,this.redemptionCounter=0,this.active=!1,this.reset=()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0},this.pause=()=>(this.active=!1,this.options.submitUserSpeechOnPause?this.endSegment():(this.reset(),{})),this.resume=()=>{this.active=!0},this.endSegment=()=>{const e=this.audioBuffer;this.audioBuffer=[];const t=this.speaking;this.reset();const s=e.reduce(((e,t)=>e+ +t.isSpeech),0);if(t){if(s>=this.options.minSpeechFrames){const t=n(e.map((e=>e.frame)));return{msg:r.Message.SpeechEnd,audio:t}}return{msg:r.Message.VADMisfire}}return{}},this.process=async e=>{if(!this.active)return{};const t=await this.modelProcessFunc(e);if(this.audioBuffer.push({frame:e,isSpeech:t.isSpeech>=this.options.positiveSpeechThreshold}),t.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),t.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:t,msg:r.Message.SpeechStart,frame:e};if(t.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const s=this.audioBuffer;if(this.audioBuffer=[],s.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const o=n(s.map((e=>e.frame)));return{probs:t,msg:r.Message.SpeechEnd,audio:o,frame:e}}return{probs:t,msg:r.Message.VADMisfire,frame:e}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:t,frame:e}},this.audioBuffer=[],this.reset()}}},590:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in 
r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.NonRealTimeVAD=t.Message=t.FrameProcessor=t.getDefaultRealTimeVADOptions=t.MicVAD=t.DEFAULT_MODEL=t.AudioNodeVAD=t.utils=t.defaultNonRealTimeVADOptions=void 0;const n=i(s(656)),a=s(485),c=s(973),h=s(362);Object.defineProperty(t,"FrameProcessor",{enumerable:!0,get:function(){return h.FrameProcessor}});const u=s(954);Object.defineProperty(t,"Message",{enumerable:!0,get:function(){return u.Message}});const d=s(202),l=s(787);t.defaultNonRealTimeVADOptions={modelURL:a.baseAssetPath+"silero_vad_legacy.onnx",modelFetcher:c.defaultModelFetcher};class p extends d.PlatformAgnosticNonRealTimeVAD{static async new(e={}){const{modelURL:s,modelFetcher:o}={...t.defaultNonRealTimeVADOptions,...e};return await this._new((()=>o(s)),n,e)}}t.NonRealTimeVAD=p,t.utils={audioFileToArray:l.audioFileToArray,minFramesForTargetMS:l.minFramesForTargetMS,arrayBufferToBase64:l.arrayBufferToBase64,encodeWAV:l.encodeWAV};var f=s(746);Object.defineProperty(t,"AudioNodeVAD",{enumerable:!0,get:function(){return f.AudioNodeVAD}}),Object.defineProperty(t,"DEFAULT_MODEL",{enumerable:!0,get:function(){return f.DEFAULT_MODEL}}),Object.defineProperty(t,"MicVAD",{enumerable:!0,get:function(){return f.MicVAD}}),Object.defineProperty(t,"getDefaultRealTimeVADOptions",{enumerable:!0,get:function(){return f.getDefaultRealTimeVADOptions}})},710:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=t.LOG_PREFIX=void 0,t.LOG_PREFIX="[VAD]";const 
s=["error","debug","warn"].reduce(((e,s)=>(e[s]=function(e){return(...s)=>{console[e](t.LOG_PREFIX,...s)}}(s),e)),{});t.log=s},954:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP"}(s||(t.Message=s={}))},650:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0})},559:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__exportStar||function(e,t){for(var s in e)"default"===s||Object.prototype.hasOwnProperty.call(t,s)||o(t,e,s)};Object.defineProperty(t,"__esModule",{value:!0}),t.SileroV5=t.SileroLegacy=void 0,r(s(650),t);var i=s(143);Object.defineProperty(t,"SileroLegacy",{enumerable:!0,get:function(){return i.SileroLegacy}});var n=s(508);Object.defineProperty(t,"SileroV5",{enumerable:!0,get:function(){return n.SileroV5}})},143:(e,t,s)=>{var o;Object.defineProperty(t,"__esModule",{value:!0}),t.SileroLegacy=void 0;const r=s(710);class i{constructor(e,t,s,o,r){this.ortInstance=e,this._session=t,this._h=s,this._c=o,this._sr=r,this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ortInstance.Tensor("float32",e,[2,1,64]),this._c=new this.ortInstance.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ortInstance.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},s=await this._session.run(t);this._h=s.hn,this._c=s.cn;const[o]=s.output?.data;return{notSpeech:1-o,isSpeech:o}}}}t.SileroLegacy=i,o=i,i.new=async(e,t)=>{r.log.debug("initializing vad");const s=await t(),i=await e.InferenceSession.create(s),n=new e.Tensor("int64",[16000n]),a=Array(128).fill(0),c=new 
e.Tensor("float32",a,[2,1,64]),h=new e.Tensor("float32",a,[2,1,64]);return r.log.debug("vad is initialized"),new o(e,i,c,h,n)}},508:(e,t,s)=>{var o;Object.defineProperty(t,"__esModule",{value:!0}),t.SileroV5=void 0;const r=s(710);function i(e){const t=Array(256).fill(0);return new e.Tensor("float32",t,[2,1,128])}class n{constructor(e,t,s,o){this._session=e,this._state=t,this._sr=s,this.ortInstance=o,this.reset_state=()=>{this._state=i(this.ortInstance)},this.process=async e=>{const t={input:new this.ortInstance.Tensor("float32",e,[1,e.length]),state:this._state,sr:this._sr},s=await this._session.run(t);this._state=s.stateN;const[o]=s.output?.data;return{notSpeech:1-o,isSpeech:o}}}}t.SileroV5=n,o=n,n.new=async(e,t)=>{r.log.debug("Loading VAD...");const s=await t(),n=await e.InferenceSession.create(s),a=new e.Tensor("int64",[16000n]),c=i(e);return r.log.debug("...finished loading VAD"),new o(n,c,a,e)}},202:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.PlatformAgnosticNonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const o=s(362),r=s(954),i=s(559),n=s(825);t.defaultNonRealTimeVADOptions={...o.defaultLegacyFrameProcessorOptions,ortConfig:void 0},t.PlatformAgnosticNonRealTimeVAD=class{static async _new(e,s,o={}){const r={...t.defaultNonRealTimeVADOptions,...o};void 0!==r.ortConfig&&r.ortConfig(s);const i=new this(e,s,r);return await i.init(),i}constructor(e,t,s){this.modelFetcher=e,this.ort=t,this.options=s,this.init=async()=>{const e=await i.SileroLegacy.new(this.ort,this.modelFetcher);this.frameProcessor=new 
o.FrameProcessor(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames,submitUserSpeechOnPause:this.options.submitUserSpeechOnPause}),this.frameProcessor.resume()},this.run=async function*(e,t){const s={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},o=new n.Resampler(s);let i=0,a=0,c=0;for await(const t of o.stream(e)){const{msg:e,audio:s}=await this.frameProcessor.process(t);switch(e){case r.Message.SpeechStart:i=c*this.options.frameSamples/16;break;case r.Message.SpeechEnd:a=(c+1)*this.options.frameSamples/16,yield{audio:s,start:i,end:a}}c++}const{msg:h,audio:u}=this.frameProcessor.endSegment();h==r.Message.SpeechEnd&&(yield{audio:u,start:i,end:c*this.options.frameSamples/16})},(0,o.validateOptions)(s)}}},746:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.AudioNodeVAD=t.MicVAD=t.getDefaultRealTimeVADOptions=t.ort=t.DEFAULT_MODEL=void 0;const 
n=i(s(656)),a=s(973),c=s(362),h=s(710),u=s(954),d=s(559);t.DEFAULT_MODEL="legacy",t.ort=n,t.getDefaultRealTimeVADOptions=e=>({..."v5"===e?c.defaultV5FrameProcessorOptions:c.defaultLegacyFrameProcessorOptions,onFrameProcessed:e=>{},onVADMisfire:()=>{h.log.debug("VAD misfire")},onSpeechStart:()=>{h.log.debug("Detected speech start")},onSpeechEnd:()=>{h.log.debug("Detected speech end")},baseAssetPath:"https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.19/dist/",onnxWASMBasePath:"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/",stream:void 0,ortConfig:void 0,model:t.DEFAULT_MODEL,workletOptions:{}});class l{static async new(e={}){const s={...(0,t.getDefaultRealTimeVADOptions)(e.model??t.DEFAULT_MODEL),...e};let o;(0,c.validateOptions)(s),o=void 0===s.stream?await navigator.mediaDevices.getUserMedia({audio:{...s.additionalAudioConstraints,channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}):s.stream;const r=new AudioContext,i=new MediaStreamAudioSourceNode(r,{mediaStream:o}),n=await p.new(r,s);return n.receive(i),new l(s,r,o,n,i)}constructor(e,t,s,o,r,i=!1){this.options=e,this.audioContext=t,this.stream=s,this.audioNodeVAD=o,this.sourceNode=r,this.listening=i,this.pause=()=>{this.audioNodeVAD.pause(),this.listening=!1},this.start=()=>{this.audioNodeVAD.start(),this.listening=!0},this.destroy=()=>{this.listening&&this.pause(),void 0===this.options.stream&&this.stream.getTracks().forEach((e=>e.stop())),this.sourceNode.disconnect(),this.audioNodeVAD.destroy(),this.audioContext.close()}}}t.MicVAD=l;class p{static async new(e,s={}){const o={...(0,t.getDefaultRealTimeVADOptions)(s.model??t.DEFAULT_MODEL),...s};(0,c.validateOptions)(o),t.ort.env.wasm.wasmPaths=o.onnxWASMBasePath,void 0!==o.ortConfig&&o.ortConfig(t.ort);const r=o.baseAssetPath+"vad.worklet.bundle.min.js";try{await e.audioWorklet.addModule(r)}catch(e){throw console.error(`Encountered an error while loading worklet ${r}`),e}let 
i=o.workletOptions;i.processorOptions={...o.workletOptions.processorOptions??{},frameSamples:o.frameSamples};const n=new AudioWorkletNode(e,"vad-helper-worklet",i),h="v5"===o.model?"silero_vad_v5.onnx":"silero_vad_legacy.onnx",l=o.baseAssetPath+h,f="v5"===o.model?d.SileroV5.new:d.SileroLegacy.new;let m;try{m=await f(t.ort,(()=>(0,a.defaultModelFetcher)(l)))}catch(e){throw console.error(`Encountered an error while loading model file ${l}`),e}const g=new c.FrameProcessor(m.process,m.reset_state,{frameSamples:o.frameSamples,positiveSpeechThreshold:o.positiveSpeechThreshold,negativeSpeechThreshold:o.negativeSpeechThreshold,redemptionFrames:o.redemptionFrames,preSpeechPadFrames:o.preSpeechPadFrames,minSpeechFrames:o.minSpeechFrames,submitUserSpeechOnPause:o.submitUserSpeechOnPause}),S=new p(e,o,g,n);return n.port.onmessage=async e=>{if(e.data?.message===u.Message.AudioFrame){let t=e.data.data;t instanceof ArrayBuffer||(t=new ArrayBuffer(e.data.data.byteLength),new Uint8Array(t).set(new Uint8Array(e.data.data)));const s=new Float32Array(t);await S.processFrame(s)}},S}constructor(e,t,s,o){this.ctx=e,this.options=t,this.frameProcessor=s,this.entryNode=o,this.pause=()=>{const e=this.frameProcessor.pause();this.handleFrameProcessorEvent(e)},this.start=()=>{this.frameProcessor.resume()},this.receive=e=>{e.connect(this.entryNode)},this.processFrame=async e=>{const t=await this.frameProcessor.process(e);this.handleFrameProcessorEvent(t)},this.handleFrameProcessorEvent=e=>{switch(void 0!==e.probs&&this.options.onFrameProcessed(e.probs,e.frame),e.msg){case u.Message.SpeechStart:this.options.onSpeechStart();break;case u.Message.VADMisfire:this.options.onVADMisfire();break;case u.Message.SpeechEnd:this.options.onSpeechEnd(e.audio)}},this.destroy=()=>{this.entryNode.port.postMessage({message:u.Message.SpeechStop}),this.entryNode.disconnect()}}}t.AudioNodeVAD=p},825:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const 
o=s(710);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const s of e)for(this.inputBuffer.push(s);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();t.push(e)}return t},this.stream=async function*(e){for(const t of e)for(this.inputBuffer.push(t);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();yield e}},e.nativeSampleRate<16e3&&o.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}hasEnoughDataForFrame(){return this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>=this.options.targetFrameSize}generateOutputFrame(){const e=new Float32Array(this.options.targetFrameSize);let t=0,s=0;for(;t<this.options.targetFrameSize;){let o=0,r=0;for(;s<Math.min(this.inputBuffer.length,(t+1)*this.options.nativeSampleRate/this.options.targetSampleRate);){const e=this.inputBuffer[s];void 0!==e&&(o+=e,r++),s++}e[t]=o/r,t++}return this.inputBuffer=this.inputBuffer.slice(s),e}}},787:(e,t)=>{function s(e,t,s){for(var o=0;o<s.length;o++)e.setUint8(t+o,s.charCodeAt(o))}Object.defineProperty(t,"__esModule",{value:!0}),t.audioFileToArray=t.encodeWAV=t.arrayBufferToBase64=t.minFramesForTargetMS=void 0,t.minFramesForTargetMS=function(e,t,s=16e3){return Math.ceil(e*s/1e3/t)},t.arrayBufferToBase64=function(e){const t=new Uint8Array(e),s=t.byteLength,o=new Array(s);for(var r=0;r<s;r++){const e=t[r];if(void 0===e)break;o[r]=String.fromCharCode(e)}return btoa(o.join(""))},t.encodeWAV=function(e,t=3,o=16e3,r=1,i=32){var n=i/8,a=r*n,c=new ArrayBuffer(44+e.length*n),h=new DataView(c);return s(h,0,"RIFF"),h.setUint32(4,36+e.length*n,!0),s(h,8,"WAVE"),s(h,12,"fmt "),h.setUint32(16,16,!0),h.setUint16(20,t,!0),h.setUint16(22,r,!0),h.setUint32(24,o,!0),h.setUint32(28,o*a,!0),h.setUint16(32,a,!0),h.setUint16(34,i,!0),s(h,36,"data"),h.setUint32(40,e.length*n,!0),1===t?function(e,t,s){for(var o=0;o<s.length;o++,t+=2){var 
r=Math.max(-1,Math.min(1,s[o]));e.setInt16(t,r<0?32768*r:32767*r,!0)}}(h,44,e):function(e,t,s){for(var o=0;o<s.length;o++,t+=4)e.setFloat32(t,s[o],!0)}(h,44,e),c},t.audioFileToArray=async function(e){const t=new OfflineAudioContext(1,1,44100),s=new FileReader;let o=null;if(await new Promise((r=>{s.addEventListener("loadend",(e=>{const i=s.result;t.decodeAudioData(i,(e=>{o=e,t.startRendering().then((e=>{console.log("Rendering completed successfully"),r()})).catch((e=>{console.error(`Rendering failed: ${e}`)}))}),(e=>{console.log(`Error with decoding audio data: ${e}`)}))})),s.readAsArrayBuffer(e)})),null===o)throw Error("some shit");let r=o,i=new Float32Array(r.length);for(let e=0;e<r.length;e++)for(let t=0;t<r.numberOfChannels;t++)i[e]+=r.getChannelData(t)[e];return{audio:i,sampleRate:r.sampleRate}}},656:t=>{t.exports=e}},s={};return function e(o){var r=s[o];if(void 0!==r)return r.exports;var i=s[o]={exports:{}};return t[o].call(i.exports,i,i.exports,e),i.exports}(590)})())); | ||
/*
 * Minified webpack UMD bundle for the `vad` library.
 *
 * Wrapper: the factory receives onnxruntime-web and its result is published as
 * module.exports (CommonJS), an AMD module, exports.vad, or self.vad (in which
 * case the global `ort` is passed in). The bundle's entry module is 590.
 *
 * Webpack module ids used below:
 *   485  baseAssetPath: directory part of document.currentScript.src with any
 *        "#..." / "?..." suffix removed; "/" when there is no current script.
 *   973  defaultModelFetcher: fetch(url) resolved to an ArrayBuffer.
 *   362  default frame-processor options (legacy and v5), validateOptions
 *        (only logs warnings/errors) and the FrameProcessor class.
 *   590  entry point: NonRealTimeVAD, utils, and re-exports from 362/954/746.
 *   710  log: console.error/debug/warn wrappers prefixed with "[VAD]".
 *   954  Message string constants (AUDIO_FRAME, SPEECH_START, ...).
 *   650  empty module (only sets __esModule).
 *   559  re-exports SileroLegacy (143) and SileroV5 (508).
 *   143  SileroLegacy: keeps h/c tensors of shape [2,1,64] and an int64 sr
 *        tensor holding 16000n; process() runs the session on a [1, length]
 *        float32 input and returns {notSpeech, isSpeech}.
 *   508  SileroV5: same interface with a single state tensor of shape [2,1,128].
 *   202  PlatformAgnosticNonRealTimeVAD: always builds a SileroLegacy model and
 *        run() yields {audio, start, end} per detected segment, with start/end
 *        computed as frameIndex * frameSamples / 16.
 *   746  real-time API: DEFAULT_MODEL ("legacy"), getDefaultRealTimeVADOptions,
 *        MicVAD (getUserMedia + AudioContext) and AudioNodeVAD (AudioWorkletNode
 *        "vad-helper-worklet" feeding a FrameProcessor).
 *   825  Resampler: averages buffered input samples into frames of
 *        targetFrameSize samples.
 *   787  utils: minFramesForTargetMS, arrayBufferToBase64, encodeWAV,
 *        audioFileToArray.
 *   656  the onnxruntime-web object handed to the factory.
 *
 * NOTE(review): in 787 audioFileToArray the awaited promise is resolved only
 *   inside startRendering().then(...); the decode-error callback and the
 *   render .catch only log, so on failure the await appears never to settle
 *   and the `null===o` throw is not reached - confirm and consider rejecting.
 *   The per-channel loop adds channels together without dividing.
 * NOTE(review): in 825 generateOutputFrame, e[t]=o/r is 0/0 (NaN) whenever no
 *   input sample is consumed for an output slot; the constructor only logs an
 *   error when nativeSampleRate < 16000.
 * NOTE(review): in 746 AudioNodeVAD.new, `i=o.workletOptions` followed by
 *   `i.processorOptions=...` writes to the workletOptions object itself; the
 *   options merge is a shallow spread, so a caller-supplied object is
 *   presumably mutated - verify against callers.
 * NOTE(review): the default baseAssetPath in 746 is pinned to the 0.0.20 CDN
 *   path - confirm this is the intended version for this build.
 */
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{"use strict";var t={485:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.baseAssetPath=void 0;const s="undefined"!=typeof window&&void 0!==window.document?window.document.currentScript:null;let o="/";s&&(o=s.src.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/")),t.baseAssetPath=o},973:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.defaultModelFetcher=void 0,t.defaultModelFetcher=e=>fetch(e).then((e=>e.arrayBuffer()))},362:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.FrameProcessor=t.validateOptions=t.defaultV5FrameProcessorOptions=t.defaultLegacyFrameProcessorOptions=void 0;const o=s(710),r=s(954),i=[512,1024,1536];t.defaultLegacyFrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3,submitUserSpeechOnPause:!1},t.defaultV5FrameProcessorOptions={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:3,redemptionFrames:24,frameSamples:512,minSpeechFrames:9,submitUserSpeechOnPause:!1},t.validateOptions=function(e){i.includes(e.frameSamples)||o.log.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.positiveSpeechThreshold>1)&&o.log.error("positiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&o.log.error("negativeSpeechThreshold should be between 0 and positiveSpeechThreshold"),e.preSpeechPadFrames<0&&o.log.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&o.log.error("redemptionFrames should be positive")};const n=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),s=new 
Float32Array(t.at(-1));return e.forEach(((e,o)=>{const r=t[o];s.set(e,r)})),s};t.FrameProcessor=class{constructor(e,t,s){this.modelProcessFunc=e,this.modelResetFunc=t,this.options=s,this.speaking=!1,this.redemptionCounter=0,this.active=!1,this.reset=()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0},this.pause=()=>(this.active=!1,this.options.submitUserSpeechOnPause?this.endSegment():(this.reset(),{})),this.resume=()=>{this.active=!0},this.endSegment=()=>{const e=this.audioBuffer;this.audioBuffer=[];const t=this.speaking;this.reset();const s=e.reduce(((e,t)=>e+ +t.isSpeech),0);if(t){if(s>=this.options.minSpeechFrames){const t=n(e.map((e=>e.frame)));return{msg:r.Message.SpeechEnd,audio:t}}return{msg:r.Message.VADMisfire}}return{}},this.process=async e=>{if(!this.active)return{};const t=await this.modelProcessFunc(e);if(this.audioBuffer.push({frame:e,isSpeech:t.isSpeech>=this.options.positiveSpeechThreshold}),t.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),t.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:t,msg:r.Message.SpeechStart,frame:e};if(t.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const s=this.audioBuffer;if(this.audioBuffer=[],s.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const o=n(s.map((e=>e.frame)));return{probs:t,msg:r.Message.SpeechEnd,audio:o,frame:e}}return{probs:t,msg:r.Message.VADMisfire,frame:e}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:t,frame:e}},this.audioBuffer=[],this.reset()}}},590:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in 
r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.NonRealTimeVAD=t.Message=t.FrameProcessor=t.getDefaultRealTimeVADOptions=t.MicVAD=t.DEFAULT_MODEL=t.AudioNodeVAD=t.utils=t.defaultNonRealTimeVADOptions=void 0;const n=i(s(656)),a=s(485),c=s(973),h=s(362);Object.defineProperty(t,"FrameProcessor",{enumerable:!0,get:function(){return h.FrameProcessor}});const u=s(954);Object.defineProperty(t,"Message",{enumerable:!0,get:function(){return u.Message}});const d=s(202),l=s(787);t.defaultNonRealTimeVADOptions={modelURL:a.baseAssetPath+"silero_vad_legacy.onnx",modelFetcher:c.defaultModelFetcher};class p extends d.PlatformAgnosticNonRealTimeVAD{static async new(e={}){const{modelURL:s,modelFetcher:o}={...t.defaultNonRealTimeVADOptions,...e};return await this._new((()=>o(s)),n,e)}}t.NonRealTimeVAD=p,t.utils={audioFileToArray:l.audioFileToArray,minFramesForTargetMS:l.minFramesForTargetMS,arrayBufferToBase64:l.arrayBufferToBase64,encodeWAV:l.encodeWAV};var f=s(746);Object.defineProperty(t,"AudioNodeVAD",{enumerable:!0,get:function(){return f.AudioNodeVAD}}),Object.defineProperty(t,"DEFAULT_MODEL",{enumerable:!0,get:function(){return f.DEFAULT_MODEL}}),Object.defineProperty(t,"MicVAD",{enumerable:!0,get:function(){return f.MicVAD}}),Object.defineProperty(t,"getDefaultRealTimeVADOptions",{enumerable:!0,get:function(){return f.getDefaultRealTimeVADOptions}})},710:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.log=t.LOG_PREFIX=void 0,t.LOG_PREFIX="[VAD]";const 
s=["error","debug","warn"].reduce(((e,s)=>(e[s]=function(e){return(...s)=>{console[e](t.LOG_PREFIX,...s)}}(s),e)),{});t.log=s},954:(e,t)=>{var s;Object.defineProperty(t,"__esModule",{value:!0}),t.Message=void 0,function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END",e.SpeechStop="SPEECH_STOP"}(s||(t.Message=s={}))},650:(e,t)=>{Object.defineProperty(t,"__esModule",{value:!0})},559:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__exportStar||function(e,t){for(var s in e)"default"===s||Object.prototype.hasOwnProperty.call(t,s)||o(t,e,s)};Object.defineProperty(t,"__esModule",{value:!0}),t.SileroV5=t.SileroLegacy=void 0,r(s(650),t);var i=s(143);Object.defineProperty(t,"SileroLegacy",{enumerable:!0,get:function(){return i.SileroLegacy}});var n=s(508);Object.defineProperty(t,"SileroV5",{enumerable:!0,get:function(){return n.SileroV5}})},143:(e,t,s)=>{var o;Object.defineProperty(t,"__esModule",{value:!0}),t.SileroLegacy=void 0;const r=s(710);class i{constructor(e,t,s,o,r){this.ortInstance=e,this._session=t,this._h=s,this._c=o,this._sr=r,this.reset_state=()=>{const e=Array(128).fill(0);this._h=new this.ortInstance.Tensor("float32",e,[2,1,64]),this._c=new this.ortInstance.Tensor("float32",e,[2,1,64])},this.process=async e=>{const t={input:new this.ortInstance.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},s=await this._session.run(t);this._h=s.hn,this._c=s.cn;const[o]=s.output?.data;return{notSpeech:1-o,isSpeech:o}}}}t.SileroLegacy=i,o=i,i.new=async(e,t)=>{r.log.debug("initializing vad");const s=await t(),i=await e.InferenceSession.create(s),n=new e.Tensor("int64",[16000n]),a=Array(128).fill(0),c=new 
e.Tensor("float32",a,[2,1,64]),h=new e.Tensor("float32",a,[2,1,64]);return r.log.debug("vad is initialized"),new o(e,i,c,h,n)}},508:(e,t,s)=>{var o;Object.defineProperty(t,"__esModule",{value:!0}),t.SileroV5=void 0;const r=s(710);function i(e){const t=Array(256).fill(0);return new e.Tensor("float32",t,[2,1,128])}class n{constructor(e,t,s,o){this._session=e,this._state=t,this._sr=s,this.ortInstance=o,this.reset_state=()=>{this._state=i(this.ortInstance)},this.process=async e=>{const t={input:new this.ortInstance.Tensor("float32",e,[1,e.length]),state:this._state,sr:this._sr},s=await this._session.run(t);this._state=s.stateN;const[o]=s.output?.data;return{notSpeech:1-o,isSpeech:o}}}}t.SileroV5=n,o=n,n.new=async(e,t)=>{r.log.debug("Loading VAD...");const s=await t(),n=await e.InferenceSession.create(s),a=new e.Tensor("int64",[16000n]),c=i(e);return r.log.debug("...finished loading VAD"),new o(n,c,a,e)}},202:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.PlatformAgnosticNonRealTimeVAD=t.defaultNonRealTimeVADOptions=void 0;const o=s(362),r=s(954),i=s(559),n=s(825);t.defaultNonRealTimeVADOptions={...o.defaultLegacyFrameProcessorOptions,ortConfig:void 0},t.PlatformAgnosticNonRealTimeVAD=class{static async _new(e,s,o={}){const r={...t.defaultNonRealTimeVADOptions,...o};void 0!==r.ortConfig&&r.ortConfig(s);const i=new this(e,s,r);return await i.init(),i}constructor(e,t,s){this.modelFetcher=e,this.ort=t,this.options=s,this.init=async()=>{const e=await i.SileroLegacy.new(this.ort,this.modelFetcher);this.frameProcessor=new 
o.FrameProcessor(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames,submitUserSpeechOnPause:this.options.submitUserSpeechOnPause}),this.frameProcessor.resume()},this.run=async function*(e,t){const s={nativeSampleRate:t,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},o=new n.Resampler(s);let i=0,a=0,c=0;for await(const t of o.stream(e)){const{msg:e,audio:s}=await this.frameProcessor.process(t);switch(e){case r.Message.SpeechStart:i=c*this.options.frameSamples/16;break;case r.Message.SpeechEnd:a=(c+1)*this.options.frameSamples/16,yield{audio:s,start:i,end:a}}c++}const{msg:h,audio:u}=this.frameProcessor.endSegment();h==r.Message.SpeechEnd&&(yield{audio:u,start:i,end:c*this.options.frameSamples/16})},(0,o.validateOptions)(s)}}},746:function(e,t,s){var o=this&&this.__createBinding||(Object.create?function(e,t,s,o){void 0===o&&(o=s);var r=Object.getOwnPropertyDescriptor(t,s);r&&!("get"in r?!t.__esModule:r.writable||r.configurable)||(r={enumerable:!0,get:function(){return t[s]}}),Object.defineProperty(e,o,r)}:function(e,t,s,o){void 0===o&&(o=s),e[o]=t[s]}),r=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),i=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var s in e)"default"!==s&&Object.prototype.hasOwnProperty.call(e,s)&&o(t,e,s);return r(t,e),t};Object.defineProperty(t,"__esModule",{value:!0}),t.AudioNodeVAD=t.MicVAD=t.getDefaultRealTimeVADOptions=t.ort=t.DEFAULT_MODEL=void 0;const 
n=i(s(656)),a=s(973),c=s(362),h=s(710),u=s(954),d=s(559);t.DEFAULT_MODEL="legacy",t.ort=n,t.getDefaultRealTimeVADOptions=e=>({..."v5"===e?c.defaultV5FrameProcessorOptions:c.defaultLegacyFrameProcessorOptions,onFrameProcessed:e=>{},onVADMisfire:()=>{h.log.debug("VAD misfire")},onSpeechStart:()=>{h.log.debug("Detected speech start")},onSpeechEnd:()=>{h.log.debug("Detected speech end")},baseAssetPath:"https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/",onnxWASMBasePath:"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/",stream:void 0,ortConfig:void 0,model:t.DEFAULT_MODEL,workletOptions:{}});class l{static async new(e={}){const s={...(0,t.getDefaultRealTimeVADOptions)(e.model??t.DEFAULT_MODEL),...e};let o;(0,c.validateOptions)(s),o=void 0===s.stream?await navigator.mediaDevices.getUserMedia({audio:{...s.additionalAudioConstraints,channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}):s.stream;const r=new AudioContext,i=new MediaStreamAudioSourceNode(r,{mediaStream:o}),n=await p.new(r,s);return n.receive(i),new l(s,r,o,n,i)}constructor(e,t,s,o,r,i=!1){this.options=e,this.audioContext=t,this.stream=s,this.audioNodeVAD=o,this.sourceNode=r,this.listening=i,this.pause=()=>{this.audioNodeVAD.pause(),this.listening=!1},this.start=()=>{this.audioNodeVAD.start(),this.listening=!0},this.destroy=()=>{this.listening&&this.pause(),void 0===this.options.stream&&this.stream.getTracks().forEach((e=>e.stop())),this.sourceNode.disconnect(),this.audioNodeVAD.destroy(),this.audioContext.close()}}}t.MicVAD=l;class p{static async new(e,s={}){const o={...(0,t.getDefaultRealTimeVADOptions)(s.model??t.DEFAULT_MODEL),...s};(0,c.validateOptions)(o),t.ort.env.wasm.wasmPaths=o.onnxWASMBasePath,void 0!==o.ortConfig&&o.ortConfig(t.ort);const r=o.baseAssetPath+"vad.worklet.bundle.min.js";try{await e.audioWorklet.addModule(r)}catch(e){throw console.error(`Encountered an error while loading worklet ${r}`),e}let 
i=o.workletOptions;i.processorOptions={...o.workletOptions.processorOptions??{},frameSamples:o.frameSamples};const n=new AudioWorkletNode(e,"vad-helper-worklet",i),h="v5"===o.model?"silero_vad_v5.onnx":"silero_vad_legacy.onnx",l=o.baseAssetPath+h,f="v5"===o.model?d.SileroV5.new:d.SileroLegacy.new;let m;try{m=await f(t.ort,(()=>(0,a.defaultModelFetcher)(l)))}catch(e){throw console.error(`Encountered an error while loading model file ${l}`),e}const g=new c.FrameProcessor(m.process,m.reset_state,{frameSamples:o.frameSamples,positiveSpeechThreshold:o.positiveSpeechThreshold,negativeSpeechThreshold:o.negativeSpeechThreshold,redemptionFrames:o.redemptionFrames,preSpeechPadFrames:o.preSpeechPadFrames,minSpeechFrames:o.minSpeechFrames,submitUserSpeechOnPause:o.submitUserSpeechOnPause}),S=new p(e,o,g,n);return n.port.onmessage=async e=>{if(e.data?.message===u.Message.AudioFrame){let t=e.data.data;t instanceof ArrayBuffer||(t=new ArrayBuffer(e.data.data.byteLength),new Uint8Array(t).set(new Uint8Array(e.data.data)));const s=new Float32Array(t);await S.processFrame(s)}},S}constructor(e,t,s,o){this.ctx=e,this.options=t,this.frameProcessor=s,this.entryNode=o,this.pause=()=>{const e=this.frameProcessor.pause();this.handleFrameProcessorEvent(e)},this.start=()=>{this.frameProcessor.resume()},this.receive=e=>{e.connect(this.entryNode)},this.processFrame=async e=>{const t=await this.frameProcessor.process(e);this.handleFrameProcessorEvent(t)},this.handleFrameProcessorEvent=e=>{switch(void 0!==e.probs&&this.options.onFrameProcessed(e.probs,e.frame),e.msg){case u.Message.SpeechStart:this.options.onSpeechStart();break;case u.Message.VADMisfire:this.options.onVADMisfire();break;case u.Message.SpeechEnd:this.options.onSpeechEnd(e.audio)}},this.destroy=()=>{this.entryNode.port.postMessage({message:u.Message.SpeechStop}),this.entryNode.disconnect()}}}t.AudioNodeVAD=p},825:(e,t,s)=>{Object.defineProperty(t,"__esModule",{value:!0}),t.Resampler=void 0;const 
o=s(710);t.Resampler=class{constructor(e){this.options=e,this.process=e=>{const t=[];for(const s of e)for(this.inputBuffer.push(s);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();t.push(e)}return t},this.stream=async function*(e){for(const t of e)for(this.inputBuffer.push(t);this.hasEnoughDataForFrame();){const e=this.generateOutputFrame();yield e}},e.nativeSampleRate<16e3&&o.log.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}hasEnoughDataForFrame(){return this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>=this.options.targetFrameSize}generateOutputFrame(){const e=new Float32Array(this.options.targetFrameSize);let t=0,s=0;for(;t<this.options.targetFrameSize;){let o=0,r=0;for(;s<Math.min(this.inputBuffer.length,(t+1)*this.options.nativeSampleRate/this.options.targetSampleRate);){const e=this.inputBuffer[s];void 0!==e&&(o+=e,r++),s++}e[t]=o/r,t++}return this.inputBuffer=this.inputBuffer.slice(s),e}}},787:(e,t)=>{function s(e,t,s){for(var o=0;o<s.length;o++)e.setUint8(t+o,s.charCodeAt(o))}Object.defineProperty(t,"__esModule",{value:!0}),t.audioFileToArray=t.encodeWAV=t.arrayBufferToBase64=t.minFramesForTargetMS=void 0,t.minFramesForTargetMS=function(e,t,s=16e3){return Math.ceil(e*s/1e3/t)},t.arrayBufferToBase64=function(e){const t=new Uint8Array(e),s=t.byteLength,o=new Array(s);for(var r=0;r<s;r++){const e=t[r];if(void 0===e)break;o[r]=String.fromCharCode(e)}return btoa(o.join(""))},t.encodeWAV=function(e,t=3,o=16e3,r=1,i=32){var n=i/8,a=r*n,c=new ArrayBuffer(44+e.length*n),h=new DataView(c);return s(h,0,"RIFF"),h.setUint32(4,36+e.length*n,!0),s(h,8,"WAVE"),s(h,12,"fmt "),h.setUint32(16,16,!0),h.setUint16(20,t,!0),h.setUint16(22,r,!0),h.setUint32(24,o,!0),h.setUint32(28,o*a,!0),h.setUint16(32,a,!0),h.setUint16(34,i,!0),s(h,36,"data"),h.setUint32(40,e.length*n,!0),1===t?function(e,t,s){for(var o=0;o<s.length;o++,t+=2){var 
r=Math.max(-1,Math.min(1,s[o]));e.setInt16(t,r<0?32768*r:32767*r,!0)}}(h,44,e):function(e,t,s){for(var o=0;o<s.length;o++,t+=4)e.setFloat32(t,s[o],!0)}(h,44,e),c},t.audioFileToArray=async function(e){const t=new OfflineAudioContext(1,1,44100),s=new FileReader;let o=null;if(await new Promise((r=>{s.addEventListener("loadend",(e=>{const i=s.result;t.decodeAudioData(i,(e=>{o=e,t.startRendering().then((e=>{console.log("Rendering completed successfully"),r()})).catch((e=>{console.error(`Rendering failed: ${e}`)}))}),(e=>{console.log(`Error with decoding audio data: ${e}`)}))})),s.readAsArrayBuffer(e)})),null===o)throw Error("some shit");let r=o,i=new Float32Array(r.length);for(let e=0;e<r.length;e++)for(let t=0;t<r.numberOfChannels;t++)i[e]+=r.getChannelData(t)[e];return{audio:i,sampleRate:r.sampleRate}}},656:t=>{t.exports=e}},s={};return function e(o){var r=s[o];if(void 0!==r)return r.exports;var i=s[o]={exports:{}};return t[o].call(i.exports,i,i.exports,e),i.exports}(590)})())); |
@@ -54,3 +54,3 @@ "use strict"; | ||
}, | ||
baseAssetPath: "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.19/dist/", | ||
baseAssetPath: "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/", | ||
onnxWASMBasePath: "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/", | ||
@@ -57,0 +57,0 @@ stream: undefined, |
@@ -15,3 +15,3 @@ { | ||
"homepage": "https://github.com/ricky0123/vad", | ||
"version": "0.0.20", | ||
"version": "0.0.21", | ||
"license": "ISC", | ||
@@ -18,0 +18,0 @@ "main": "dist/index.js", |
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package