@ricky0123/vad-web
Advanced tools
Comparing version 0.0.2 to 0.0.3
@@ -14,4 +14,4 @@ import { NonRealTimeVAD as _NonRealTimeVAD, FrameProcessor, FrameProcessorOptions, Message, NonRealTimeVADOptions } from "@ricky0123/vad-common"; | ||
export type { FrameProcessorOptions, NonRealTimeVADOptions }; | ||
export { MicVAD, AudioNodeVAD } from "./real-time-vad"; | ||
export { MicVAD, AudioNodeVAD, defaultRealTimeVADOptions } from "./real-time-vad"; | ||
export type { RealTimeVADOptions } from "./real-time-vad"; | ||
//# sourceMappingURL=index.d.ts.map |
@@ -1,1 +0,1 @@ | ||
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{"use strict";var t={656:t=>{t.exports=e}},i={};function r(e){var s=i[e];if(void 0!==s)return s.exports;var o=i[e]={exports:{}};return t[e](o,o.exports,r),o.exports}r.d=(e,t)=>{for(var i in t)r.o(t,i)&&!r.o(e,i)&&Object.defineProperty(e,i,{enumerable:!0,get:t[i]})},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;r.g.importScripts&&(e=r.g.location+"");var t=r.g.document;if(!e&&t&&(t.currentScript&&(e=t.currentScript.src),!e)){var i=t.getElementsByTagName("script");i.length&&(e=i[i.length-1].src)}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),r.p=e})();var s={};return(()=>{r.r(s),r.d(s,{AudioNodeVAD:()=>E,FrameProcessor:()=>l,Message:()=>e,MicVAD:()=>T,NonRealTimeVAD:()=>j,utils:()=>B});var e,t=r(656);function i(e,t,i){for(var r=0;r<i.length;r++)e.setUint8(t+r,i.charCodeAt(r))}!function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END"}(e||(e={}));const o="[VAD]",n=["error","debug","warn"].reduce(((e,t)=>(e[t]=function(e){return function(){for(var t=arguments.length,i=new Array(t),r=0;r<t;r++)i[r]=arguments[r];console[e](o,...i)}}(t),e)),{});function a(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const h=[512,1024,1536],c={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3};function p(e){h.includes(e.frameSamples)||n.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&n.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&n.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&n.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&n.error("preSpeechPadFrames should be positive")}const u=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),i=new Float32Array(t.at(-1));return e.forEach(((e,r)=>{const s=t[r];i.set(e,s)})),i};class l{constructor(t,i,r){a(this,"modelProcessFunc",void 0),a(this,"modelResetFunc",void 0),a(this,"options",void 0),a(this,"speaking",!1),a(this,"audioBuffer",void 0),a(this,"redemptionCounter",0),a(this,"active",!1),a(this,"reset",(()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0})),a(this,"pause",(()=>{this.active=!1,this.reset()})),a(this,"resume",(()=>{this.active=!0})),a(this,"endSegment",(()=>{const t=this.audioBuffer;this.audioBuffer=[];const i=this.speaking;this.reset();const r=t.reduce(((e,t)=>e+ +t.isSpeech),0);if(i){if(r>=this.options.minSpeechFrames){const i=u(t.map((e=>e.frame)));return{msg:e.SpeechEnd,audio:i}}return{msg:e.VADMisfire}}return{}})),a(this,"process",(async t=>{if(!this.active)return{};const i=await this.modelProcessFunc(t);if(this.audioBuffer.push({frame:t,isSpeech:i.isSpeech>=this.options.positiveSpeechThreshold}),i.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),i.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:i,msg:e.SpeechStart};if(i.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const t=this.audioBuffer;if(this.audioBuffer=[],t.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const r=u(t.map((e=>e.frame)));return{probs:i,msg:e.SpeechEnd,audio:r}}return{probs:i,msg:e.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:i}})),this.modelProcessFunc=t,this.modelResetFunc=i,this.options=r,this.audioBuffer=[],this.reset()}}function d(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}class f{constructor(e,t){d(this,"ort",void 0),d(this,"modelFetcher",void 0),d(this,"_session",void 0),d(this,"_h",void 0),d(this,"_c",void 0),d(this,"_sr",void 0),d(this,"init",(async()=>{n.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),n.debug("vad is initialized")})),d(this,"reset_state",(()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])})),d(this,"process",(async e=>{const t={input:new this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},i=await this._session.run(t);this._h=i.hn,this._c=i.cn;const[r]=i.output.data;return{notSpeech:1-r,isSpeech:r}})),this.ort=e,this.modelFetcher=t}}function m(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}d(f,"new",(async(e,t)=>{const i=new f(e,t);return await i.init(),i}));class v{constructor(e){m(this,"options",void 0),m(this,"inputBuffer",void 0),m(this,"process",(e=>{const t=[];for(const t of e)this.inputBuffer.push(t);for(;this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>this.options.targetFrameSize;){const e=new Float32Array(this.options.targetFrameSize);let i=0,r=0;for(;i<this.options.targetFrameSize;){let t=0,s=0;for(;r<Math.min(this.inputBuffer.length,(i+1)*this.options.nativeSampleRate/this.options.targetSampleRate);)t+=this.inputBuffer[r],s++,r++;e[i]=t/s,i++}this.inputBuffer=this.inputBuffer.slice(r),t.push(e)}return t})),this.options=e,e.nativeSampleRate<16e3&&n.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}}function S(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const g={...c};class w{configure(){throw new Error}static async new(){const e=new this({...g,...arguments.length>0&&void 0!==arguments[0]?arguments[0]:{}});return await e.init(),e}constructor(t){S(this,"options",void 0),S(this,"ort",void 0),S(this,"modelFetcher",void 0),S(this,"frameProcessor",void 0),S(this,"init",(async()=>{const e=await f.new(this.ort,this.modelFetcher);this.frameProcessor=new l(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),this.frameProcessor.resume()})),S(this,"run",(async function*(t,i){const r={nativeSampleRate:i,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},s=new v(r).process(t);let o,n;for(const t of[...Array(s.length)].keys()){const i=s[t],{msg:r,audio:a}=await this.frameProcessor.process(i);switch(r){case e.SpeechStart:o=t*this.options.frameSamples/16;break;case e.SpeechEnd:n=(t+1)*this.options.frameSamples/16,yield{audio:a,start:o,end:n}}}const{msg:a,audio:h}=this.frameProcessor.endSegment();a==e.SpeechEnd&&(yield{audio:h,start:o,end:s.length*this.options.frameSamples/16})})),this.options=t,p(t),this.configure()}}const y={minFramesForTargetMS:function(e,t){let i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:16e3;return Math.ceil(e*i/1e3/t)},arrayBufferToBase64:function(e){for(var t="",i=new Uint8Array(e),r=i.byteLength,s=0;s<r;s++)t+=String.fromCharCode(i[s]);return btoa(t)},encodeWAV:function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:3,r=arguments.length>2&&void 0!==arguments[2]?arguments[2]:16e3,s=arguments.length>3&&void 0!==arguments[3]?arguments[3]:1,o=arguments.length>4&&void 0!==arguments[4]?arguments[4]:32;var n=o/8,a=s*n,h=new ArrayBuffer(44+e.length*n),c=new DataView(h);return i(c,0,"RIFF"),c.setUint32(4,36+e.length*n,!0),i(c,8,"WAVE"),i(c,12,"fmt "),c.setUint32(16,16,!0),c.setUint16(20,t,!0),c.setUint16(22,s,!0),c.setUint32(24,r,!0),c.setUint32(28,r*a,!0),c.setUint16(32,a,!0),c.setUint16(34,o,!0),i(c,36,"data"),c.setUint32(40,e.length*n,!0),1===t?function(e,t,i){for(var r=0;r<i.length;r++,t+=2){var s=Math.max(-1,Math.min(1,i[r]));e.setInt16(t,s<0?32768*s:32767*s,!0)}}(c,44,e):function(e,t,i){for(var r=0;r<i.length;r++,t+=4)e.setFloat32(t,i[r],!0)}(c,44,e),h}},b=r.p+"568bf886c02ac597add4.onnx",F=async()=>await fetch(b).then((e=>e.arrayBuffer()));function P(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const A={...c,onFrameProcessed:e=>{},onVADMisfire:()=>{n.debug("VAD misfire")},onSpeechStart:()=>{n.debug("Detected speech start")},onSpeechEnd:()=>{n.debug("Detected speech end")}};class T{static async new(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};const t=new T({...A,...e});return await t.init(),t}constructor(e){P(this,"listening",!1),P(this,"init",(async()=>{this.stream=await navigator.mediaDevices.getUserMedia({audio:{...this.options.additionalAudioConstraints,channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}),this.audioContext=new AudioContext;const e=new MediaStreamAudioSourceNode(this.audioContext,{mediaStream:this.stream});this.audioNodeVAD=await E.new(this.audioContext,this.options),this.audioNodeVAD.receive(e)})),P(this,"pause",(()=>{this.audioNodeVAD.pause(),this.listening=!1})),P(this,"start",(()=>{this.audioNodeVAD.start(),this.listening=!0})),this.options=e,p(e)}}class E{static async new(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};const i=new E(e,{...A,...t});return await i.init(),i}constructor(i,s){P(this,"pause",(()=>{this.frameProcessor.pause()})),P(this,"start",(()=>{this.frameProcessor.resume()})),P(this,"receive",(e=>{e.connect(this.entryNode)})),P(this,"processFrame",(async t=>{const{probs:i,msg:r,audio:s}=await this.frameProcessor.process(t);switch(void 0!==i&&this.options.onFrameProcessed(i),r){case e.SpeechStart:this.options.onSpeechStart();break;case e.VADMisfire:this.options.onVADMisfire();break;case e.SpeechEnd:this.options.onSpeechEnd(s)}})),P(this,"init",(async()=>{const i=r.p+"vad.worklet.js";await this.ctx.audioWorklet.addModule(i);const s=new AudioWorkletNode(this.ctx,"vad-helper-worklet",{processorOptions:{frameSamples:this.options.frameSamples}});this.entryNode=s;const o=await f.new(t,F);this.frameProcessor=new l(o.process,o.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),s.port.onmessage=async t=>{if(t.data?.message===e.AudioFrame){const e=t.data.data,i=new Float32Array(e);await this.processFrame(i)}}})),this.ctx=i,this.options=s,p(s)}}class j extends w{configure(){this.ort=t,this.modelFetcher=F}}const B={audioFileToArray:async function(e){const t=new OfflineAudioContext(1,1,44100),i=new FileReader;let r=null;if(await new Promise((s=>{i.addEventListener("loadend",(e=>{const o=i.result;t.decodeAudioData(o,(e=>{r=e,t.startRendering().then((e=>{console.log("Rendering completed successfully"),s()})).catch((e=>{console.error(`Rendering failed: ${e}`)}))}),(e=>{console.log(`Error with decoding audio data: ${e}`)}))})),i.readAsArrayBuffer(e)})),null===r)throw Error("some shit");let s=r,o=new Float32Array(s.length);for(let e=0;e<s.length;e++)for(let t=0;t<s.numberOfChannels;t++)o[e]+=s.getChannelData(t)[e];return{audio:o,sampleRate:s.sampleRate}},...y}})(),s})())); | ||
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{"use strict";var t={656:t=>{t.exports=e}},i={};function r(e){var s=i[e];if(void 0!==s)return s.exports;var o=i[e]={exports:{}};return t[e](o,o.exports,r),o.exports}r.d=(e,t)=>{for(var i in t)r.o(t,i)&&!r.o(e,i)&&Object.defineProperty(e,i,{enumerable:!0,get:t[i]})},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;r.g.importScripts&&(e=r.g.location+"");var t=r.g.document;if(!e&&t&&(t.currentScript&&(e=t.currentScript.src),!e)){var i=t.getElementsByTagName("script");i.length&&(e=i[i.length-1].src)}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),r.p=e})();var s={};return(()=>{r.r(s),r.d(s,{AudioNodeVAD:()=>E,FrameProcessor:()=>l,Message:()=>e,MicVAD:()=>T,NonRealTimeVAD:()=>j,defaultRealTimeVADOptions:()=>A,utils:()=>D});var e,t=r(656);function i(e,t,i){for(var r=0;r<i.length;r++)e.setUint8(t+r,i.charCodeAt(r))}!function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END"}(e||(e={}));const o="[VAD]",n=["error","debug","warn"].reduce(((e,t)=>(e[t]=function(e){return function(){for(var t=arguments.length,i=new Array(t),r=0;r<t;r++)i[r]=arguments[r];console[e](o,...i)}}(t),e)),{});function a(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const h=[512,1024,1536],c={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3};function p(e){h.includes(e.frameSamples)||n.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&n.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&n.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&n.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&n.error("preSpeechPadFrames should be positive")}const u=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),i=new Float32Array(t.at(-1));return e.forEach(((e,r)=>{const s=t[r];i.set(e,s)})),i};class l{constructor(t,i,r){a(this,"modelProcessFunc",void 0),a(this,"modelResetFunc",void 0),a(this,"options",void 0),a(this,"speaking",!1),a(this,"audioBuffer",void 0),a(this,"redemptionCounter",0),a(this,"active",!1),a(this,"reset",(()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0})),a(this,"pause",(()=>{this.active=!1,this.reset()})),a(this,"resume",(()=>{this.active=!0})),a(this,"endSegment",(()=>{const t=this.audioBuffer;this.audioBuffer=[];const i=this.speaking;this.reset();const r=t.reduce(((e,t)=>e+ +t.isSpeech),0);if(i){if(r>=this.options.minSpeechFrames){const i=u(t.map((e=>e.frame)));return{msg:e.SpeechEnd,audio:i}}return{msg:e.VADMisfire}}return{}})),a(this,"process",(async t=>{if(!this.active)return{};const i=await this.modelProcessFunc(t);if(this.audioBuffer.push({frame:t,isSpeech:i.isSpeech>=this.options.positiveSpeechThreshold}),i.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),i.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:i,msg:e.SpeechStart};if(i.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const t=this.audioBuffer;if(this.audioBuffer=[],t.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const r=u(t.map((e=>e.frame)));return{probs:i,msg:e.SpeechEnd,audio:r}}return{probs:i,msg:e.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:i}})),this.modelProcessFunc=t,this.modelResetFunc=i,this.options=r,this.audioBuffer=[],this.reset()}}function d(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}class f{constructor(e,t){d(this,"ort",void 0),d(this,"modelFetcher",void 0),d(this,"_session",void 0),d(this,"_h",void 0),d(this,"_c",void 0),d(this,"_sr",void 0),d(this,"init",(async()=>{n.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),n.debug("vad is initialized")})),d(this,"reset_state",(()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])})),d(this,"process",(async e=>{const t={input:new this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},i=await this._session.run(t);this._h=i.hn,this._c=i.cn;const[r]=i.output.data;return{notSpeech:1-r,isSpeech:r}})),this.ort=e,this.modelFetcher=t}}function m(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}d(f,"new",(async(e,t)=>{const i=new f(e,t);return await i.init(),i}));class v{constructor(e){m(this,"options",void 0),m(this,"inputBuffer",void 0),m(this,"process",(e=>{const t=[];for(const t of e)this.inputBuffer.push(t);for(;this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>this.options.targetFrameSize;){const e=new Float32Array(this.options.targetFrameSize);let i=0,r=0;for(;i<this.options.targetFrameSize;){let t=0,s=0;for(;r<Math.min(this.inputBuffer.length,(i+1)*this.options.nativeSampleRate/this.options.targetSampleRate);)t+=this.inputBuffer[r],s++,r++;e[i]=t/s,i++}this.inputBuffer=this.inputBuffer.slice(r),t.push(e)}return t})),this.options=e,e.nativeSampleRate<16e3&&n.error("nativeSampleRate is too low. Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}}function S(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const g={...c};class w{configure(){throw new Error}static async new(){const e=new this({...g,...arguments.length>0&&void 0!==arguments[0]?arguments[0]:{}});return await e.init(),e}constructor(t){S(this,"options",void 0),S(this,"ort",void 0),S(this,"modelFetcher",void 0),S(this,"frameProcessor",void 0),S(this,"init",(async()=>{const e=await f.new(this.ort,this.modelFetcher);this.frameProcessor=new l(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),this.frameProcessor.resume()})),S(this,"run",(async function*(t,i){const r={nativeSampleRate:i,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},s=new v(r).process(t);let o,n;for(const t of[...Array(s.length)].keys()){const i=s[t],{msg:r,audio:a}=await this.frameProcessor.process(i);switch(r){case e.SpeechStart:o=t*this.options.frameSamples/16;break;case e.SpeechEnd:n=(t+1)*this.options.frameSamples/16,yield{audio:a,start:o,end:n}}}const{msg:a,audio:h}=this.frameProcessor.endSegment();a==e.SpeechEnd&&(yield{audio:h,start:o,end:s.length*this.options.frameSamples/16})})),this.options=t,p(t),this.configure()}}const y={minFramesForTargetMS:function(e,t){let i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:16e3;return Math.ceil(e*i/1e3/t)},arrayBufferToBase64:function(e){for(var t="",i=new Uint8Array(e),r=i.byteLength,s=0;s<r;s++)t+=String.fromCharCode(i[s]);return btoa(t)},encodeWAV:function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:3,r=arguments.length>2&&void 0!==arguments[2]?arguments[2]:16e3,s=arguments.length>3&&void 0!==arguments[3]?arguments[3]:1,o=arguments.length>4&&void 0!==arguments[4]?arguments[4]:32;var n=o/8,a=s*n,h=new ArrayBuffer(44+e.length*n),c=new DataView(h);return i(c,0,"RIFF"),c.setUint32(4,36+e.length*n,!0),i(c,8,"WAVE"),i(c,12,"fmt "),c.setUint32(16,16,!0),c.setUint16(20,t,!0),c.setUint16(22,s,!0),c.setUint32(24,r,!0),c.setUint32(28,r*a,!0),c.setUint16(32,a,!0),c.setUint16(34,o,!0),i(c,36,"data"),c.setUint32(40,e.length*n,!0),1===t?function(e,t,i){for(var r=0;r<i.length;r++,t+=2){var s=Math.max(-1,Math.min(1,i[r]));e.setInt16(t,s<0?32768*s:32767*s,!0)}}(c,44,e):function(e,t,i){for(var r=0;r<i.length;r++,t+=4)e.setFloat32(t,i[r],!0)}(c,44,e),h}},b=r.p+"568bf886c02ac597add4.onnx",F=async()=>await fetch(b).then((e=>e.arrayBuffer()));function P(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const A={...c,onFrameProcessed:e=>{},onVADMisfire:()=>{n.debug("VAD misfire")},onSpeechStart:()=>{n.debug("Detected speech start")},onSpeechEnd:()=>{n.debug("Detected speech end")}};class T{static async new(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};const t=new T({...A,...e});return await t.init(),t}constructor(e){P(this,"listening",!1),P(this,"init",(async()=>{this.stream=await navigator.mediaDevices.getUserMedia({audio:{...this.options.additionalAudioConstraints,channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}),this.audioContext=new AudioContext;const e=new MediaStreamAudioSourceNode(this.audioContext,{mediaStream:this.stream});this.audioNodeVAD=await E.new(this.audioContext,this.options),this.audioNodeVAD.receive(e)})),P(this,"pause",(()=>{this.audioNodeVAD.pause(),this.listening=!1})),P(this,"start",(()=>{this.audioNodeVAD.start(),this.listening=!0})),this.options=e,p(e)}}class E{static async new(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};const i=new E(e,{...A,...t});return await i.init(),i}constructor(i,s){P(this,"pause",(()=>{this.frameProcessor.pause()})),P(this,"start",(()=>{this.frameProcessor.resume()})),P(this,"receive",(e=>{e.connect(this.entryNode)})),P(this,"processFrame",(async t=>{const{probs:i,msg:r,audio:s}=await this.frameProcessor.process(t);switch(void 0!==i&&this.options.onFrameProcessed(i),r){case e.SpeechStart:this.options.onSpeechStart();break;case e.VADMisfire:this.options.onVADMisfire();break;case e.SpeechEnd:this.options.onSpeechEnd(s)}})),P(this,"init",(async()=>{const i=r.p+"vad.worklet.js";await this.ctx.audioWorklet.addModule(i);const s=new AudioWorkletNode(this.ctx,"vad-helper-worklet",{processorOptions:{frameSamples:this.options.frameSamples}});this.entryNode=s;const o=await f.new(t,F);this.frameProcessor=new l(o.process,o.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),s.port.onmessage=async t=>{if(t.data?.message===e.AudioFrame){const e=t.data.data,i=new Float32Array(e);await this.processFrame(i)}}})),this.ctx=i,this.options=s,p(s)}}class j extends w{configure(){this.ort=t,this.modelFetcher=F}}const D={audioFileToArray:async function(e){const t=new OfflineAudioContext(1,1,44100),i=new FileReader;let r=null;if(await new Promise((s=>{i.addEventListener("loadend",(e=>{const o=i.result;t.decodeAudioData(o,(e=>{r=e,t.startRendering().then((e=>{console.log("Rendering completed successfully"),s()})).catch((e=>{console.error(`Rendering failed: ${e}`)}))}),(e=>{console.log(`Error with decoding audio data: ${e}`)}))})),i.readAsArrayBuffer(e)})),null===r)throw Error("some shit");let s=r,o=new Float32Array(s.length);for(let e=0;e<s.length;e++)for(let t=0;t<s.numberOfChannels;t++)o[e]+=s.getChannelData(t)[e];return{audio:o,sampleRate:s.sampleRate}},...y}})(),s})())); |
@@ -26,3 +26,3 @@ import { SpeechProbabilities, FrameProcessor, FrameProcessorOptions } from "@ricky0123/vad-common"; | ||
} | ||
export declare const defaultRealtimeVADOptions: RealTimeVADOptions; | ||
export declare const defaultRealTimeVADOptions: RealTimeVADOptions; | ||
export declare class MicVAD { | ||
@@ -29,0 +29,0 @@ options: RealTimeVADOptions; |
@@ -15,3 +15,4 @@ { | ||
"homepage": "https://github.com/ricky0123/vad", | ||
"version": "0.0.2", | ||
"version": "0.0.3", | ||
"license": "ISC", | ||
"main": "dist/index.js", | ||
@@ -18,0 +19,0 @@ "devDependencies": { |
Sorry, the diff of this file is not supported yet
No License Found
License(Experimental) License information could not be found.
Found 1 instance in 1 package
1832726
0