@ricky0123/vad-web
Advanced tools
Comparing version
@@ -1,10 +0,10 @@ | ||
import { NonRealTimeVAD as _NonRealTimeVAD, FrameProcessor, FrameProcessorOptions, Message, NonRealTimeVADOptions } from "@ricky0123/vad-common"; | ||
import { PlatformAgnosticNonRealTimeVAD, FrameProcessor, FrameProcessorOptions, Message, NonRealTimeVADOptions } from "./_common"; | ||
import { audioFileToArray } from "./utils"; | ||
declare class NonRealTimeVAD extends _NonRealTimeVAD { | ||
configure(): void; | ||
declare class NonRealTimeVAD extends PlatformAgnosticNonRealTimeVAD { | ||
static new(options?: Partial<NonRealTimeVADOptions>): Promise<NonRealTimeVAD>; | ||
} | ||
export declare const utils: { | ||
minFramesForTargetMS: typeof import("@ricky0123/vad-common/dist/utils").minFramesForTargetMS; | ||
arrayBufferToBase64: typeof import("@ricky0123/vad-common/dist/utils").arrayBufferToBase64; | ||
encodeWAV: typeof import("@ricky0123/vad-common/dist/utils").encodeWAV; | ||
minFramesForTargetMS: typeof import("./_common/utils").minFramesForTargetMS; | ||
arrayBufferToBase64: typeof import("./_common/utils").arrayBufferToBase64; | ||
encodeWAV: typeof import("./_common/utils").encodeWAV; | ||
audioFileToArray: typeof audioFileToArray; | ||
@@ -14,4 +14,4 @@ }; | ||
export type { FrameProcessorOptions, NonRealTimeVADOptions }; | ||
export { MicVAD, AudioNodeVAD, defaultRealTimeVADOptions } from "./real-time-vad"; | ||
export { MicVAD, AudioNodeVAD, defaultRealTimeVADOptions, } from "./real-time-vad"; | ||
export type { RealTimeVADOptions } from "./real-time-vad"; | ||
//# sourceMappingURL=index.d.ts.map |
@@ -1,1 +0,13 @@ | ||
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t(require("onnxruntime-web")):"function"==typeof define&&define.amd?define(["onnxruntime-web"],t):"object"==typeof exports?exports.vad=t(require("onnxruntime-web")):e.vad=t(e.ort)}(self,(e=>(()=>{"use strict";var t={656:t=>{t.exports=e}},i={};function r(e){var s=i[e];if(void 0!==s)return s.exports;var o=i[e]={exports:{}};return t[e](o,o.exports,r),o.exports}r.d=(e,t)=>{for(var i in t)r.o(t,i)&&!r.o(e,i)&&Object.defineProperty(e,i,{enumerable:!0,get:t[i]})},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;r.g.importScripts&&(e=r.g.location+"");var t=r.g.document;if(!e&&t&&(t.currentScript&&(e=t.currentScript.src),!e)){var i=t.getElementsByTagName("script");i.length&&(e=i[i.length-1].src)}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),r.p=e})();var s={};return(()=>{r.r(s),r.d(s,{AudioNodeVAD:()=>E,FrameProcessor:()=>l,Message:()=>e,MicVAD:()=>T,NonRealTimeVAD:()=>j,defaultRealTimeVADOptions:()=>A,utils:()=>D});var e,t=r(656);function i(e,t,i){for(var r=0;r<i.length;r++)e.setUint8(t+r,i.charCodeAt(r))}!function(e){e.AudioFrame="AUDIO_FRAME",e.SpeechStart="SPEECH_START",e.VADMisfire="VAD_MISFIRE",e.SpeechEnd="SPEECH_END"}(e||(e={}));const o="[VAD]",n=["error","debug","warn"].reduce(((e,t)=>(e[t]=function(e){return function(){for(var t=arguments.length,i=new Array(t),r=0;r<t;r++)i[r]=arguments[r];console[e](o,...i)}}(t),e)),{});function a(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var 
i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const h=[512,1024,1536],c={positiveSpeechThreshold:.5,negativeSpeechThreshold:.35,preSpeechPadFrames:1,redemptionFrames:8,frameSamples:1536,minSpeechFrames:3};function p(e){h.includes(e.frameSamples)||n.warn("You are using an unusual frame size"),(e.positiveSpeechThreshold<0||e.negativeSpeechThreshold>1)&&n.error("postiveSpeechThreshold should be a number between 0 and 1"),(e.negativeSpeechThreshold<0||e.negativeSpeechThreshold>e.positiveSpeechThreshold)&&n.error("negativeSpeechThreshold should be between 0 and postiveSpeechThreshold"),e.preSpeechPadFrames<0&&n.error("preSpeechPadFrames should be positive"),e.redemptionFrames<0&&n.error("preSpeechPadFrames should be positive")}const u=e=>{const t=e.reduce(((e,t)=>(e.push(e.at(-1)+t.length),e)),[0]),i=new Float32Array(t.at(-1));return e.forEach(((e,r)=>{const s=t[r];i.set(e,s)})),i};class l{constructor(t,i,r){a(this,"modelProcessFunc",void 0),a(this,"modelResetFunc",void 0),a(this,"options",void 0),a(this,"speaking",!1),a(this,"audioBuffer",void 0),a(this,"redemptionCounter",0),a(this,"active",!1),a(this,"reset",(()=>{this.speaking=!1,this.audioBuffer=[],this.modelResetFunc(),this.redemptionCounter=0})),a(this,"pause",(()=>{this.active=!1,this.reset()})),a(this,"resume",(()=>{this.active=!0})),a(this,"endSegment",(()=>{const t=this.audioBuffer;this.audioBuffer=[];const i=this.speaking;this.reset();const r=t.reduce(((e,t)=>e+ +t.isSpeech),0);if(i){if(r>=this.options.minSpeechFrames){const i=u(t.map((e=>e.frame)));return{msg:e.SpeechEnd,audio:i}}return{msg:e.VADMisfire}}return{}})),a(this,"process",(async t=>{if(!this.active)return{};const i=await 
this.modelProcessFunc(t);if(this.audioBuffer.push({frame:t,isSpeech:i.isSpeech>=this.options.positiveSpeechThreshold}),i.isSpeech>=this.options.positiveSpeechThreshold&&this.redemptionCounter&&(this.redemptionCounter=0),i.isSpeech>=this.options.positiveSpeechThreshold&&!this.speaking)return this.speaking=!0,{probs:i,msg:e.SpeechStart};if(i.isSpeech<this.options.negativeSpeechThreshold&&this.speaking&&++this.redemptionCounter>=this.options.redemptionFrames){this.redemptionCounter=0,this.speaking=!1;const t=this.audioBuffer;if(this.audioBuffer=[],t.reduce(((e,t)=>e+ +t.isSpeech),0)>=this.options.minSpeechFrames){const r=u(t.map((e=>e.frame)));return{probs:i,msg:e.SpeechEnd,audio:r}}return{probs:i,msg:e.VADMisfire}}if(!this.speaking)for(;this.audioBuffer.length>this.options.preSpeechPadFrames;)this.audioBuffer.shift();return{probs:i}})),this.modelProcessFunc=t,this.modelResetFunc=i,this.options=r,this.audioBuffer=[],this.reset()}}function d(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}class f{constructor(e,t){d(this,"ort",void 0),d(this,"modelFetcher",void 0),d(this,"_session",void 0),d(this,"_h",void 0),d(this,"_c",void 0),d(this,"_sr",void 0),d(this,"init",(async()=>{n.debug("initializing vad");const e=await this.modelFetcher();this._session=await this.ort.InferenceSession.create(e),this._sr=new this.ort.Tensor("int64",[16000n]),this.reset_state(),n.debug("vad is initialized")})),d(this,"reset_state",(()=>{const e=Array(128).fill(0);this._h=new this.ort.Tensor("float32",e,[2,1,64]),this._c=new this.ort.Tensor("float32",e,[2,1,64])})),d(this,"process",(async e=>{const t={input:new 
this.ort.Tensor("float32",e,[1,e.length]),h:this._h,c:this._c,sr:this._sr},i=await this._session.run(t);this._h=i.hn,this._c=i.cn;const[r]=i.output.data;return{notSpeech:1-r,isSpeech:r}})),this.ort=e,this.modelFetcher=t}}function m(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}d(f,"new",(async(e,t)=>{const i=new f(e,t);return await i.init(),i}));class v{constructor(e){m(this,"options",void 0),m(this,"inputBuffer",void 0),m(this,"process",(e=>{const t=[];for(const t of e)this.inputBuffer.push(t);for(;this.inputBuffer.length*this.options.targetSampleRate/this.options.nativeSampleRate>this.options.targetFrameSize;){const e=new Float32Array(this.options.targetFrameSize);let i=0,r=0;for(;i<this.options.targetFrameSize;){let t=0,s=0;for(;r<Math.min(this.inputBuffer.length,(i+1)*this.options.nativeSampleRate/this.options.targetSampleRate);)t+=this.inputBuffer[r],s++,r++;e[i]=t/s,i++}this.inputBuffer=this.inputBuffer.slice(r),t.push(e)}return t})),this.options=e,e.nativeSampleRate<16e3&&n.error("nativeSampleRate is too low. 
Should have 16000 = targetSampleRate <= nativeSampleRate"),this.inputBuffer=[]}}function S(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const g={...c};class w{configure(){throw new Error}static async new(){const e=new this({...g,...arguments.length>0&&void 0!==arguments[0]?arguments[0]:{}});return await e.init(),e}constructor(t){S(this,"options",void 0),S(this,"ort",void 0),S(this,"modelFetcher",void 0),S(this,"frameProcessor",void 0),S(this,"init",(async()=>{const e=await f.new(this.ort,this.modelFetcher);this.frameProcessor=new l(e.process,e.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),this.frameProcessor.resume()})),S(this,"run",(async function*(t,i){const r={nativeSampleRate:i,targetSampleRate:16e3,targetFrameSize:this.options.frameSamples},s=new v(r).process(t);let o,n;for(const t of[...Array(s.length)].keys()){const i=s[t],{msg:r,audio:a}=await this.frameProcessor.process(i);switch(r){case e.SpeechStart:o=t*this.options.frameSamples/16;break;case e.SpeechEnd:n=(t+1)*this.options.frameSamples/16,yield{audio:a,start:o,end:n}}}const{msg:a,audio:h}=this.frameProcessor.endSegment();a==e.SpeechEnd&&(yield{audio:h,start:o,end:s.length*this.options.frameSamples/16})})),this.options=t,p(t),this.configure()}}const y={minFramesForTargetMS:function(e,t){let i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:16e3;return 
Math.ceil(e*i/1e3/t)},arrayBufferToBase64:function(e){for(var t="",i=new Uint8Array(e),r=i.byteLength,s=0;s<r;s++)t+=String.fromCharCode(i[s]);return btoa(t)},encodeWAV:function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:3,r=arguments.length>2&&void 0!==arguments[2]?arguments[2]:16e3,s=arguments.length>3&&void 0!==arguments[3]?arguments[3]:1,o=arguments.length>4&&void 0!==arguments[4]?arguments[4]:32;var n=o/8,a=s*n,h=new ArrayBuffer(44+e.length*n),c=new DataView(h);return i(c,0,"RIFF"),c.setUint32(4,36+e.length*n,!0),i(c,8,"WAVE"),i(c,12,"fmt "),c.setUint32(16,16,!0),c.setUint16(20,t,!0),c.setUint16(22,s,!0),c.setUint32(24,r,!0),c.setUint32(28,r*a,!0),c.setUint16(32,a,!0),c.setUint16(34,o,!0),i(c,36,"data"),c.setUint32(40,e.length*n,!0),1===t?function(e,t,i){for(var r=0;r<i.length;r++,t+=2){var s=Math.max(-1,Math.min(1,i[r]));e.setInt16(t,s<0?32768*s:32767*s,!0)}}(c,44,e):function(e,t,i){for(var r=0;r<i.length;r++,t+=4)e.setFloat32(t,i[r],!0)}(c,44,e),h}},b=r.p+"568bf886c02ac597add4.onnx",F=async()=>await fetch(b).then((e=>e.arrayBuffer()));function P(e,t,i){return(t=function(e){var t=function(e,t){if("object"!=typeof e||null===e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var r=i.call(e,"string");if("object"!=typeof r)return r;throw new TypeError("@@toPrimitive must return a primitive value.")}return String(e)}(e);return"symbol"==typeof t?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:i,enumerable:!0,configurable:!0,writable:!0}):e[t]=i,e}const A={...c,onFrameProcessed:e=>{},onVADMisfire:()=>{n.debug("VAD misfire")},onSpeechStart:()=>{n.debug("Detected speech start")},onSpeechEnd:()=>{n.debug("Detected speech end")}};class T{static async new(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};const t=new T({...A,...e});return await t.init(),t}constructor(e){P(this,"listening",!1),P(this,"init",(async()=>{this.stream=await 
navigator.mediaDevices.getUserMedia({audio:{...this.options.additionalAudioConstraints,channelCount:1,echoCancellation:!0,autoGainControl:!0,noiseSuppression:!0}}),this.audioContext=new AudioContext;const e=new MediaStreamAudioSourceNode(this.audioContext,{mediaStream:this.stream});this.audioNodeVAD=await E.new(this.audioContext,this.options),this.audioNodeVAD.receive(e)})),P(this,"pause",(()=>{this.audioNodeVAD.pause(),this.listening=!1})),P(this,"start",(()=>{this.audioNodeVAD.start(),this.listening=!0})),this.options=e,p(e)}}class E{static async new(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};const i=new E(e,{...A,...t});return await i.init(),i}constructor(i,s){P(this,"pause",(()=>{this.frameProcessor.pause()})),P(this,"start",(()=>{this.frameProcessor.resume()})),P(this,"receive",(e=>{e.connect(this.entryNode)})),P(this,"processFrame",(async t=>{const{probs:i,msg:r,audio:s}=await this.frameProcessor.process(t);switch(void 0!==i&&this.options.onFrameProcessed(i),r){case e.SpeechStart:this.options.onSpeechStart();break;case e.VADMisfire:this.options.onVADMisfire();break;case e.SpeechEnd:this.options.onSpeechEnd(s)}})),P(this,"init",(async()=>{const i=r.p+"vad.worklet.js";await this.ctx.audioWorklet.addModule(i);const s=new AudioWorkletNode(this.ctx,"vad-helper-worklet",{processorOptions:{frameSamples:this.options.frameSamples}});this.entryNode=s;const o=await f.new(t,F);this.frameProcessor=new l(o.process,o.reset_state,{frameSamples:this.options.frameSamples,positiveSpeechThreshold:this.options.positiveSpeechThreshold,negativeSpeechThreshold:this.options.negativeSpeechThreshold,redemptionFrames:this.options.redemptionFrames,preSpeechPadFrames:this.options.preSpeechPadFrames,minSpeechFrames:this.options.minSpeechFrames}),s.port.onmessage=async t=>{if(t.data?.message===e.AudioFrame){const e=t.data.data,i=new Float32Array(e);await this.processFrame(i)}}})),this.ctx=i,this.options=s,p(s)}}class j extends 
w{configure(){this.ort=t,this.modelFetcher=F}}const D={audioFileToArray:async function(e){const t=new OfflineAudioContext(1,1,44100),i=new FileReader;let r=null;if(await new Promise((s=>{i.addEventListener("loadend",(e=>{const o=i.result;t.decodeAudioData(o,(e=>{r=e,t.startRendering().then((e=>{console.log("Rendering completed successfully"),s()})).catch((e=>{console.error(`Rendering failed: ${e}`)}))}),(e=>{console.log(`Error with decoding audio data: ${e}`)}))})),i.readAsArrayBuffer(e)})),null===r)throw Error("some shit");let s=r,o=new Float32Array(s.length);for(let e=0;e<s.length;e++)for(let t=0;t<s.numberOfChannels;t++)o[e]+=s.getChannelData(t)[e];return{audio:o,sampleRate:s.sampleRate}},...y}})(),s})())); | ||
import * as ort from "onnxruntime-web"; | ||
import { utils as _utils, PlatformAgnosticNonRealTimeVAD, FrameProcessor, Message, } from "./_common"; | ||
import { modelFetcher } from "./model-fetcher"; | ||
import { audioFileToArray } from "./utils"; | ||
class NonRealTimeVAD extends PlatformAgnosticNonRealTimeVAD { | ||
static async new(options = {}) { | ||
return await this._new(modelFetcher, ort, options); | ||
} | ||
} | ||
export const utils = { audioFileToArray, ..._utils }; | ||
export { FrameProcessor, Message, NonRealTimeVAD }; | ||
export { MicVAD, AudioNodeVAD, defaultRealTimeVADOptions, } from "./real-time-vad"; | ||
//# sourceMappingURL=index.js.map |
@@ -1,2 +0,2 @@ | ||
import { SpeechProbabilities, FrameProcessor, FrameProcessorOptions } from "@ricky0123/vad-common"; | ||
import { SpeechProbabilities, FrameProcessor, FrameProcessorOptions } from "./_common"; | ||
interface RealTimeVADCallbacks { | ||
@@ -25,2 +25,3 @@ /** Callback to run after each frame. The size (number of samples) of a frame is given by `frameSamples`. */ | ||
additionalAudioConstraints?: AudioConstraints; | ||
workletURL: string; | ||
} | ||
@@ -27,0 +28,0 @@ export declare const defaultRealTimeVADOptions: RealTimeVADOptions; |
@@ -15,14 +15,9 @@ { | ||
"homepage": "https://github.com/ricky0123/vad", | ||
"version": "0.0.3", | ||
"version": "0.0.4", | ||
"license": "ISC", | ||
"main": "dist/index.js", | ||
"devDependencies": { | ||
"@ricky0123/vad-common": "*", | ||
"@babel/core": "^7.20.12", | ||
"@babel/preset-env": "^7.20.2", | ||
"@babel/preset-typescript": "^7.18.6", | ||
"@playwright/test": "^1.30.0", | ||
"@types/audioworklet": "^0.0.36", | ||
"@types/express": "^4.17.17", | ||
"babel-loader": "^9.1.2", | ||
"express": "^4.18.2", | ||
@@ -36,6 +31,7 @@ "webpack": "^5.75.0", | ||
"scripts": { | ||
"build": "webpack && tsc -p tsconfig.types.json", | ||
"test-server": "tsc -p tsconfig.scripts.json && node ./scripts/test-server.js", | ||
"test": "playwright test" | ||
"build": "./scripts/build.sh", | ||
"test-server": "tsc -p ./scripts/tsconfig.json && node ./scripts/test-server.js", | ||
"test": "playwright test", | ||
"clean": "rm -rf dist" | ||
} | ||
} |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Uses eval
Supply chain risk: Package uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
Minified code
Quality: This package contains minified code. This may be harmless in some cases where minified code is included in packaged libraries; however, packages on npm should not minify code.
Found 1 instance in 1 package
Uses eval
Supply chain risk: Package uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
Minified code
Quality: This package contains minified code. This may be harmless in some cases where minified code is included in packaged libraries; however, packages on npm should not minify code.
Found 1 instance in 1 package
1954676
6.65%6
-45.45%59
293.33%1345
1742.47%10
900%15
650%