@lenml/tokenizers
Advanced tools
| export * from "./TokenizerLoader"; | ||
| export * as tokenizers from "./tokenizers/tokenizers"; | ||
| export * from "./types"; |
| import { | ||
| AutoTokenizer as _AutoTokenizer, | ||
| PreTrainedTokenizer, | ||
| } from "./tokenizers/tokenizers"; | ||
| import { NSTokenizerConfig, NSTokenizerJSON } from "./types"; | ||
/**
 * Already-loaded tokenizer data accepted by `TokenizerLoader.fromPreTrained`.
 */
interface ITokenizerModelJsonData {
  /** Tokenizer definition object. */
  tokenizerJSON: Partial<NSTokenizerJSON.Root>;
  /** Tokenizer configuration object; its `tokenizer_class` selects the implementation. */
  tokenizerConfig: Partial<NSTokenizerConfig.Root>;
}

/**
 * URLs that `TokenizerLoader.fromPreTrainedUrls` downloads the tokenizer data from.
 */
interface ITokenizerModelUrls {
  /** URL whose response body is the tokenizer definition JSON. */
  tokenizerJSON: string;
  /** URL whose response body is the tokenizer configuration JSON. */
  tokenizerConfig: string;
}

export class TokenizerLoader {
  /**
   * Creates a pre-trained tokenizer from the provided model data.
   *
   * The implementation class is looked up by `tokenizerConfig.tokenizer_class`
   * (with a trailing "Fast" removed). A missing or unknown class name falls back
   * to `PreTrainedTokenizer`; the unknown case also logs a warning.
   *
   * @param {ITokenizerModelJsonData} model - The model data containing the tokenizer JSON and configuration.
   * @return {PreTrainedTokenizer} pre-trained tokenizer.
   * @throws {Error} If the tokenizer JSON or configuration is missing.
   */
  static fromPreTrained(model: ITokenizerModelJsonData): PreTrainedTokenizer {
    const { tokenizerJSON, tokenizerConfig } = model;
    if (!tokenizerJSON) {
      throw new Error("tokenizerJSON is required.");
    }
    if (!tokenizerConfig) {
      throw new Error("tokenizerConfig is required.");
    }
    // Some tokenizers are saved with the "Fast" suffix, so we remove that if present.
    const tokenizerName =
      tokenizerConfig.tokenizer_class?.replace(/Fast$/, "") ??
      "PreTrainedTokenizer";
    // The class mapping is reached through an `any` cast on the auto-tokenizer.
    let cls = (_AutoTokenizer as any).TOKENIZER_CLASS_MAPPING[tokenizerName];
    if (!cls) {
      console.warn(
        `Unknown tokenizer class "${tokenizerName}", attempting to construct from base class.`
      );
      cls = PreTrainedTokenizer;
    }
    return new cls(tokenizerJSON, tokenizerConfig);
  }

  /**
   * Creates a pre-trained tokenizer from the provided model URLs.
   *
   * Both URLs are fetched concurrently and parsed as JSON. Properties supplied in
   * `options.tokenizerJSON` / `options.tokenizerConfig` are shallow-merged over the
   * fetched objects (the overrides win).
   *
   * @param {ITokenizerModelUrls} model - The model URLs containing the tokenizer JSON and configuration.
   * @param {Object} [options] - Optional parameters.
   * @param {any} [options.fetch] - The fetch function to use for making HTTP requests. Defaults to `globalThis.fetch`.
   * @param {Partial<NSTokenizerJSON.Root>} [options.tokenizerJSON] - Additional tokenizer JSON data to merge with the fetched data.
   * @param {Partial<NSTokenizerConfig.Root>} [options.tokenizerConfig] - Additional tokenizer configuration data to merge with the fetched data.
   * @return {Promise<PreTrainedTokenizer>} A promise that resolves to the pre-trained tokenizer.
   * @throws {Error} If no fetch implementation is available, or a response reports `ok === false`.
   */
  static async fromPreTrainedUrls(
    model: ITokenizerModelUrls,
    options?: {
      fetch?: any;
    } & Partial<ITokenizerModelJsonData>
  ): Promise<PreTrainedTokenizer> {
    const fetchImpl: typeof globalThis.fetch | undefined =
      options?.fetch ?? globalThis.fetch?.bind(globalThis);
    if (typeof fetchImpl !== "function") {
      throw new Error(
        "No fetch implementation available; pass one via options.fetch."
      );
    }

    // Downloads one URL and parses it as JSON, rejecting on HTTP error responses
    // so that an error payload is never mistaken for tokenizer data.
    const fetchJson = async (url: string): Promise<any> => {
      const res = await fetchImpl(url);
      // Compare against `false` explicitly: minimal custom fetch implementations
      // may return response-like objects without an `ok` field.
      if (res.ok === false) {
        throw new Error(
          `Failed to fetch "${url}": ${res.status} ${res.statusText}`
        );
      }
      return res.json();
    };

    const [tokenizerJSON, tokenizerConfig] = await Promise.all([
      fetchJson(model.tokenizerJSON),
      fetchJson(model.tokenizerConfig),
    ]);
    return TokenizerLoader.fromPreTrained({
      tokenizerJSON: {
        ...tokenizerJSON,
        ...options?.tokenizerJSON,
      },
      tokenizerConfig: {
        ...tokenizerConfig,
        ...options?.tokenizerConfig,
      },
    });
  }
}
| # tokenizers.js | ||
| This code is forked from `https://github.com/huggingface/transformers`. | ||
| # What changed? | ||
| - Removed the ONNX dependencies | ||
| - Removed the env dependencies (esm/hf_repo_downloader) | ||
| # License | ||
| Apache-2.0 |
Sorry, the diff of this file is too big to display
| /** | ||
| * @file Core utility functions/classes for Transformers.js. | ||
| * | ||
| * These are only used internally, meaning an end-user shouldn't | ||
| * need to access anything here. | ||
| * | ||
| * @module utils/core | ||
| */ | ||
/**
 * Invokes a progress callback with `data`, if a callback was supplied.
 *
 * @param {Function} progress_callback The callback to invoke; any falsy value is ignored.
 * @param {any} data The single argument handed to the callback.
 * @returns {void}
 * @private
 */
export function dispatchCallback(progress_callback, data) {
  if (!progress_callback) {
    return;
  }
  progress_callback(data);
}
/**
 * Builds a new object in which every value of `data` becomes a key and
 * every key becomes the corresponding value.
 *
 * @param {Object} data The object to invert.
 * @returns {Object} The inverted object.
 * @see https://ultimatecourses.com/blog/reverse-object-keys-and-values-in-javascript
 */
export function reverseDictionary(data) {
  const swappedPairs = [];
  for (const key of Object.keys(data)) {
    swappedPairs.push([data[key], key]);
  }
  return Object.fromEntries(swappedPairs);
}
/**
 * Backslash-escapes every regular-expression metacharacter in a string so
 * the result can be embedded in a pattern and matched literally.
 *
 * @param {string} string The string to escape.
 * @returns {string} The escaped string.
 */
export function escapeRegExp(string) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // In the replacement, `$&` stands for the whole matched character.
  return string.replace(metaCharacters, "\\$&");
}
/**
 * Base class whose instances can be invoked like functions.
 * Calling an instance forwards the arguments to its `_call` method.
 *
 * @type {new () => {(...args: any[]): any, _call(...args: any[]): any}}
 */
export const Callable = /** @type {any} */ (
  class {
    /**
     * Builds the callable instance: a function whose prototype is swapped for
     * the prototype of the class being constructed, so it inherits that class's methods.
     */
    constructor() {
      /**
       * Function returned in place of `this`; it delegates to `_call`.
       * @type {any}
       * @param {...any} args Arguments forwarded to `_call`.
       * @returns {*} Whatever `_call` returns.
       */
      const instance = function (...args) {
        return instance._call(...args);
      };
      Object.setPrototypeOf(instance, new.target.prototype);
      return instance;
    }

    /**
     * Hook that subclasses override to define what calling the instance does.
     *
     * @param {any[]} args
     * @throws {Error} Always, unless overridden by the subclass.
     */
    _call(...args) {
      throw Error("Must implement _call method in subclass");
    }
  }
);
/**
 * Check if a value is a typed array.
 *
 * Returns `true` for typed-array instances (e.g. `new Float32Array(4)`,
 * `new BigInt64Array(1)`), excluding `DataView`. For backward compatibility it
 * also keeps returning `true` for typed-array constructors such as `Uint8Array`,
 * which was the only case the previous implementation recognised.
 *
 * @param {*} val The value to check.
 * @returns {boolean} True if the value is a `TypedArray`, false otherwise.
 *
 * Adapted from https://stackoverflow.com/a/71091338/13989043
 */
export function isTypedArray(val) {
  // Instances: every typed array is an ArrayBuffer view; DataView is the only other view type.
  if (ArrayBuffer.isView(val) && !(val instanceof DataView)) {
    return true;
  }
  // Constructors: their prototype inherits from the intrinsic %TypedArray% prototype.
  return val?.prototype?.__proto__?.constructor?.name === "TypedArray";
}
/**
 * Check whether a value is integral: either a `bigint` or a number
 * accepted by `Number.isInteger`.
 *
 * @param {*} x The value to check.
 * @returns {boolean} True if the value is an integer or a bigint, false otherwise.
 */
export function isIntegralNumber(x) {
  if (typeof x === "bigint") {
    return true;
  }
  return Number.isInteger(x);
}
/**
 * Check whether a value is present, i.e. neither `undefined` nor `null`.
 * Other falsy values (`0`, `""`, `false`, `NaN`) count as existing.
 *
 * @param {*} x The value to check.
 * @returns {boolean} True if the value exists, false otherwise.
 */
export function exists(x) {
  const isMissing = x === undefined || x === null;
  return !isMissing;
}
/**
 * Determines the shape of a nested array by following the first element
 * at each nesting level. Only the first element is inspected, so ragged
 * arrays are not detected.
 *
 * @param {any[]} arr The nested array to measure.
 * @returns {number[]} The length found at each nesting level, outermost first.
 */
export function calculateDimensions(arr) {
  const shape = [];
  for (let level = arr; Array.isArray(level); level = level[0]) {
    shape.push(level.length);
  }
  return shape;
}
/**
 * Mimics Python's `dict.pop()`: removes `key` from `obj` and returns its value.
 *
 * A key whose value is `undefined` is treated as absent. In that case the
 * object is left untouched and `defaultValue` is returned when one was given.
 *
 * @param {Object} obj The object to pop from.
 * @param {string} key The key to pop.
 * @param {*} defaultValue The value to return if the key does not exist.
 * @returns {*} The value of the popped key, or `defaultValue`.
 * @throws {Error} If the key does not exist and no default value is provided.
 */
export function pop(obj, key, defaultValue = undefined) {
  const found = obj[key];
  if (found === undefined) {
    if (defaultValue !== undefined) {
      return defaultValue;
    }
    throw Error(`Key ${key} does not exist in object.`);
  }
  delete obj[key];
  return found;
}
/**
 * Concatenates any number of arrays into one newly allocated array.
 * The inputs are not modified.
 * Adapted from https://stackoverflow.com/a/6768642/13989043
 *
 * @param {Array[]} arrs Arrays to merge.
 * @returns {Array} The merged array.
 */
export function mergeArrays(...arrs) {
  return [].concat(...arrs);
}
/**
 * Compute the Cartesian product of the given arrays.
 *
 * For two or more arrays the result is a list of flat tuples, one element per
 * input array (`[a, b, c]`, not the nested `[[a, b], c]` the previous
 * implementation produced for three or more inputs). Elements that are
 * themselves arrays are kept intact inside the tuples.
 * For a single array, that array is returned as-is (unchanged historical behaviour).
 *
 * @param {...Array} a Arrays to compute the product of.
 * @returns {Array} The Cartesian product as an array of tuples.
 * @throws {TypeError} If called without any array.
 * @private
 */
export function product(...a) {
  // Adapted from https://stackoverflow.com/a/43053803
  if (a.length === 0) {
    throw new TypeError("product() requires at least one array");
  }
  const [first, ...rest] = a;
  if (rest.length === 0) {
    return first;
  }
  // Start with 1-tuples and extend every tuple by one element per remaining array.
  return rest.reduce(
    (tuples, arr) =>
      tuples.flatMap((tuple) => arr.map((item) => [...tuple, item])),
    first.map((item) => [item])
  );
}
/**
 * Computes `|((i + w) mod 2w) - w|` for an index `i` and window size `w`.
 *
 * @param {number} i The index.
 * @param {number} w The window size.
 * @returns {number} The index offset.
 */
export function calculateReflectOffset(i, w) {
  const period = 2 * w;
  const shifted = (i + w) % period;
  return Math.abs(shifted - w);
}
| /** | ||
| * @file Custom data structures. | ||
| * | ||
| * These are only used internally, meaning an end-user shouldn't | ||
| * need to access anything here. | ||
| * | ||
| * @module utils/data-structures | ||
| */ | ||
/**
 * Priority queue backed by an array-based binary heap.
 * The root lives at index `0`; the children of the node at index `i`
 * live at indices `2i + 1` and `2i + 2`.
 *
 * Adapted from the following sources:
 * - https://stackoverflow.com/a/42919752/13989043 (original)
 * - https://github.com/belladoreai/llama-tokenizer-js (minor improvements)
 */
export class PriorityQueue {
  /**
   * Create a new PriorityQueue.
   * @param {Function} comparator Returns `true` when its first argument has higher
   * priority than its second. The default (`a > b`) yields a max-heap.
   */
  constructor(comparator = (a, b) => a > b) {
    this._heap = [];
    this._comparator = comparator;
  }

  /**
   * Number of elements currently stored.
   */
  get size() {
    return this._heap.length;
  }

  /**
   * Check if the queue is empty.
   * @returns {boolean} `true` if the queue holds no elements, `false` otherwise.
   */
  isEmpty() {
    return this.size === 0;
  }

  /**
   * Look at the highest-priority element without removing it.
   * @returns {any} The highest-priority element (`undefined` when empty).
   */
  peek() {
    return this._heap[0];
  }

  /**
   * Add one or more elements to the queue.
   * @param {...any} values The values to insert.
   * @returns {number} The new size of the queue.
   */
  push(...values) {
    return this.extend(values);
  }

  /**
   * Add every element of an iterable to the queue.
   * @param {any[]} values The values to insert.
   * @returns {number} The new size of the queue.
   */
  extend(values) {
    for (const item of values) {
      // Append at the bottom, then restore the heap property upwards.
      this._heap.push(item);
      this._siftUp();
    }
    return this.size;
  }

  /**
   * Remove and return the highest-priority element.
   * @returns {any} The removed element (`undefined` when empty).
   */
  pop() {
    const top = this.peek();
    const lastIndex = this.size - 1;
    if (lastIndex > 0) {
      // Move the root to the end so it can be dropped with a cheap array pop.
      this._swap(0, lastIndex);
    }
    this._heap.pop();
    this._siftDown();
    return top;
  }

  /**
   * Replace the highest-priority element with a new value.
   * @param {*} value The new value.
   * @returns {*} The value that was replaced.
   */
  replace(value) {
    const previousTop = this.peek();
    this._heap[0] = value;
    this._siftDown();
    return previousTop;
  }

  /**
   * Index of the parent of the node at index `i`.
   * @param {number} i The index of the child node.
   * @returns {number} The index of the parent node (`-1` for the root).
   * @private
   */
  _parent(i) {
    return (i - 1) >> 1;
  }

  /**
   * Index of the left child of the node at index `i`.
   * @param {number} i The index of the parent node.
   * @returns {number} The index of the left child.
   * @private
   */
  _left(i) {
    return 2 * i + 1;
  }

  /**
   * Index of the right child of the node at index `i`.
   * @param {number} i The index of the parent node.
   * @returns {number} The index of the right child.
   * @private
   */
  _right(i) {
    return 2 * i + 2;
  }

  /**
   * Apply the comparator to the elements stored at indices `i` and `j`.
   * @param {number} i The index of the first element.
   * @param {number} j The index of the second element.
   * @returns {boolean} `true` if the element at `i` has higher priority than the one at `j`.
   * @private
   */
  _greater(i, j) {
    const heap = this._heap;
    return this._comparator(heap[i], heap[j]);
  }

  /**
   * Exchange the elements at indices `i` and `j`.
   * @param {number} i The index of the first element.
   * @param {number} j The index of the second element.
   * @private
   */
  _swap(i, j) {
    const heap = this._heap;
    const held = heap[i];
    heap[i] = heap[j];
    heap[j] = held;
  }

  /**
   * Restore the heap property by moving the last element up
   * while it outranks its parent.
   * @private
   */
  _siftUp() {
    let node = this.size - 1;
    while (node > 0) {
      const parent = this._parent(node);
      if (!this._greater(node, parent)) {
        break;
      }
      this._swap(node, parent);
      node = parent;
    }
  }

  /**
   * Restore the heap property by moving the root down
   * while one of its children outranks it.
   * @private
   */
  _siftDown() {
    let node = 0;
    for (;;) {
      const left = this._left(node);
      const right = this._right(node);
      const size = this.size;

      const leftOutranks = left < size && this._greater(left, node);
      const childOutranks =
        leftOutranks || (right < size && this._greater(right, node));
      if (!childOutranks) {
        break;
      }

      // Swap with whichever child has the higher priority.
      const preferred =
        right < size && this._greater(right, left) ? right : left;
      this._swap(node, preferred);
      node = preferred;
    }
  }
}
/**
 * A trie structure to efficiently store and search for strings.
 * Keys are Unicode code points (as produced by string iteration), so
 * characters outside the Basic Multilingual Plane occupy a single edge.
 */
export class CharTrie {
  constructor() {
    this.root = CharTrieNode.default();
  }

  /**
   * Adds one or more `texts` to the trie.
   * @param {string[]} texts The strings to add to the trie.
   */
  extend(texts) {
    for (const text of texts) {
      this.push(text);
    }
  }

  /**
   * Adds text to the trie.
   * @param {string} text The string to add to the trie.
   */
  push(text) {
    let node = this.root;
    for (const ch of text) {
      let child = node.children.get(ch);
      if (child === undefined) {
        child = CharTrieNode.default();
        node.children.set(ch, child);
      }
      node = child;
    }
    // Mark the end of a stored string (the node may still have children).
    node.isLeaf = true;
  }

  /**
   * Finds every string stored in the trie that is a prefix of `text`,
   * from shortest to longest.
   *
   * `text` is walked by code point, matching how `push` stores strings;
   * indexing by UTF-16 code unit would never match entries that contain
   * surrogate pairs (e.g. emoji).
   *
   * @param {string} text The text whose prefixes are looked up.
   * @yields {string} Each stored string that `text` starts with.
   */
  *commonPrefixSearch(text) {
    let node = this.root;
    if (node === undefined) {
      return;
    }
    let prefix = "";
    for (const ch of text) {
      node = node.children.get(ch);
      if (node === undefined) {
        // No stored string continues along this path.
        return;
      }
      prefix += ch;
      if (node.isLeaf) {
        yield prefix;
      }
    }
  }
}

/**
 * Represents a node in a character trie.
 */
class CharTrieNode {
  /**
   * Create a new CharTrieNode.
   * @param {boolean} isLeaf Whether a stored string ends at this node.
   * @param {Map<string, CharTrieNode>} children A map containing the node's children, where the key is a character and the value is a `CharTrieNode`.
   */
  constructor(isLeaf, children) {
    this.isLeaf = isLeaf;
    this.children = children;
  }

  /**
   * Returns a new `CharTrieNode` instance with default values.
   * @returns {CharTrieNode} A new `CharTrieNode` instance with `isLeaf` set to `false` and an empty `children` map.
   */
  static default() {
    return new CharTrieNode(false, new Map());
  }
}
/**
 * A lattice data structure to be used for tokenization.
 * Candidate tokens are stored as nodes indexed by the positions at which
 * they begin and end; `viterbi` then picks the highest-scoring path.
 */
export class TokenLattice {
  /**
   * Creates a new TokenLattice instance.
   *
   * @param {string} sentence The input sentence to be tokenized.
   * @param {number} bosTokenId The beginning-of-sequence token ID.
   * @param {number} eosTokenId The end-of-sequence token ID.
   */
  constructor(sentence, bosTokenId, eosTokenId) {
    this.sentence = sentence;
    this.len = sentence.length;
    this.bosTokenId = bosTokenId;
    this.eosTokenId = eosTokenId;
    this.nodes = [];

    // One bucket per boundary position: 0 .. len inclusive.
    const boundaryCount = this.len + 1;
    this.beginNodes = Array.from({ length: boundaryCount }, () => []);
    this.endNodes = Array.from({ length: boundaryCount }, () => []);

    // Zero-length sentinel nodes: BOS ends at position 0, EOS begins at position `len`.
    const bosNode = new TokenLatticeNode(this.bosTokenId, 0, 0, 0, 0.0);
    const eosNode = new TokenLatticeNode(this.eosTokenId, 1, this.len, 0, 0.0);
    this.nodes.push(bosNode.clone(), eosNode.clone());
    this.beginNodes[this.len].push(eosNode);
    this.endNodes[0].push(bosNode);
  }

  /**
   * Inserts a new token node into the token lattice.
   *
   * @param {number} pos The starting position of the token.
   * @param {number} length The length of the token.
   * @param {number} score The score of the token.
   * @param {number} tokenId The token ID of the token.
   */
  insert(pos, length, score, tokenId) {
    const inserted = new TokenLatticeNode(
      tokenId,
      this.nodes.length, // node IDs follow insertion order
      pos,
      length,
      score
    );
    this.beginNodes[pos].push(inserted);
    this.endNodes[pos + length].push(inserted);
    this.nodes.push(inserted);
  }

  /**
   * Implements the Viterbi algorithm to compute the most likely sequence of tokens.
   *
   * @returns {TokenLatticeNode[]} The nodes on the best path in sentence order
   * (excluding the BOS/EOS sentinels), or an empty array when no complete path exists.
   */
  viterbi() {
    const len = this.len;

    // Forward pass: for each node, record the best-scoring predecessor ending where it begins.
    for (let pos = 0; pos <= len; ++pos) {
      const starting = this.beginNodes[pos];
      if (starting.length == 0) {
        return [];
      }
      for (const rnode of starting) {
        rnode.prev = null;
        let bestScore = 0.0;
        let bestPredecessor = null;
        for (const lnode of this.endNodes[pos]) {
          const candidate = lnode.backtraceScore + rnode.score;
          if (bestPredecessor === null || candidate > bestScore) {
            bestPredecessor = lnode;
            bestScore = candidate;
          }
        }
        if (bestPredecessor === null) {
          return [];
        }
        // Store a snapshot so later updates to the predecessor do not affect this link.
        rnode.prev = bestPredecessor.clone();
        rnode.backtraceScore = bestScore;
      }
    }

    // Backward pass: follow `prev` links from EOS until the BOS node (whose `prev` is null).
    const eosNode = this.beginNodes[len][0];
    const last = eosNode.prev;
    if (last === null) {
      return [];
    }
    const path = [];
    let node = last.clone();
    while (node.prev !== null) {
      path.push(node.clone());
      node = node.prev.clone();
    }
    path.reverse();
    return path;
  }

  /**
   * @param {TokenLatticeNode} node
   * @returns {string} The substring of the sentence covered by `node`.
   */
  piece(node) {
    const start = node.pos;
    return this.sentence.slice(start, start + node.length);
  }

  /**
   * @returns {Array} The substrings of the tokens on the most likely path.
   */
  tokens() {
    return this.viterbi().map((node) => this.piece(node));
  }

  /**
   * @returns {Array} The token IDs of the tokens on the most likely path.
   */
  tokenIds() {
    return this.viterbi().map((node) => node.tokenId);
  }
}

class TokenLatticeNode {
  /**
   * Represents a node in a token lattice for a given sentence.
   * @param {number} tokenId The ID of the token associated with this node.
   * @param {number} nodeId The ID of this node.
   * @param {number} pos The starting position of the token in the sentence.
   * @param {number} length The length of the token.
   * @param {number} score The score associated with the token.
   */
  constructor(tokenId, nodeId, pos, length, score) {
    this.tokenId = tokenId;
    this.nodeId = nodeId;
    this.pos = pos;
    this.length = length;
    this.score = score;
    // Filled in by the Viterbi forward pass.
    this.prev = null;
    this.backtraceScore = 0.0;
  }

  /**
   * Returns a shallow copy of this node (the `prev` link is shared, not cloned).
   * @returns {TokenLatticeNode} A clone of this node.
   */
  clone() {
    const { tokenId, nodeId, pos, length, score } = this;
    const copy = new TokenLatticeNode(tokenId, nodeId, pos, length, score);
    copy.prev = this.prev;
    copy.backtraceScore = this.backtraceScore;
    return copy;
  }
}
/**
 * Downloads a file with the Fetch API.
 *
 * The request URL is `path_or_repo_id + filename` (plain concatenation, no
 * separator is inserted). Only locations starting with `"http"` are supported;
 * anything else is rejected because this build has no filesystem access.
 *
 * @param {string} path_or_repo_id Base location of the file; must start with `"http"`.
 * @param {string} filename The name of the file, appended to `path_or_repo_id`.
 * @param {boolean} [fatal=true] Whether to throw an error if the response is not OK.
 * @param {PretrainedOptions} [options] An object containing optional parameters (currently unused here).
 *
 * @throws Will throw an error if the response is not OK and `fatal` is true,
 * or if `path_or_repo_id` does not start with `"http"`.
 * @returns {Promise} A Promise that resolves with the file content as a buffer,
 * or `null` when the response is not OK and `fatal` is false.
 */
export async function getModelFile(
  path_or_repo_id,
  filename,
  fatal = true,
  options = {}
) {
  if (!path_or_repo_id.startsWith("http")) {
    throw new Error(
      "Filesystem not supported, please implement your own file reading logic."
    );
  }

  const response = await fetch(path_or_repo_id + filename);
  if (response.ok) {
    return response.arrayBuffer();
  }
  if (fatal) {
    throw new Error(`File not found at ${path_or_repo_id}${filename}`);
  }
  return null;
}
/**
 * Fetches a file via `getModelFile`, decodes it as UTF-8 and parses it as JSON.
 *
 * @param {string} modelPath The base location of the file.
 * @param {string} fileName The name of the file to fetch.
 * @param {boolean} [fatal=true] Whether to throw an error if the file is not found.
 * @param {PretrainedOptions} [options] An object containing optional parameters.
 * @returns {Promise<Object>} The parsed JSON data, or an empty object when the
 * file could not be fetched and `fatal` is false.
 * @throws Will throw an error if the file is not found and `fatal` is true.
 */
export async function getModelJSON(
  modelPath,
  fileName,
  fatal = true,
  options = {}
) {
  const buffer = await getModelFile(modelPath, fileName, fatal, options);
  if (buffer === null) {
    // Missing file in non-fatal mode: report "no data" as an empty object.
    return {};
  }
  const text = new TextDecoder("utf-8").decode(buffer);
  return JSON.parse(text);
}
| /** | ||
| * @file Helper module for mathematical processing. | ||
| * | ||
| * These functions and classes are only used internally, | ||
| * meaning an end-user shouldn't need to access anything here. | ||
| * | ||
| * @module utils/maths | ||
| */ | ||
| /** | ||
| * @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array} TypedArray | ||
| * @typedef {BigInt64Array | BigUint64Array} BigTypedArray | ||
| * @typedef {TypedArray | BigTypedArray} AnyTypedArray | ||
| */ | ||
/**
 * Resizes channel-major image data (`[channels, height, width]`, flattened)
 * to a new height and width using bilinear interpolation.
 *
 * `mode` and `align_corners` are accepted but not used yet: interpolation is
 * always bilinear, and output pixel centres are mapped with the half-pixel offset
 * `(index + 0.5) / scale - 0.5`.
 *
 * @param {TypedArray} input The flattened input data.
 * @param {number[]} inputShape `[in_channels, in_height, in_width]`.
 * @param {number[]} outputSize `[out_height, out_width]`.
 * @param {string} [mode="bilinear"] Currently ignored.
 * @param {boolean} [align_corners=false] Currently ignored.
 * @returns {TypedArray} A new array of the same type as `input`, laid out as
 * `[in_channels, out_height, out_width]`.
 */
export function interpolate_data(
  input,
  [in_channels, in_height, in_width],
  [out_height, out_width],
  mode = "bilinear",
  align_corners = false
) {
  // TODO use mode and align_corners

  // Ratio between output and input sizes along each axis
  const x_scale = out_width / in_width;
  const y_scale = out_height / in_height;

  // Output buffer, same array type as the input
  // @ts-ignore
  const out_img = new input.constructor(out_height * out_width * in_channels);

  // Number of elements in one channel plane
  const inPlane = in_height * in_width;
  const outPlane = out_height * out_width;

  for (let row = 0; row < out_height; ++row) {
    // Vertical source coordinate and its two neighbouring rows (clamped to the image)
    const y = (row + 0.5) / y_scale - 0.5;
    const yFloor = Math.floor(y);
    const y2 = Math.min(yFloor + 1, in_height - 1);
    const y1 = Math.max(yFloor, 0);
    const t = y - y1;
    const upperRow = y1 * in_width;
    const lowerRow = y2 * in_width;

    for (let col = 0; col < out_width; ++col) {
      // Horizontal source coordinate and its two neighbouring columns (clamped)
      const x = (col + 0.5) / x_scale - 0.5;
      const xFloor = Math.floor(x);
      const x2 = Math.min(xFloor + 1, in_width - 1);
      const x1 = Math.max(xFloor, 0);
      const s = x - x1;

      // Bilinear weights of the four neighbours
      const w1 = (1 - s) * (1 - t);
      const w2 = s * (1 - t);
      const w3 = (1 - s) * t;
      const w4 = s * t;

      // In-plane indices of the four neighbours
      const idx1 = upperRow + x1;
      const idx2 = upperRow + x2;
      const idx3 = lowerRow + x1;
      const idx4 = lowerRow + x2;

      const outOffset = row * out_width + col;
      for (let channel = 0; channel < in_channels; ++channel) {
        const cOffset = channel * inPlane;
        out_img[channel * outPlane + outOffset] =
          w1 * input[cOffset + idx1] +
          w2 * input[cOffset + idx2] +
          w3 * input[cOffset + idx3] +
          w4 * input[cOffset + idx4];
      }
    }
  }
  return out_img;
}
/**
 * Reorders the axes of a flattened, row-major n-dimensional array.
 *
 * @template {AnyTypedArray} T
 * @param {T} array The flattened source data.
 * @param {number[]} dims The shape of the source data.
 * @param {number[]} axes For each output axis, the index of the source axis it takes.
 * @returns {[T, number[]]} The permuted array and the new shape.
 */
export function permute_data(array, dims, axes) {
  const rank = axes.length;

  // Shape of the result and the row-major stride of each of its axes
  const shape = new Array(rank);
  const stride = new Array(rank);
  let elementsSoFar = 1;
  for (let axis = rank - 1; axis >= 0; --axis) {
    stride[axis] = elementsSoFar;
    shape[axis] = dims[axes[axis]];
    elementsSoFar *= shape[axis];
  }

  // For each source axis, the stride it has in the output
  const invStride = axes.map((_, sourceAxis) => stride[axes.indexOf(sourceAxis)]);

  // Destination buffer, same array type as the input
  // @ts-ignore
  const permutedData = new array.constructor(array.length);

  for (let source = 0; source < array.length; ++source) {
    // Decompose the flat source index into per-axis coordinates (last axis first)
    // and accumulate the matching flat destination index.
    let destination = 0;
    let remainder = source;
    for (let axis = dims.length - 1; axis >= 0; --axis) {
      const extent = dims[axis];
      destination += (remainder % extent) * invStride[axis];
      remainder = Math.floor(remainder / extent);
    }
    permutedData[destination] = array[source];
  }

  return [permutedData, shape];
}
/**
 * Compute the softmax of an array of numbers.
 * The maximum is subtracted before exponentiation so the largest exponent is 0.
 *
 * @template {TypedArray|number[]} T
 * @param {T} arr The array of numbers to compute the softmax of.
 * @returns {T} The softmax array.
 */
export function softmax(arr) {
  const largest = max(arr)[0];

  // Shifted exponentials
  const exps = arr.map((value) => Math.exp(value - largest));

  // Normalisation constant
  let sumExps = 0;
  for (let i = 0; i < exps.length; ++i) {
    sumExps += exps[i];
  }

  const normalised = exps.map((value) => value / sumExps);
  return /** @type {T} */ (normalised);
}
/**
 * Calculates the logarithm of the softmax function for the input array.
 *
 * Uses the log-sum-exp formulation `x - max - log(sum(exp(x - max)))` instead of
 * taking `log(softmax(x))`: the latter underflows to `-Infinity` for entries far
 * below the maximum, whereas this form stays finite.
 *
 * @template {TypedArray|number[]} T
 * @param {T} arr The input array to calculate the log_softmax function for.
 * @returns {T} The resulting log_softmax array.
 * @throws {Error} If array is empty.
 */
export function log_softmax(arr) {
  if (arr.length === 0) throw Error("Array must not be empty");

  // Largest entry; subtracting it keeps every exponent <= 0 (no overflow).
  let maxVal = arr[0];
  for (let i = 1; i < arr.length; ++i) {
    if (arr[i] > maxVal) {
      maxVal = arr[i];
    }
  }

  // log(sum(exp(x - max)))
  let sumExps = 0;
  for (let i = 0; i < arr.length; ++i) {
    sumExps += Math.exp(arr[i] - maxVal);
  }
  const logSum = Math.log(sumExps);

  const logSoftmaxArr = arr.map((x) => x - maxVal - logSum);
  return /** @type {T} */ (logSoftmaxArr);
}
/**
 * Calculates the dot product of two arrays.
 * Iterates over the length of `arr1`; `arr2` is expected to be at least as long.
 *
 * @param {number[]} arr1 The first array.
 * @param {number[]} arr2 The second array.
 * @returns {number} The dot product of arr1 and arr2.
 */
export function dot(arr1, arr2) {
  let total = 0;
  let index = 0;
  while (index < arr1.length) {
    total += arr1[index] * arr2[index];
    index += 1;
  }
  return total;
}
/**
 * Pairs every item with its index and returns the pairs sorted by value, descending.
 *
 * @param {any[]|TypedArray} items The scores to rank.
 * @param {number|null} [top_k=0] How many pairs to return; `0`, a negative number or `null` returns all of them.
 * @returns {[number, any][]} `[index, value]` pairs, sorted by descending value.
 */
export function getTopItems(items, top_k = 0) {
  const ranked = Array.from(items, (value, index) => [index, value]).sort(
    (left, right) => right[1] - left[1]
  );
  const shouldTruncate = top_k !== null && top_k > 0;
  return shouldTruncate ? ranked.slice(0, top_k) : ranked;
}
/**
 * Computes the cosine similarity between two arrays: their dot product
 * divided by the product of their magnitudes.
 *
 * @param {number[]} arr1 The first array.
 * @param {number[]} arr2 The second array.
 * @returns {number} The cosine similarity between the two arrays.
 */
export function cos_sim(arr1, arr2) {
  const numerator = dot(arr1, arr2);
  const denominator = magnitude(arr1) * magnitude(arr2);
  return numerator / denominator;
}
/**
 * Computes the Euclidean length of a vector: the square root of the sum of squared elements.
 * @param {number[]} arr The array to calculate the magnitude of.
 * @returns {number} The magnitude of the array (0 for an empty array).
 */
export function magnitude(arr) {
  const sumOfSquares = arr.reduce(
    (total, component) => total + component * component,
    0
  );
  return Math.sqrt(sumOfSquares);
}
/**
 * Finds the smallest element of an array together with its position.
 * When several elements tie, the first one is reported.
 * @param {number[]|TypedArray} arr array of numbers.
 * @returns {number[]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
 * @throws {Error} If array is empty.
 */
export function min(arr) {
  if (arr.length === 0) throw Error("Array must not be empty");

  // Track only the winning position; the value is read back from the array.
  let indexOfMin = 0;
  for (let candidate = 1; candidate < arr.length; ++candidate) {
    if (arr[candidate] < arr[indexOfMin]) {
      indexOfMin = candidate;
    }
  }
  return [arr[indexOfMin], indexOfMin];
}
/**
 * Finds the largest element of an array together with its position.
 * When several elements tie, the first one is reported. The value is
 * converted with `Number(...)` before being returned.
 * @param {number[]|AnyTypedArray} arr array of numbers.
 * @returns {[number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
 * @throws {Error} If array is empty.
 */
export function max(arr) {
  if (arr.length === 0) throw Error("Array must not be empty");

  // Track only the winning position; the value is read back from the array.
  let indexOfMax = 0;
  for (let candidate = 1; candidate < arr.length; ++candidate) {
    if (arr[candidate] > arr[indexOfMax]) {
      indexOfMax = candidate;
    }
  }
  return [Number(arr[indexOfMax]), indexOfMax];
}
/**
 * Checks whether a value is a positive integer power of two (1, 2, 4, 8, ...).
 * @param {number} number The value to test.
 * @returns {boolean} `true` only for positive integers with exactly one bit set.
 */
function isPowerOfTwo(number) {
  // Bitwise operators silently truncate to 32-bit integers, so fractions (2.5 -> 2)
  // must be rejected before the bit trick is applied. This also rejects NaN and Infinity.
  if (!Number.isInteger(number) || number <= 0) return false;

  if (number <= 0x7fffffff) {
    // Fits in a signed 32-bit integer: a power of two has a single bit set,
    // so clearing the lowest set bit leaves zero.
    return (number & (number - 1)) === 0;
  }

  // Beyond the 32-bit range the bit trick only sees the low bits; halve exactly instead.
  let remaining = number;
  while (remaining % 2 === 0) remaining /= 2;
  return remaining === 1;
}
| /** | ||
| * Implementation of Radix-4 FFT. | ||
| * | ||
| * P2FFT class provides functionality for performing Fast Fourier Transform on arrays | ||
| * which are a power of two in length. | ||
| * Code adapted from https://www.npmjs.com/package/fft.js | ||
| */ | ||
| class P2FFT { | ||
| /** | ||
| * @param {number} size The size of the input array. Must be a power of two larger than 1. | ||
| * @throws {Error} FFT size must be a power of two larger than 1. | ||
| */ | ||
| constructor(size) { | ||
| this.size = size | 0; // convert to a 32-bit signed integer | ||
| if (this.size <= 1 || !isPowerOfTwo(this.size)) | ||
| throw new Error("FFT size must be a power of two larger than 1"); | ||
| this._csize = size << 1; | ||
| this.table = new Float64Array(this.size * 2); | ||
| for (let i = 0; i < this.table.length; i += 2) { | ||
| const angle = (Math.PI * i) / this.size; | ||
| this.table[i] = Math.cos(angle); | ||
| this.table[i + 1] = -Math.sin(angle); | ||
| } | ||
| // Find size's power of two | ||
| let power = 0; | ||
| for (let t = 1; this.size > t; t <<= 1) ++power; | ||
| // Calculate initial step's width: | ||
| // * If we are full radix-4, it is 2x smaller to give inital len=8 | ||
| // * Otherwise it is the same as `power` to give len=4 | ||
| this._width = power % 2 === 0 ? power - 1 : power; | ||
| // Pre-compute bit-reversal patterns | ||
| this._bitrev = new Int32Array(1 << this._width); | ||
| for (let j = 0; j < this._bitrev.length; ++j) { | ||
| this._bitrev[j] = 0; | ||
| for (let shift = 0; shift < this._width; shift += 2) { | ||
| const revShift = this._width - shift - 2; | ||
| this._bitrev[j] |= ((j >>> shift) & 3) << revShift; | ||
| } | ||
| } | ||
| } | ||
| /** | ||
| * Create a complex number array with size `2 * size` | ||
| * | ||
| * @returns {Float64Array} A complex number array with size `2 * size` | ||
| */ | ||
| createComplexArray() { | ||
| return new Float64Array(this._csize); | ||
| } | ||
| /** | ||
| * Converts a complex number representation stored in a Float64Array to an array of real numbers. | ||
| * | ||
| * @param {Float64Array} complex The complex number representation to be converted. | ||
| * @param {number[]} [storage] An optional array to store the result in. | ||
| * @returns {number[]} An array of real numbers representing the input complex number representation. | ||
| */ | ||
| fromComplexArray(complex, storage) { | ||
| const res = storage || new Array(complex.length >>> 1); | ||
| for (let i = 0; i < complex.length; i += 2) res[i >>> 1] = complex[i]; | ||
| return res; | ||
| } | ||
| /** | ||
| * Convert a real-valued input array to a complex-valued output array. | ||
| * @param {Float64Array} input The real-valued input array. | ||
| * @param {Float64Array} [storage] Optional buffer to store the output array. | ||
| * @returns {Float64Array} The complex-valued output array. | ||
| */ | ||
| toComplexArray(input, storage) { | ||
| const res = storage || this.createComplexArray(); | ||
| for (let i = 0; i < res.length; i += 2) { | ||
| res[i] = input[i >>> 1]; | ||
| res[i + 1] = 0; | ||
| } | ||
| return res; | ||
| } | ||
| /** | ||
| * Completes the spectrum by adding its mirrored negative frequency components. | ||
| * @param {Float64Array} spectrum The input spectrum. | ||
| * @returns {void} | ||
| */ | ||
| completeSpectrum(spectrum) { | ||
| const size = this._csize; | ||
| const half = size >>> 1; | ||
| for (let i = 2; i < half; i += 2) { | ||
| spectrum[size - i] = spectrum[i]; | ||
| spectrum[size - i + 1] = -spectrum[i + 1]; | ||
| } | ||
| } | ||
| /** | ||
| * Performs a Fast Fourier Transform (FFT) on the given input data and stores the result in the output buffer. | ||
| * | ||
| * @param {Float64Array} out The output buffer to store the result. | ||
| * @param {Float64Array} data The input data to transform. | ||
| * | ||
| * @throws {Error} Input and output buffers must be different. | ||
| * | ||
| * @returns {void} | ||
| */ | ||
| transform(out, data) { | ||
| if (out === data) | ||
| throw new Error("Input and output buffers must be different"); | ||
| this._transform4(out, data, 1 /* DONE */); | ||
| } | ||
| /** | ||
| * Performs a real-valued forward FFT on the given input buffer and stores the result in the given output buffer. | ||
| * The input buffer must contain real values only, while the output buffer will contain complex values. The input and | ||
| * output buffers must be different. | ||
| * | ||
| * @param {Float64Array} out The output buffer. | ||
| * @param {Float64Array} data The input buffer containing real values. | ||
| * | ||
| * @throws {Error} If the input and output buffers are the same. | ||
| */ | ||
| realTransform(out, data) { | ||
| if (out === data) | ||
| throw new Error("Input and output buffers must be different"); | ||
| this._realTransform4(out, data, 1 /* DONE */); | ||
| } | ||
| /** | ||
| * Performs an inverse FFT transformation on the given `data` array, and stores the result in `out`. | ||
| * The `out` array must be a different buffer than the `data` array. The `out` array will contain the | ||
| * result of the transformation. The `data` array will not be modified. | ||
| * | ||
| * @param {Float64Array} out The output buffer for the transformed data. | ||
| * @param {Float64Array} data The input data to transform. | ||
| * @throws {Error} If `out` and `data` refer to the same buffer. | ||
| * @returns {void} | ||
| */ | ||
| inverseTransform(out, data) { | ||
| if (out === data) | ||
| throw new Error("Input and output buffers must be different"); | ||
| this._transform4(out, data, -1 /* DONE */); | ||
| for (let i = 0; i < out.length; ++i) out[i] /= this.size; | ||
| } | ||
| /** | ||
| * Performs a radix-4 implementation of a discrete Fourier transform on a given set of data. | ||
| * | ||
| * @param {Float64Array} out The output buffer for the transformed data. | ||
| * @param {Float64Array} data The input buffer of data to be transformed. | ||
| * @param {number} inv A scaling factor to apply to the transform. | ||
| * @returns {void} | ||
| */ | ||
| _transform4(out, data, inv) { | ||
| // radix-4 implementation | ||
| const size = this._csize; | ||
| // Initial step (permute and transform) | ||
| const width = this._width; | ||
| let step = 1 << width; | ||
| let len = (size / step) << 1; | ||
| let outOff; | ||
| let t; | ||
| const bitrev = this._bitrev; | ||
| if (len === 4) { | ||
| for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { | ||
| const off = bitrev[t]; | ||
| this._singleTransform2(data, out, outOff, off, step); | ||
| } | ||
| } else { | ||
| // len === 8 | ||
| for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { | ||
| const off = bitrev[t]; | ||
| this._singleTransform4(data, out, outOff, off, step, inv); | ||
| } | ||
| } | ||
| // Loop through steps in decreasing order | ||
| for (step >>= 2; step >= 2; step >>= 2) { | ||
| len = (size / step) << 1; | ||
| const quarterLen = len >>> 2; | ||
| // Loop through offsets in the data | ||
| for (outOff = 0; outOff < size; outOff += len) { | ||
| // Full case | ||
| const limit = outOff + quarterLen - 1; | ||
| for (let i = outOff, k = 0; i < limit; i += 2, k += step) { | ||
| const A = i; | ||
| const B = A + quarterLen; | ||
| const C = B + quarterLen; | ||
| const D = C + quarterLen; | ||
| // Original values | ||
| const Ar = out[A]; | ||
| const Ai = out[A + 1]; | ||
| const Br = out[B]; | ||
| const Bi = out[B + 1]; | ||
| const Cr = out[C]; | ||
| const Ci = out[C + 1]; | ||
| const Dr = out[D]; | ||
| const Di = out[D + 1]; | ||
| const tableBr = this.table[k]; | ||
| const tableBi = inv * this.table[k + 1]; | ||
| const MBr = Br * tableBr - Bi * tableBi; | ||
| const MBi = Br * tableBi + Bi * tableBr; | ||
| const tableCr = this.table[2 * k]; | ||
| const tableCi = inv * this.table[2 * k + 1]; | ||
| const MCr = Cr * tableCr - Ci * tableCi; | ||
| const MCi = Cr * tableCi + Ci * tableCr; | ||
| const tableDr = this.table[3 * k]; | ||
| const tableDi = inv * this.table[3 * k + 1]; | ||
| const MDr = Dr * tableDr - Di * tableDi; | ||
| const MDi = Dr * tableDi + Di * tableDr; | ||
| // Pre-Final values | ||
| const T0r = Ar + MCr; | ||
| const T0i = Ai + MCi; | ||
| const T1r = Ar - MCr; | ||
| const T1i = Ai - MCi; | ||
| const T2r = MBr + MDr; | ||
| const T2i = MBi + MDi; | ||
| const T3r = inv * (MBr - MDr); | ||
| const T3i = inv * (MBi - MDi); | ||
| // Final values | ||
| out[A] = T0r + T2r; | ||
| out[A + 1] = T0i + T2i; | ||
| out[B] = T1r + T3i; | ||
| out[B + 1] = T1i - T3r; | ||
| out[C] = T0r - T2r; | ||
| out[C + 1] = T0i - T2i; | ||
| out[D] = T1r - T3i; | ||
| out[D + 1] = T1i + T3r; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| /** | ||
| * Performs a radix-2 implementation of a discrete Fourier transform on a given set of data. | ||
| * | ||
| * @param {Float64Array} data The input buffer of data to be transformed. | ||
| * @param {Float64Array} out The output buffer for the transformed data. | ||
| * @param {number} outOff The offset at which to write the output data. | ||
| * @param {number} off The offset at which to begin reading the input data. | ||
| * @param {number} step The step size for indexing the input data. | ||
| * @returns {void} | ||
| */ | ||
| _singleTransform2(data, out, outOff, off, step) { | ||
| // radix-2 implementation | ||
| // NOTE: Only called for len=4 | ||
| const evenR = data[off]; | ||
| const evenI = data[off + 1]; | ||
| const oddR = data[off + step]; | ||
| const oddI = data[off + step + 1]; | ||
| out[outOff] = evenR + oddR; | ||
| out[outOff + 1] = evenI + oddI; | ||
| out[outOff + 2] = evenR - oddR; | ||
| out[outOff + 3] = evenI - oddI; | ||
| } | ||
| /** | ||
| * Performs radix-4 transformation on input data of length 8 | ||
| * | ||
| * @param {Float64Array} data Input data array of length 8 | ||
| * @param {Float64Array} out Output data array of length 8 | ||
| * @param {number} outOff Index of output array to start writing from | ||
| * @param {number} off Index of input array to start reading from | ||
| * @param {number} step Step size between elements in input array | ||
| * @param {number} inv Scaling factor for inverse transform | ||
| * | ||
| * @returns {void} | ||
| */ | ||
| _singleTransform4(data, out, outOff, off, step, inv) { | ||
| // radix-4 | ||
| // NOTE: Only called for len=8 | ||
| const step2 = step * 2; | ||
| const step3 = step * 3; | ||
| // Original values | ||
| const Ar = data[off]; | ||
| const Ai = data[off + 1]; | ||
| const Br = data[off + step]; | ||
| const Bi = data[off + step + 1]; | ||
| const Cr = data[off + step2]; | ||
| const Ci = data[off + step2 + 1]; | ||
| const Dr = data[off + step3]; | ||
| const Di = data[off + step3 + 1]; | ||
| // Pre-Final values | ||
| const T0r = Ar + Cr; | ||
| const T0i = Ai + Ci; | ||
| const T1r = Ar - Cr; | ||
| const T1i = Ai - Ci; | ||
| const T2r = Br + Dr; | ||
| const T2i = Bi + Di; | ||
| const T3r = inv * (Br - Dr); | ||
| const T3i = inv * (Bi - Di); | ||
| // Final values | ||
| out[outOff] = T0r + T2r; | ||
| out[outOff + 1] = T0i + T2i; | ||
| out[outOff + 2] = T1r + T3i; | ||
| out[outOff + 3] = T1i - T3r; | ||
| out[outOff + 4] = T0r - T2r; | ||
| out[outOff + 5] = T0i - T2i; | ||
| out[outOff + 6] = T1r - T3i; | ||
| out[outOff + 7] = T1i + T3r; | ||
| } | ||
| /** | ||
| * Real input radix-4 implementation | ||
| * @param {Float64Array} out Output array for the transformed data | ||
| * @param {Float64Array} data Input array of real data to be transformed | ||
| * @param {number} inv The scale factor used to normalize the inverse transform | ||
| */ | ||
| _realTransform4(out, data, inv) { | ||
| // Real input radix-4 implementation | ||
| const size = this._csize; | ||
| // Initial step (permute and transform) | ||
| const width = this._width; | ||
| let step = 1 << width; | ||
| let len = (size / step) << 1; | ||
| let outOff; | ||
| let t; | ||
| const bitrev = this._bitrev; | ||
| if (len === 4) { | ||
| for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { | ||
| const off = bitrev[t]; | ||
| this._singleRealTransform2(data, out, outOff, off >>> 1, step >>> 1); | ||
| } | ||
| } else { | ||
| // len === 8 | ||
| for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { | ||
| const off = bitrev[t]; | ||
| this._singleRealTransform4( | ||
| data, | ||
| out, | ||
| outOff, | ||
| off >>> 1, | ||
| step >>> 1, | ||
| inv | ||
| ); | ||
| } | ||
| } | ||
| // TODO: Optimize once https://github.com/indutny/fft.js/issues/25 is fixed | ||
| // Loop through steps in decreasing order | ||
| for (step >>= 2; step >= 2; step >>= 2) { | ||
| len = (size / step) << 1; | ||
| const quarterLen = len >>> 2; | ||
| // Loop through offsets in the data | ||
| for (outOff = 0; outOff < size; outOff += len) { | ||
| // Full case | ||
| const limit = outOff + quarterLen - 1; | ||
| for (let i = outOff, k = 0; i < limit; i += 2, k += step) { | ||
| const A = i; | ||
| const B = A + quarterLen; | ||
| const C = B + quarterLen; | ||
| const D = C + quarterLen; | ||
| // Original values | ||
| const Ar = out[A]; | ||
| const Ai = out[A + 1]; | ||
| const Br = out[B]; | ||
| const Bi = out[B + 1]; | ||
| const Cr = out[C]; | ||
| const Ci = out[C + 1]; | ||
| const Dr = out[D]; | ||
| const Di = out[D + 1]; | ||
| const tableBr = this.table[k]; | ||
| const tableBi = inv * this.table[k + 1]; | ||
| const MBr = Br * tableBr - Bi * tableBi; | ||
| const MBi = Br * tableBi + Bi * tableBr; | ||
| const tableCr = this.table[2 * k]; | ||
| const tableCi = inv * this.table[2 * k + 1]; | ||
| const MCr = Cr * tableCr - Ci * tableCi; | ||
| const MCi = Cr * tableCi + Ci * tableCr; | ||
| const tableDr = this.table[3 * k]; | ||
| const tableDi = inv * this.table[3 * k + 1]; | ||
| const MDr = Dr * tableDr - Di * tableDi; | ||
| const MDi = Dr * tableDi + Di * tableDr; | ||
| // Pre-Final values | ||
| const T0r = Ar + MCr; | ||
| const T0i = Ai + MCi; | ||
| const T1r = Ar - MCr; | ||
| const T1i = Ai - MCi; | ||
| const T2r = MBr + MDr; | ||
| const T2i = MBi + MDi; | ||
| const T3r = inv * (MBr - MDr); | ||
| const T3i = inv * (MBi - MDi); | ||
| // Final values | ||
| out[A] = T0r + T2r; | ||
| out[A + 1] = T0i + T2i; | ||
| out[B] = T1r + T3i; | ||
| out[B + 1] = T1i - T3r; | ||
| out[C] = T0r - T2r; | ||
| out[C + 1] = T0i - T2i; | ||
| out[D] = T1r - T3i; | ||
| out[D + 1] = T1i + T3r; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| /** | ||
| * Performs a single real input radix-2 transformation on the provided data | ||
| * | ||
| * @param {Float64Array} data The input data array | ||
| * @param {Float64Array} out The output data array | ||
| * @param {number} outOff The output offset | ||
| * @param {number} off The input offset | ||
| * @param {number} step The step | ||
| * | ||
| * @returns {void} | ||
| */ | ||
| _singleRealTransform2(data, out, outOff, off, step) { | ||
| // radix-2 implementation | ||
| // NOTE: Only called for len=4 | ||
| const evenR = data[off]; | ||
| const oddR = data[off + step]; | ||
| out[outOff] = evenR + oddR; | ||
| out[outOff + 1] = 0; | ||
| out[outOff + 2] = evenR - oddR; | ||
| out[outOff + 3] = 0; | ||
| } | ||
| /** | ||
| * Computes a single real-valued transform using radix-4 algorithm. | ||
| * This method is only called for len=8. | ||
| * | ||
| * @param {Float64Array} data The input data array. | ||
| * @param {Float64Array} out The output data array. | ||
| * @param {number} outOff The offset into the output array. | ||
| * @param {number} off The offset into the input array. | ||
| * @param {number} step The step size for the input array. | ||
| * @param {number} inv The value of inverse. | ||
| */ | ||
| _singleRealTransform4(data, out, outOff, off, step, inv) { | ||
| // radix-4 | ||
| // NOTE: Only called for len=8 | ||
| const step2 = step * 2; | ||
| const step3 = step * 3; | ||
| // Original values | ||
| const Ar = data[off]; | ||
| const Br = data[off + step]; | ||
| const Cr = data[off + step2]; | ||
| const Dr = data[off + step3]; | ||
| // Pre-Final values | ||
| const T0r = Ar + Cr; | ||
| const T1r = Ar - Cr; | ||
| const T2r = Br + Dr; | ||
| const T3r = inv * (Br - Dr); | ||
| // Final values | ||
| out[outOff] = T0r + T2r; | ||
| out[outOff + 1] = 0; | ||
| out[outOff + 2] = T1r; | ||
| out[outOff + 3] = -T3r; | ||
| out[outOff + 4] = T0r - T2r; | ||
| out[outOff + 5] = 0; | ||
| out[outOff + 6] = T1r; | ||
| out[outOff + 7] = T3r; | ||
| } | ||
| } | ||
| /** | ||
| * NP2FFT class provides functionality for performing Fast Fourier Transform on arrays | ||
| * which are not a power of two in length. In such cases, the chirp-z transform is used. | ||
| * | ||
| * For more information, see: https://math.stackexchange.com/questions/77118/non-power-of-2-ffts/77156#77156 | ||
| */ | ||
| class NP2FFT { | ||
| /** | ||
| * Constructs a new NP2FFT object. | ||
| * @param {number} fft_length The length of the FFT | ||
| */ | ||
| constructor(fft_length) { | ||
| // Helper variables | ||
| const a = 2 * (fft_length - 1); | ||
| const b = 2 * (2 * fft_length - 1); | ||
| const nextP2 = 2 ** Math.ceil(Math.log2(b)); | ||
| this.bufferSize = nextP2; | ||
| this._a = a; | ||
| // Define buffers | ||
| // Compute chirp for transform | ||
| const chirp = new Float64Array(b); | ||
| const ichirp = new Float64Array(nextP2); | ||
| this._chirpBuffer = new Float64Array(nextP2); | ||
| this._buffer1 = new Float64Array(nextP2); | ||
| this._buffer2 = new Float64Array(nextP2); | ||
| this._outBuffer1 = new Float64Array(nextP2); | ||
| this._outBuffer2 = new Float64Array(nextP2); | ||
| // Compute complex exponentiation | ||
| const theta = (-2 * Math.PI) / fft_length; | ||
| const baseR = Math.cos(theta); | ||
| const baseI = Math.sin(theta); | ||
| // Precompute helper for chirp-z transform | ||
| for (let i = 0; i < b >> 1; ++i) { | ||
| // Compute complex power: | ||
| const e = (i + 1 - fft_length) ** 2 / 2.0; | ||
| // Compute the modulus and argument of the result | ||
| const result_mod = Math.sqrt(baseR ** 2 + baseI ** 2) ** e; | ||
| const result_arg = e * Math.atan2(baseI, baseR); | ||
| // Convert the result back to rectangular form | ||
| // and assign to chirp and ichirp | ||
| const i2 = 2 * i; | ||
| chirp[i2] = result_mod * Math.cos(result_arg); | ||
| chirp[i2 + 1] = result_mod * Math.sin(result_arg); | ||
| // conjugate | ||
| ichirp[i2] = chirp[i2]; | ||
| ichirp[i2 + 1] = -chirp[i2 + 1]; | ||
| } | ||
| this._slicedChirpBuffer = chirp.subarray(a, b); | ||
| // create object to perform Fast Fourier Transforms | ||
| // with `nextP2` complex numbers | ||
| this._f = new P2FFT(nextP2 >> 1); | ||
| this._f.transform(this._chirpBuffer, ichirp); | ||
| } | ||
| _transform(output, input, real) { | ||
| const ib1 = this._buffer1; | ||
| const ib2 = this._buffer2; | ||
| const ob2 = this._outBuffer1; | ||
| const ob3 = this._outBuffer2; | ||
| const cb = this._chirpBuffer; | ||
| const sb = this._slicedChirpBuffer; | ||
| const a = this._a; | ||
| if (real) { | ||
| // Real multiplication | ||
| for (let j = 0; j < sb.length; j += 2) { | ||
| const j2 = j + 1; | ||
| const j3 = j >> 1; | ||
| const a_real = input[j3]; | ||
| ib1[j] = a_real * sb[j]; | ||
| ib1[j2] = a_real * sb[j2]; | ||
| } | ||
| } else { | ||
| // Complex multiplication | ||
| for (let j = 0; j < sb.length; j += 2) { | ||
| const j2 = j + 1; | ||
| ib1[j] = input[j] * sb[j] - input[j2] * sb[j2]; | ||
| ib1[j2] = input[j] * sb[j2] + input[j2] * sb[j]; | ||
| } | ||
| } | ||
| this._f.transform(ob2, ib1); | ||
| for (let j = 0; j < cb.length; j += 2) { | ||
| const j2 = j + 1; | ||
| ib2[j] = ob2[j] * cb[j] - ob2[j2] * cb[j2]; | ||
| ib2[j2] = ob2[j] * cb[j2] + ob2[j2] * cb[j]; | ||
| } | ||
| this._f.inverseTransform(ob3, ib2); | ||
| for (let j = 0; j < ob3.length; j += 2) { | ||
| const a_real = ob3[j + a]; | ||
| const a_imag = ob3[j + a + 1]; | ||
| const b_real = sb[j]; | ||
| const b_imag = sb[j + 1]; | ||
| output[j] = a_real * b_real - a_imag * b_imag; | ||
| output[j + 1] = a_real * b_imag + a_imag * b_real; | ||
| } | ||
| } | ||
| transform(output, input) { | ||
| this._transform(output, input, false); | ||
| } | ||
| realTransform(output, input) { | ||
| this._transform(output, input, true); | ||
| } | ||
| } | ||
| export class FFT { | ||
| constructor(fft_length) { | ||
| this.fft_length = fft_length; | ||
| this.isPowerOfTwo = isPowerOfTwo(fft_length); | ||
| if (this.isPowerOfTwo) { | ||
| this.fft = new P2FFT(fft_length); | ||
| this.outputBufferSize = 2 * fft_length; | ||
| } else { | ||
| this.fft = new NP2FFT(fft_length); | ||
| this.outputBufferSize = this.fft.bufferSize; | ||
| } | ||
| } | ||
| realTransform(out, input) { | ||
| this.fft.realTransform(out, input); | ||
| } | ||
| transform(out, input) { | ||
| this.fft.transform(out, input); | ||
| } | ||
| } | ||
/**
 * Applies a sliding-window median filter to the data. Positions that fall outside
 * the array are filled by mirroring the data around its first and last elements.
 * @param {AnyTypedArray} data The input array
 * @param {number} windowSize The window size (must be a positive odd number)
 * @returns {AnyTypedArray} A new array of the same type and length as `data`.
 * @throws {Error} If `windowSize` is even or not positive.
 */
export function medianFilter(data, windowSize) {
  if (windowSize % 2 === 0 || windowSize <= 0) {
    throw new Error("Window size must be a positive odd number");
  }

  const length = data.length;
  const radius = Math.floor(windowSize / 2);

  // @ts-ignore
  const filtered = new data.constructor(length);
  // Scratch array reused for every window to avoid re-allocating.
  // @ts-ignore
  const neighbourhood = new data.constructor(windowSize);

  for (let center = 0; center < length; ++center) {
    for (let offset = -radius, slot = 0; offset <= radius; ++offset, ++slot) {
      let source = center + offset;
      if (source < 0) {
        // Mirror around the first element.
        source = -source;
      } else if (source >= length) {
        // Mirror around the last element.
        source = 2 * (length - 1) - source;
      }
      neighbourhood[slot] = data[source];
    }

    // The median is the middle element of the sorted window.
    neighbourhood.sort();
    filtered[center] = neighbourhood[radius];
  }
  return filtered;
}
/**
 * Rounds a number to a fixed number of decimal places by scaling it up,
 * rounding to the nearest integer and scaling back down.
 * @param {number} num The number to round
 * @param {number} decimals The number of decimals
 * @returns {number} The rounded number
 */
export function round(num, decimals) {
  const scale = Math.pow(10, decimals);
  const scaled = Math.round(num * scale);
  return scaled / scale;
}
/**
 * Rounds a number to the nearest integer, resolving exact ties (`.5`) towards the
 * nearest even integer. Also known as "bankers' rounding"; this is the default
 * rounding mode in python. For example: 1.5 rounds to 2 and 2.5 rounds to 2.
 *
 * @param {number} x The number to round
 * @returns {number} The rounded number
 */
export function bankers_round(x) {
  const nearest = Math.round(x);

  // Anything that is not exactly halfway between two integers rounds normally.
  const isTie = Math.abs(x) % 1 === 0.5;
  if (!isTie) {
    return nearest;
  }

  // Math.round resolves ties upwards; step back down when that landed on an odd integer.
  return nearest % 2 === 0 ? nearest : nearest - 1;
}
| /** | ||
| * @file Helper module for `Tensor` processing. | ||
| * | ||
| * These functions and classes are only used internally, | ||
| * meaning an end-user shouldn't need to access anything here. | ||
| * | ||
| * @module utils/tensor | ||
| */ | ||
| import { interpolate_data, permute_data } from "./maths.js"; | ||
// Lookup table from tensor data-type names to the array constructor used to store that type.
const DataTypeMap = {
  // Floating point
  float32: Float32Array,
  float64: Float64Array,
  // Strings are kept in a plain array (string[])
  string: Array,
  // Integers up to 32 bits
  int8: Int8Array,
  uint8: Uint8Array,
  int16: Int16Array,
  uint16: Uint16Array,
  int32: Int32Array,
  uint32: Uint32Array,
  // 64-bit integers are backed by BigInt arrays
  int64: BigInt64Array,
  uint64: BigUint64Array,
  // Booleans are stored one byte per value
  bool: Uint8Array,
};
// The table is a constant: prevent accidental modification.
Object.freeze(DataTypeMap);
| /** | ||
| * @typedef {keyof typeof DataTypeMap} DataType | ||
| * @typedef {import('./maths.js').AnyTypedArray | any[]} DataArray | ||
| */ | ||
// NOTE: Empty placeholder class, kept only to facilitate git merges; it is not used yet.
class ONNXTensor {}
| export class Tensor { | ||
| /** @type {number[]} Dimensions of the tensor. */ | ||
| dims; | ||
| /** @type {DataType} Type of the tensor. */ | ||
| type; | ||
| /** @type {DataArray} The data stored in the tensor. */ | ||
| data; | ||
| /** @type {number} The number of elements in the tensor. */ | ||
| size; | ||
| /** | ||
| * Create a new Tensor or copy an existing Tensor. | ||
| * @param {[DataType, DataArray, number[]]|[import('onnxruntime-common').Tensor]} args | ||
| */ | ||
| constructor(...args) { | ||
| if (args[0] instanceof ONNXTensor) { | ||
| // Create shallow copy | ||
| Object.assign(this, args[0]); | ||
| } else { | ||
| // Create new tensor | ||
| Object.assign( | ||
| this, | ||
| new ONNXTensor( | ||
| /** @type {DataType} */ (args[0]), | ||
| /** @type {Exclude<import('./maths.js').AnyTypedArray, Uint8ClampedArray>} */ ( | ||
| args[1] | ||
| ), | ||
| args[2] | ||
| ) | ||
| ); | ||
| } | ||
| return new Proxy(this, { | ||
| get: (obj, key) => { | ||
| if (typeof key === "string") { | ||
| let index = Number(key); | ||
| if (Number.isInteger(index)) { | ||
| // key is an integer (i.e., index) | ||
| return obj._getitem(index); | ||
| } | ||
| } | ||
| // @ts-ignore | ||
| return obj[key]; | ||
| }, | ||
| set: (obj, key, value) => { | ||
| // TODO allow setting of data | ||
| // @ts-ignore | ||
| return (obj[key] = value); | ||
| }, | ||
| }); | ||
| } | ||
| /** | ||
| * Returns an iterator object for iterating over the tensor data in row-major order. | ||
| * If the tensor has more than one dimension, the iterator will yield subarrays. | ||
| * @returns {Iterator} An iterator object for iterating over the tensor data in row-major order. | ||
| */ | ||
| *[Symbol.iterator]() { | ||
| const [iterLength, ...iterDims] = this.dims; | ||
| if (iterDims.length > 0) { | ||
| const iterSize = iterDims.reduce((a, b) => a * b); | ||
| for (let i = 0; i < iterLength; ++i) { | ||
| yield this._subarray(i, iterSize, iterDims); | ||
| } | ||
| } else { | ||
| yield* this.data; | ||
| } | ||
| } | ||
| /** | ||
| * Index into a Tensor object. | ||
| * @param {number} index The index to access. | ||
| * @returns {Tensor} The data at the specified index. | ||
| */ | ||
| _getitem(index) { | ||
| const [iterLength, ...iterDims] = this.dims; | ||
| index = safeIndex(index, iterLength); | ||
| if (iterDims.length > 0) { | ||
| const iterSize = iterDims.reduce((a, b) => a * b); | ||
| return this._subarray(index, iterSize, iterDims); | ||
| } else { | ||
| return new Tensor(this.type, [this.data[index]], iterDims); | ||
| } | ||
| } | ||
| /** | ||
| * @param {number|bigint} item The item to search for in the tensor | ||
| * @returns {number} The index of the first occurrence of item in the tensor data. | ||
| */ | ||
| indexOf(item) { | ||
| for (let index = 0; index < this.data.length; ++index) { | ||
| // Note: == instead of === so we can match Ints with BigInts | ||
| if (this.data[index] == item) { | ||
| return index; | ||
| } | ||
| } | ||
| return -1; | ||
| } | ||
| /** | ||
| * @param {number} index | ||
| * @param {number} iterSize | ||
| * @param {any} iterDims | ||
| * @returns {Tensor} | ||
| */ | ||
| _subarray(index, iterSize, iterDims) { | ||
| const o1 = index * iterSize; | ||
| const o2 = (index + 1) * iterSize; | ||
| // We use subarray if available (typed array), otherwise we use slice (normal array) | ||
| const data = | ||
| "subarray" in this.data | ||
| ? this.data.subarray(o1, o2) | ||
| : this.data.slice(o1, o2); | ||
| return new Tensor(this.type, data, iterDims); | ||
| } | ||
| /** | ||
| * Returns the value of this tensor as a standard JavaScript Number. This only works | ||
| * for tensors with one element. For other cases, see `Tensor.tolist()`. | ||
| * @returns {number|bigint} The value of this tensor as a standard JavaScript Number. | ||
| * @throws {Error} If the tensor has more than one element. | ||
| */ | ||
| item() { | ||
| if (this.data.length !== 1) { | ||
| throw new Error( | ||
| `a Tensor with ${this.data.length} elements cannot be converted to Scalar` | ||
| ); | ||
| } | ||
| return this.data[0]; | ||
| } | ||
| /** | ||
| * Convert tensor data to a n-dimensional JS list | ||
| * @returns {Array} | ||
| */ | ||
| tolist() { | ||
| return reshape(this.data, this.dims); | ||
| } | ||
| /** | ||
| * Return a new Tensor with the sigmoid function applied to each element. | ||
| * @returns {Tensor} The tensor with the sigmoid function applied. | ||
| */ | ||
| sigmoid() { | ||
| return this.clone().sigmoid_(); | ||
| } | ||
| /** | ||
| * Applies the sigmoid function to the tensor in place. | ||
| * @returns {Tensor} Returns `this`. | ||
| */ | ||
| sigmoid_() { | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| this.data[i] = 1 / (1 + Math.exp(-this.data[i])); | ||
| } | ||
| return this; | ||
| } | ||
| /** | ||
| * Return a new Tensor with every element multiplied by a constant. | ||
| * @param {number} val The value to multiply by. | ||
| * @returns {Tensor} The new tensor. | ||
| */ | ||
| mul(val) { | ||
| return this.clone().mul_(val); | ||
| } | ||
| /** | ||
| * Multiply the tensor by a constant in place. | ||
| * @param {number} val The value to multiply by. | ||
| * @returns {Tensor} Returns `this`. | ||
| */ | ||
| mul_(val) { | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| this.data[i] *= val; | ||
| } | ||
| return this; | ||
| } | ||
| /** | ||
| * Return a new Tensor with every element added by a constant. | ||
| * @param {number} val The value to add by. | ||
| * @returns {Tensor} The new tensor. | ||
| */ | ||
| add(val) { | ||
| return this.clone().add_(val); | ||
| } | ||
| /** | ||
| * Add the tensor by a constant in place. | ||
| * @param {number} val The value to add by. | ||
| * @returns {Tensor} Returns `this`. | ||
| */ | ||
| add_(val) { | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| this.data[i] += val; | ||
| } | ||
| return this; | ||
| } | ||
| clone() { | ||
| return new Tensor(this.type, this.data.slice(), this.dims.slice()); | ||
| } | ||
| slice(...slices) { | ||
| // This allows for slicing with ranges and numbers | ||
| let newTensorDims = []; | ||
| let newOffsets = []; | ||
| // slices is an array of numbers or arrays of numbers | ||
| // e.g., slices = [0, [1, 3], null, [0, 3]] | ||
| for (let sliceIndex = 0; sliceIndex < this.dims.length; ++sliceIndex) { | ||
| let slice = slices[sliceIndex]; | ||
| if (slice === null || slice === undefined) { | ||
| // null or undefined means take the whole dimension | ||
| newOffsets.push([0, this.dims[sliceIndex]]); | ||
| newTensorDims.push(this.dims[sliceIndex]); | ||
| } else if (typeof slice === "number") { | ||
| slice = safeIndex(slice, this.dims[sliceIndex], sliceIndex); | ||
| // A number means take a single element | ||
| newOffsets.push([slice, slice + 1]); | ||
| } else if (Array.isArray(slice) && slice.length === 2) { | ||
| // An array of length 2 means take a range of elements | ||
| if (slice[0] > slice[1]) { | ||
| throw new Error(`Invalid slice: ${slice}`); | ||
| } | ||
| let offsets = [ | ||
| Math.max(slice[0], 0), | ||
| Math.min(slice[1], this.dims[sliceIndex]), | ||
| ]; | ||
| newOffsets.push(offsets); | ||
| newTensorDims.push(offsets[1] - offsets[0]); | ||
| } else { | ||
| throw new Error(`Invalid slice: ${slice}`); | ||
| } | ||
| } | ||
| let newDims = newOffsets.map(([start, end]) => end - start); | ||
| let newBufferSize = newDims.reduce((a, b) => a * b); | ||
| // Allocate memory | ||
| // @ts-ignore | ||
| let data = new this.data.constructor(newBufferSize); | ||
| // Precompute strides | ||
| const stride = this.stride(); | ||
| for (let i = 0; i < newBufferSize; ++i) { | ||
| let originalIndex = 0; | ||
| for (let j = newDims.length - 1, num = i; j >= 0; --j) { | ||
| const size = newDims[j]; | ||
| originalIndex += ((num % size) + newOffsets[j][0]) * stride[j]; | ||
| num = Math.floor(num / size); | ||
| } | ||
| data[i] = this.data[originalIndex]; | ||
| } | ||
| return new Tensor(this.type, data, newTensorDims); | ||
| } | ||
| /** | ||
| * Return a permuted version of this Tensor, according to the provided dimensions. | ||
| * @param {...number} dims Dimensions to permute. | ||
| * @returns {Tensor} The permuted tensor. | ||
| */ | ||
| permute(...dims) { | ||
| return permute(this, dims); | ||
| } | ||
| // TODO: implement transpose. For now (backwards compatibility), it's just an alias for permute() | ||
| transpose(...dims) { | ||
| return this.permute(...dims); | ||
| } | ||
| // TODO add .max() and .min() methods | ||
| /** | ||
| * Returns the sum of each row of the input tensor in the given dimension dim. | ||
| * | ||
| * @param {number} [dim=null] The dimension or dimensions to reduce. If `null`, all dimensions are reduced. | ||
| * @param {boolean} keepdim Whether the output tensor has `dim` retained or not. | ||
| * @returns The summed tensor | ||
| */ | ||
| sum(dim = null, keepdim = false) { | ||
| return this.norm(1, dim, keepdim); | ||
| } | ||
| /** | ||
| * Returns the matrix norm or vector norm of a given tensor. | ||
| * @param {number|string} [p='fro'] The order of norm | ||
| * @param {number} [dim=null] Specifies which dimension of the tensor to calculate the norm across. | ||
| * If dim is None, the norm will be calculated across all dimensions of input. | ||
| * @param {boolean} [keepdim=false] Whether the output tensors have dim retained or not. | ||
| * @returns {Tensor} The norm of the tensor. | ||
| */ | ||
| norm(p = "fro", dim = null, keepdim = false) { | ||
| if (p === "fro") { | ||
| // NOTE: Since we only support integer dims, Frobenius norm produces the same result as p=2. | ||
| p = 2; | ||
| } else if (typeof p === "string") { | ||
| throw Error(`Unsupported norm: ${p}`); | ||
| } | ||
| if (dim === null) { | ||
| // @ts-ignore | ||
| let val = this.data.reduce((a, b) => a + b ** p, 0) ** (1 / p); | ||
| return new Tensor(this.type, [val], []); | ||
| } | ||
| // Negative indexing | ||
| dim = safeIndex(dim, this.dims.length); | ||
| // Calculate the shape of the resulting array after summation | ||
| const resultDims = this.dims.slice(); // Copy the original dimensions | ||
| resultDims[dim] = 1; // Remove the specified axis | ||
| // Create a new array to store the accumulated values | ||
| // @ts-ignore | ||
| const result = new this.data.constructor(this.data.length / this.dims[dim]); | ||
| // Iterate over the data array | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| // Calculate the index in the resulting array | ||
| let resultIndex = 0; | ||
| for ( | ||
| let j = this.dims.length - 1, num = i, resultMultiplier = 1; | ||
| j >= 0; | ||
| --j | ||
| ) { | ||
| const size = this.dims[j]; | ||
| if (j !== dim) { | ||
| const index = num % size; | ||
| resultIndex += index * resultMultiplier; | ||
| resultMultiplier *= resultDims[j]; | ||
| } | ||
| num = Math.floor(num / size); | ||
| } | ||
| // Accumulate the value at the current index | ||
| result[resultIndex] += this.data[i] ** p; | ||
| } | ||
| if (p !== 1) { | ||
| for (let i = 0; i < result.length; ++i) { | ||
| result[i] = result[i] ** (1 / p); | ||
| } | ||
| } | ||
| if (!keepdim) { | ||
| resultDims.splice(dim, 1); | ||
| } | ||
| return new Tensor(this.type, result, resultDims); | ||
| } | ||
| /** | ||
| * Performs `L_p` normalization of inputs over specified dimension. Operates in place. | ||
| * @param {number} [p=2] The exponent value in the norm formulation | ||
| * @param {number} [dim=1] The dimension to reduce | ||
| * @returns {Tensor} `this` for operation chaining. | ||
| */ | ||
| normalize_(p = 2.0, dim = 1) { | ||
| dim = safeIndex(dim, this.dims.length); | ||
| const norm = this.norm(p, dim, true); | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| // Calculate the index in the resulting array | ||
| let resultIndex = 0; | ||
| for ( | ||
| let j = this.dims.length - 1, num = i, resultMultiplier = 1; | ||
| j >= 0; | ||
| --j | ||
| ) { | ||
| const size = this.dims[j]; | ||
| if (j !== dim) { | ||
| const index = num % size; | ||
| resultIndex += index * resultMultiplier; | ||
| resultMultiplier *= this.dims[j]; | ||
| } | ||
| num = Math.floor(num / size); | ||
| } | ||
| // Divide by normalized value | ||
| this.data[i] /= norm.data[resultIndex]; | ||
| } | ||
| return this; | ||
| } | ||
| /** | ||
| * Performs `L_p` normalization of inputs over specified dimension. | ||
| * @param {number} [p=2] The exponent value in the norm formulation | ||
| * @param {number} [dim=1] The dimension to reduce | ||
| * @returns {Tensor} The normalized tensor. | ||
| */ | ||
| normalize(p = 2.0, dim = 1) { | ||
| return this.clone().normalize_(p, dim); | ||
| } | ||
| /** | ||
| * Compute and return the stride of this tensor. | ||
| * Stride is the jump necessary to go from one element to the next one in the specified dimension dim. | ||
| * @returns {number[]} The stride of this tensor. | ||
| */ | ||
| stride() { | ||
| return dimsToStride(this.dims); | ||
| } | ||
| /** | ||
| * Returns a tensor with all specified dimensions of input of size 1 removed. | ||
| * | ||
| * NOTE: The returned tensor shares the storage with the input tensor, so changing the contents of one will change the contents of the other. | ||
| * If you would like a copy, use `tensor.clone()` before squeezing. | ||
| * | ||
| * @param {number} [dim=null] If given, the input will be squeezed only in the specified dimensions. | ||
| * @returns The squeezed tensor | ||
| */ | ||
| squeeze(dim = null) { | ||
| return new Tensor(this.type, this.data, calc_squeeze_dims(this.dims, dim)); | ||
| } | ||
| /** | ||
| * In-place version of @see {@link Tensor.squeeze} | ||
| */ | ||
| squeeze_(dim = null) { | ||
| this.dims = calc_squeeze_dims(this.dims, dim); | ||
| return this; | ||
| } | ||
| /** | ||
| * Returns a new tensor with a dimension of size one inserted at the specified position. | ||
| * | ||
| * NOTE: The returned tensor shares the same underlying data with this tensor. | ||
| * | ||
| * @param {number} dim The index at which to insert the singleton dimension | ||
| * @returns The unsqueezed tensor | ||
| */ | ||
| unsqueeze(dim = null) { | ||
| return new Tensor( | ||
| this.type, | ||
| this.data, | ||
| calc_unsqueeze_dims(this.dims, dim) | ||
| ); | ||
| } | ||
| /** | ||
| * In-place version of @see {@link Tensor.unsqueeze} | ||
| */ | ||
| unsqueeze_(dim = null) { | ||
| this.dims = calc_unsqueeze_dims(this.dims, dim); | ||
| return this; | ||
| } | ||
| /** | ||
| * In-place version of @see {@link Tensor.flatten} | ||
| */ | ||
| flatten_(start_dim = 0, end_dim = -1) { | ||
| // TODO validate inputs | ||
| end_dim = (end_dim + this.dims.length) % this.dims.length; | ||
| let dimsToKeepBefore = this.dims.slice(0, start_dim); | ||
| let dimsToFlatten = this.dims.slice(start_dim, end_dim + 1); | ||
| let dimsToKeepAfter = this.dims.slice(end_dim + 1); | ||
| this.dims = [ | ||
| ...dimsToKeepBefore, | ||
| dimsToFlatten.reduce((a, b) => a * b, 1), | ||
| ...dimsToKeepAfter, | ||
| ]; | ||
| return this; | ||
| } | ||
| /** | ||
| * Flattens input by reshaping it into a one-dimensional tensor. | ||
| * If `start_dim` or `end_dim` are passed, only dimensions starting with `start_dim` | ||
| * and ending with `end_dim` are flattened. The order of elements in input is unchanged. | ||
| * @param {number} start_dim the first dim to flatten | ||
| * @param {number} end_dim the last dim to flatten | ||
| * @returns The flattened tensor. | ||
| */ | ||
| flatten(start_dim = 0, end_dim = -1) { | ||
| return this.clone().flatten_(start_dim, end_dim); | ||
| } | ||
| /** | ||
| * Returns a new tensor with the same data as the `self` tensor but of a different `shape`. | ||
| * @param {...number} dims the desired size | ||
| * @returns {Tensor} The tensor with the same data but different shape | ||
| */ | ||
| view(...dims) { | ||
| // TODO: validate dims | ||
| let inferredIndex = -1; | ||
| for (let i = 0; i < dims.length; ++i) { | ||
| if (dims[i] === -1) { | ||
| if (inferredIndex !== -1) { | ||
| throw new Error("Only one dimension can be inferred"); | ||
| } | ||
| inferredIndex = i; | ||
| } | ||
| } | ||
| if (inferredIndex !== -1) { | ||
| // Some dimension must be inferred | ||
| const productOther = dims.reduce((product, curr, index) => { | ||
| return index !== inferredIndex ? product * curr : product; | ||
| }, 1); | ||
| dims[inferredIndex] = this.data.length / productOther; | ||
| } | ||
| return new Tensor(this.type, this.data, dims); // NOTE: uses same underlying storage | ||
| } | ||
| neg_() { | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| this.data[i] = -this.data[i]; | ||
| } | ||
| return this; | ||
| } | ||
| neg() { | ||
| return this.clone().neg_(); | ||
| } | ||
| /** | ||
| * In-place version of @see {@link Tensor.clamp} | ||
| */ | ||
| clamp_(min, max) { | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| this.data[i] = Math.min(Math.max(this.data[i], min), max); | ||
| } | ||
| return this; | ||
| } | ||
| /** | ||
| * Clamps all elements in input into the range [ min, max ] | ||
| * @param {number} min lower-bound of the range to be clamped to | ||
| * @param {number} max upper-bound of the range to be clamped to | ||
| * @returns the output tensor. | ||
| */ | ||
| clamp(min, max) { | ||
| return this.clone().clamp_(min, max); | ||
| } | ||
| /** | ||
| * In-place version of @see {@link Tensor.round} | ||
| */ | ||
| round_() { | ||
| for (let i = 0; i < this.data.length; ++i) { | ||
| this.data[i] = Math.round(this.data[i]); | ||
| } | ||
| return this; | ||
| } | ||
| /** | ||
| * Rounds elements of input to the nearest integer. | ||
| * @returns the output tensor. | ||
| */ | ||
| round() { | ||
| return this.clone().round_(); | ||
| } | ||
| /** | ||
| * Performs Tensor dtype conversion. | ||
| * @param {DataType} type The desired data type. | ||
| * @returns {Tensor} The converted tensor. | ||
| */ | ||
| to(type) { | ||
| // If the self Tensor already has the correct dtype, then self is returned. | ||
| if (this.type === type) return this; | ||
| // Otherwise, the returned tensor is a copy of self with the desired dtype. | ||
| if (!DataTypeMap.hasOwnProperty(type)) { | ||
| throw new Error(`Unsupported type: ${type}`); | ||
| } | ||
| // @ts-ignore | ||
| return new Tensor(type, DataTypeMap[type].from(this.data), this.dims); | ||
| } | ||
| } | ||
| /** | ||
| * This creates a nested array of a given type and depth (see examples). | ||
| * | ||
| * @example | ||
| * NestArray<string, 1>; // string[] | ||
| * @example | ||
| * NestArray<number, 2>; // number[][] | ||
| * @example | ||
| * NestArray<string, 3>; // string[][][] etc. | ||
| * @template T | ||
| * @template {number} Depth | ||
| * @template {never[]} [Acc=[]] | ||
| * @typedef {Acc['length'] extends Depth ? T : NestArray<T[], Depth, [...Acc, never]>} NestArray | ||
| */ | ||
/**
 * Reshapes a 1-dimensional array into an n-dimensional array, according to the provided dimensions.
 * An empty `dimensions` array denotes a scalar: the single element is returned unwrapped.
 *
 * @example
 * reshape([10                    ], [1      ]); // Type: number[]      Value: [10]
 * reshape([1, 2, 3, 4            ], [2, 2   ]); // Type: number[][]    Value: [[1, 2], [3, 4]]
 * reshape([1, 2, 3, 4, 5, 6, 7, 8], [2, 2, 2]); // Type: number[][][]  Value: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 * reshape([1, 2, 3, 4, 5, 6, 7, 8], [4, 2   ]); // Type: number[][]    Value: [[1, 2], [3, 4], [5, 6], [7, 8]]
 * reshape([10                    ], [       ]); // Type: number        Value: 10
 * @param {T[]|DataArray} data The input array to reshape.
 * @param {DIM} dimensions The target shape/dimensions.
 * @template T
 * @template {[number]|number[]} DIM
 * @returns {NestArray<T, DIM["length"]>} The reshaped array.
 * @throws {Error} If the number of elements does not match the product of `dimensions`.
 */
function reshape(data, dimensions) {
  const totalElements = data.length;
  // Seed the product with 1: the empty product is the size of a scalar shape, and
  // reduce() without an initial value would throw a TypeError on an empty array.
  const dimensionSize = dimensions.reduce((a, b) => a * b, 1);
  if (totalElements !== dimensionSize) {
    throw Error(
      `cannot reshape array of size ${totalElements} into shape (${dimensions})`
    );
  }
  /** @type {any} */
  let reshapedArray = data;
  // Group from the innermost dimension outwards: each pass chunks the current
  // sequence into runs of `dimensions[i]` items.
  for (let i = dimensions.length - 1; i >= 0; i--) {
    reshapedArray = reshapedArray.reduce(
      (acc, val) => {
        const lastArray = acc[acc.length - 1];
        if (lastArray.length < dimensions[i]) {
          lastArray.push(val);
        } else {
          acc.push([val]);
        }
        return acc;
      },
      [[]]
    );
  }
  // After the outermost pass everything sits in one wrapper chunk; for a scalar
  // shape no pass ran and index 0 is the sole element itself.
  return reshapedArray[0];
}
/**
 * Permute a tensor's axes, returning a new tensor built from the output of `permute_data`.
 * @param {any} tensor The input tensor to permute.
 * @param {Array} axes The axes to permute the tensor along.
 * @returns {Tensor} The permuted tensor.
 */
export function permute(tensor, axes) {
  const permuted = permute_data(tensor.data, tensor.dims, axes);
  const permutedData = permuted[0];
  const shape = permuted[1];
  return new Tensor(tensor.type, permutedData, shape);
}
/**
 * Resize a tensor to the given spatial size by delegating to `interpolate_data`.
 * The last three dims of `input` are read as channels, height and width; when there is
 * no third-from-last dim, a channel count of 1 is used.
 * @param {Tensor} input The input tensor to interpolate. Data must be channel-first (i.e., [c, h, w])
 * @param {number[]} size The output size of the image, as `[out_height, out_width]`
 * @param {string} mode The interpolation mode
 * @param {boolean} align_corners Whether to align corners.
 * @returns {Tensor} The interpolated tensor, with dims `[channels, out_height, out_width]`.
 */
export function interpolate(
  input,
  [out_height, out_width],
  mode = "bilinear",
  align_corners = false
) {
  const { dims } = input;
  const in_channels = dims.at(-3) ?? 1;
  const sourceSize = [in_channels, dims.at(-2), dims.at(-1)];
  const targetSize = [out_height, out_width];
  const output = interpolate_data(
    /** @type {import('./maths.js').TypedArray}*/ (input.data),
    sourceSize,
    targetSize,
    mode,
    align_corners
  );
  return new Tensor(input.type, output, [in_channels, out_height, out_width]);
}
/**
 * Attention-weighted mean pooling over the sequence axis of the last hidden state.
 * For each batch item and embedding index, the hidden values are multiplied by the
 * attention mask, summed over the sequence, and divided by the sum of the mask.
 * @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
 * @param {Tensor} attention_mask Tensor of shape [batchSize, seqLength]
 * @returns {Tensor} Returns a new Tensor of shape [batchSize, embedDim].
 */
export function mean_pooling(last_hidden_state, attention_mask) {
  const [batchSize, seqLength, embedDim] = last_hidden_state.dims;
  const hidden = last_hidden_state.data;
  const mask = attention_mask.data;
  const shape = [batchSize, embedDim];
  // @ts-ignore
  const pooled = new hidden.constructor(batchSize * embedDim);
  for (let b = 0; b < batchSize; ++b) {
    const hiddenBase = b * embedDim * seqLength;
    const maskBase = b * seqLength;
    for (let e = 0; e < embedDim; ++e) {
      let weighted = 0;
      let active = 0;
      // Pool over all positions in the sequence
      for (let s = 0; s < seqLength; ++s) {
        // Number() so mask entries stored as bigint can be mixed with number arithmetic.
        const weight = Number(mask[maskBase + s]);
        active += weight;
        weighted += hidden[hiddenBase + e + s * embedDim] * weight;
      }
      pooled[b * embedDim + e] = weighted / active;
    }
  }
  return new Tensor(last_hidden_state.type, pooled, shape);
}
/**
 * Apply Layer Normalization over the last dimension of a 2D input.
 *
 * Each row is normalized as `(x - mean) / (std + eps)`, where `std` and `mean` come from
 * `std_mean(input, 1, 0, true)` (per-row statistics with correction 0).
 *
 * @param {Tensor} input The input tensor; must have exactly 2 dims `[batchSize, featureDim]`.
 * @param {number[]} normalized_shape Must be the 1D array `[input.dims[1]]`.
 * @param {Object} options The options for the layer normalization
 * @param {number} [options.eps=1e-5] A value added to the denominator for numerical stability.
 * @returns {Tensor} The normalized tensor (same type and dims as `input`).
 * @throws {Error} If `input` is not 2D, or `normalized_shape` is not `[input.dims[1]]`.
 */
export function layer_norm(input, normalized_shape, { eps = 1e-5 } = {}) {
  if (input.dims.length !== 2) {
    throw new Error("`layer_norm` currently only supports 2D input.");
  }
  const [batchSize, featureDim] = input.dims;
  // Reject the shape when EITHER requirement fails. The previous `&&` only threw when
  // both failed, so a 1D shape with the wrong size slipped through unnoticed.
  if (normalized_shape.length !== 1 || normalized_shape[0] !== featureDim) {
    throw new Error(
      "`normalized_shape` must be a 1D array with shape `[input.dims[1]]`."
    );
  }
  const [std, mean] = std_mean(input, 1, 0, true);
  // @ts-ignore
  const returnedData = new input.data.constructor(input.data.length);
  for (let i = 0; i < batchSize; ++i) {
    const offset = i * featureDim;
    for (let j = 0; j < featureDim; ++j) {
      const offset2 = offset + j;
      // One std/mean entry per row, hence the index `i`.
      returnedData[offset2] =
        (input.data[offset2] - mean.data[i]) / (std.data[i] + eps);
    }
  }
  return new Tensor(input.type, returnedData, input.dims);
}
/**
 * Helper function to calculate new dimensions when performing a squeeze operation.
 * The input array is never mutated; a new array is always returned.
 *
 * Negative axes count from the end (e.g. `-1` is the last dimension). An axis that is
 * out of range, or whose size is not 1, leaves the dims unchanged.
 *
 * @param {number[]} dims The dimensions of the tensor.
 * @param {number|number[]|null} dim The dimension(s) to squeeze; `null` squeezes every size-1 dimension.
 * @returns The new dimensions.
 * @private
 */
function calc_squeeze_dims(dims, dim) {
  const rank = dims.length;
  // Map a negative axis onto its non-negative counterpart; other values pass through.
  const resolveAxis = (axis) => (axis < 0 ? axis + rank : axis);
  if (dim === null) {
    return dims.filter((d) => d !== 1);
  }
  if (typeof dim === "number") {
    const axis = resolveAxis(dim);
    const squeezed = dims.slice();
    if (squeezed[axis] === 1) {
      squeezed.splice(axis, 1);
    }
    return squeezed;
  }
  if (Array.isArray(dim)) {
    const axes = new Set(dim.map(resolveAxis));
    // Drop a dimension only when it is both listed and of size 1.
    return dims.filter((d, i) => d !== 1 || !axes.has(i));
  }
  // Unsupported `dim` value: return an unchanged copy.
  return dims.slice();
}
/**
 * Helper function to calculate new dimensions when performing an unsqueeze operation.
 * Returns a copy of `dims` with a 1 inserted at the resolved position.
 * @param {number[]} dims The dimensions of the tensor.
 * @param {number} dim The dimension to unsqueeze.
 * @returns The new dimensions.
 * @private
 */
function calc_unsqueeze_dims(dims, dim) {
  // Validate against length + 1 because inserting after the last dimension is allowed
  // (e.g. dim = -1); out-of-range values make safeIndex throw.
  const position = safeIndex(dim, dims.length + 1);
  const expanded = dims.slice();
  expanded.splice(position, 0, 1);
  return expanded;
}
/**
 * Resolve an index against an array of a given size, allowing negative indexing.
 * @param {number} index The index that will be used.
 * @param {number} size The size of the array.
 * @param {number} [dimension=null] The dimension that the index is for (only used in the error message).
 * @returns {number} The resolved index; for in-range input it lies in `[0, size)`.
 *
 * @throws {Error} If the index is less than `-size` or greater than or equal to `size`.
 * @private
 */
function safeIndex(index, size, dimension = null) {
  const tooLow = index < -size;
  const tooHigh = index >= size;
  if (tooLow || tooHigh) {
    const dimLabel = dimension === null ? "" : " " + dimension;
    throw new Error(
      `IndexError: index ${index} is out of bounds for dimension${dimLabel} with size ${size}`
    );
  }
  // Negative indices wrap around from the end.
  return index < 0 ? ((index % size) + size) % size : index;
}
/**
 * Concatenate tensors along one dimension.
 * The output buffer and type are taken from the first tensor; shapes are not validated.
 * @param {Tensor[]} tensors The array of tensors to concatenate.
 * @param {number} dim The dimension to concatenate along (negative values count from the end).
 * @returns {Tensor} The concatenated tensor.
 */
export function cat(tensors, dim = 0) {
  const first = tensors[0];
  dim = safeIndex(dim, first.dims.length);
  // TODO do validation of shapes
  // Output shape: the first tensor's dims, with the joined axis summed over all inputs.
  const resultDims = first.dims.slice();
  let joinedExtent = 0;
  for (const t of tensors) {
    joinedExtent += t.dims[dim];
  }
  resultDims[dim] = joinedExtent;
  let resultSize = 1;
  for (const extent of resultDims) {
    resultSize *= extent;
  }
  // @ts-ignore
  const result = new first.data.constructor(resultSize);
  if (dim === 0) {
    // Fast path: along the first axis the inputs are simply laid out back to back.
    let writePos = 0;
    tensors.forEach((t) => {
      result.set(t.data, writePos);
      writePos += t.data.length;
    });
  } else {
    // Running offset along the joined axis for the tensor currently being copied.
    let axisShift = 0;
    for (const tensor of tensors) {
      const { data, dims } = tensor;
      for (let flat = 0; flat < data.length; ++flat) {
        // Turn the flat source index into coordinates (last axis fastest), shift the
        // joined axis, and re-flatten against the output shape.
        let target = 0;
        let remainder = flat;
        let weight = 1;
        for (let axis = dims.length - 1; axis >= 0; --axis) {
          const extent = dims[axis];
          const coord = remainder % extent;
          target += (axis === dim ? coord + axisShift : coord) * weight;
          weight *= resultDims[axis];
          remainder = Math.floor(remainder / extent);
        }
        result[target] = data[flat];
      }
      axisShift += dims[dim];
    }
  }
  return new Tensor(first.type, result, resultDims);
}
/**
 * Stack tensors along a new dimension: each tensor is unsqueezed at `dim` and the
 * results are concatenated along that same dimension.
 * @param {Tensor[]} tensors The array of tensors to stack.
 * @param {number} dim The dimension to stack along.
 * @returns {Tensor} The stacked tensor.
 */
export function stack(tensors, dim = 0) {
  // TODO do validation of shapes
  // NOTE: stack expects each tensor to be equal size
  const expanded = tensors.map((t) => t.unsqueeze(dim));
  return cat(expanded, dim);
}
/**
 * Calculate the standard deviation and mean, either over all elements (`dim === null`)
 * or along a single dimension.
 * @param {Tensor} input the input tensor
 * @param {number|null} dim the dimension to reduce. If `null`, all dimensions are reduced.
 * @param {number} correction subtracted from the sample count in the variance denominator. Defaults to Bessel's correction, correction=1.
 * @param {boolean} keepdim whether the output tensors keep the reduced dimension (with size 1).
 * @returns {Tensor[]} A tuple of (std, mean) tensors.
 */
export function std_mean(input, dim = null, correction = 1, keepdim = false) {
  const values = input.data;
  if (dim === null) {
    // Reduce over every element; both outputs are scalars (empty dims).
    // @ts-ignore
    const total = values.reduce((acc, x) => acc + x, 0);
    const average = total / values.length;
    // @ts-ignore
    const squaredDeviation = values.reduce(
      (acc, x) => acc + (x - average) ** 2,
      0
    );
    const deviation = Math.sqrt(
      squaredDeviation / (values.length - correction)
    );
    const scalarMean = new Tensor(input.type, [average], []);
    const scalarStd = new Tensor(input.type, [deviation], []);
    return [scalarStd, scalarMean];
  }
  // Negative indexing
  dim = safeIndex(dim, input.dims.length);
  const meanTensor = mean(input, dim, keepdim);
  const shape = input.dims;
  // Shape of the reduction buffer: the input shape with the reduced axis collapsed to 1.
  const resultDims = shape.slice();
  resultDims[dim] = 1;
  // @ts-ignore
  const result = new values.constructor(values.length / shape[dim]);
  for (let flat = 0; flat < values.length; ++flat) {
    // Flat index of the output slot: same coordinates with the reduced axis skipped.
    let slot = 0;
    let remainder = flat;
    let weight = 1;
    for (let axis = shape.length - 1; axis >= 0; --axis) {
      const extent = shape[axis];
      if (axis !== dim) {
        slot += (remainder % extent) * weight;
        weight *= resultDims[axis];
      }
      remainder = Math.floor(remainder / extent);
    }
    // The mean's flat data has the same layout whether or not keepdim was set.
    result[slot] += (values[flat] - meanTensor.data[slot]) ** 2;
  }
  const denominator = shape[dim] - correction;
  for (let k = 0; k < result.length; ++k) {
    result[k] = Math.sqrt(result[k] / denominator);
  }
  if (!keepdim) {
    resultDims.splice(dim, 1);
  }
  const stdTensor = new Tensor(input.type, result, resultDims);
  return [stdTensor, meanTensor];
}
/**
 * Compute the mean over all elements (`dim === null`) or along a single dimension.
 * @param {Tensor} input the input tensor.
 * @param {number|null} dim the dimension to reduce (negative values count from the end).
 * @param {boolean} keepdim whether the output tensor keeps the reduced dimension (with size 1).
 * @returns A new tensor with means taken along the specified dimension.
 */
export function mean(input, dim = null, keepdim = false) {
  const values = input.data;
  if (dim === null) {
    // Reduce over every element; the result is a scalar (empty dims).
    // @ts-ignore
    const total = values.reduce((acc, x) => acc + x, 0);
    return new Tensor(input.type, [total / values.length], []);
  }
  // Negative indexing
  dim = safeIndex(dim, input.dims.length);
  const shape = input.dims;
  const reducedExtent = shape[dim];
  // Shape of the reduction buffer: the input shape with the reduced axis collapsed to 1.
  const resultDims = shape.slice();
  resultDims[dim] = 1;
  // @ts-ignore
  const result = new values.constructor(values.length / reducedExtent);
  for (let flat = 0; flat < values.length; ++flat) {
    // Flat index of the output slot: same coordinates with the reduced axis skipped.
    let slot = 0;
    let remainder = flat;
    let weight = 1;
    for (let axis = shape.length - 1; axis >= 0; --axis) {
      const extent = shape[axis];
      if (axis !== dim) {
        slot += (remainder % extent) * weight;
        weight *= resultDims[axis];
      }
      remainder = Math.floor(remainder / extent);
    }
    result[slot] += values[flat];
  }
  if (reducedExtent !== 1) {
    // Dividing by 1 would be a no-op, so it is skipped.
    for (let k = 0; k < result.length; ++k) {
      result[k] = result[k] / reducedExtent;
    }
  }
  if (!keepdim) {
    resultDims.splice(dim, 1);
  }
  return new Tensor(input.type, result, resultDims);
}
/**
 * Dynamic time warping over a 2D cost matrix: measures similarity between two temporal
 * sequences (e.g., input audio and output tokens to generate token-level timestamps).
 *
 * NOTE(review): this relies on `Tensor` supporting index access (`tensor[i]`, `tensor[i][j]`),
 * which is defined outside this block; writes through `tensor[i].data[j]` presumably land in
 * the parent tensor's storage — confirm against the Tensor implementation.
 *
 * @param {Tensor} matrix Tensor with dims `[output_length, input_length]`.
 * @returns {number[][]} `[text_indices, time_indices]`, the aligned path in forward order.
 * @throws {Error} If the backtrace encounters a trace value other than 0, 1 or 2.
 */
export function dynamicTimeWarping(matrix) {
  const [output_length, input_length] = matrix.dims;
  const rows = output_length + 1;
  const cols = input_length + 1;
  const outputShape = [rows, cols];
  const cost = new Tensor(
    "float32",
    new Float32Array(rows * cols).fill(Infinity),
    outputShape
  );
  const trace = new Tensor(
    "float32",
    new Float32Array(rows * cols).fill(-1),
    outputShape
  );
  // same as `cost[0][0] = 0`;
  cost[0].data[0] = 0;
  for (let col = 1; col < cols; ++col) {
    for (let row = 1; row < rows; ++row) {
      const diagonal = cost[row - 1][col - 1].item();
      const above = cost[row - 1][col].item();
      const left = cost[row][col - 1].item();
      // Pick the strictly smallest predecessor; any tie falls through to `left`.
      let best = left;
      let step = 2;
      if (diagonal < above && diagonal < left) {
        best = diagonal;
        step = 0;
      } else if (above < diagonal && above < left) {
        best = above;
        step = 1;
      }
      cost[row].data[col] = matrix[row - 1][col - 1].item() + best;
      trace[row].data[col] = step;
    }
  }
  // Border conditions for the backtrace: along the first row only `j` can shrink,
  // along the first column only `i` can shrink.
  // @ts-ignore
  trace.data.fill(2, 0, cols); // trace[0, :] = 2
  for (let row = 0; row < rows; ++row) {
    // trace[:, 0] = 1
    trace[row].data[0] = 1;
  }
  // backtrace
  let i = output_length;
  let j = input_length;
  const text_indices = [];
  const time_indices = [];
  while (i > 0 || j > 0) {
    text_indices.push(i - 1);
    time_indices.push(j - 1);
    const t = trace[i][j].item();
    if (t === 0) {
      --i;
      --j;
    } else if (t === 1) {
      --i;
    } else if (t === 2) {
      --j;
    } else {
      throw new Error(
        `Internal error in dynamic time warping. Unexpected trace[${i}, ${j}]. Please file a bug report.`
      );
    }
  }
  text_indices.reverse();
  time_indices.reverse();
  return [text_indices, time_indices];
}
/**
 * Compute strides for the given dims: the last dimension has stride 1 and each
 * earlier dimension's stride is the product of all later dimension sizes.
 * @param {number[]} dims The dimensions of the tensor.
 * @returns {number[]} The stride for each dimension.
 */
function dimsToStride(dims) {
  const rank = dims.length;
  const strides = new Array(rank);
  let running = 1;
  let axis = rank;
  while (axis-- > 0) {
    strides[axis] = running;
    running *= dims[axis];
  }
  return strides;
}
/**
 * Create an `int64` tensor of the given shape whose elements are all `1n`.
 * @param {number[]} size A sequence of integers defining the shape of the output tensor.
 */
export function ones(size) {
  const numElements = size.reduce((count, extent) => count * extent, 1);
  const data = new BigInt64Array(numElements);
  data.fill(1n);
  return new Tensor("int64", data, size);
}
/**
 * Create a ones tensor (see `ones`) with the same dims as the given tensor.
 * @param {Tensor} tensor The tensor whose dims determine the size of the output.
 * @returns The ones tensor.
 */
export function ones_like(tensor) {
  const { dims } = tensor;
  return ones(dims);
}
/**
 * Packs a 2D embeddings tensor into bits: each group of 8 consecutive values
 * becomes one byte, where a value greater than 0 contributes a 1 bit (the
 * first value of the group is the most significant bit).
 *
 * For 'ubinary' the packed byte is stored as-is in a "uint8" tensor. For
 * 'binary' the packed byte is shifted down by 128 and stored in an "int8" tensor.
 * @param {Tensor} tensor The tensor to quantize.
 * @param {'binary'|'ubinary'} precision The precision to use for quantization.
 * @returns {Tensor} The quantized tensor, with the last dimension divided by 8.
 */
export function quantize_embeddings(tensor, precision) {
  if (tensor.dims.length !== 2) {
    throw new Error("The tensor must have 2 dimensions");
  }
  if (tensor.dims.at(-1) % 8 !== 0) {
    throw new Error("The last dimension of the tensor must be a multiple of 8");
  }
  const isSigned = precision === "binary";
  if (!isSigned && precision !== "ubinary") {
    throw new Error("The precision must be either 'binary' or 'ubinary'");
  }

  const source = tensor.data;
  const PackedArray = isSigned ? Int8Array : Uint8Array;
  const packed = new PackedArray(source.length / 8);

  // Build one output byte per iteration from its 8 source values.
  for (let byteIndex = 0; byteIndex < packed.length; ++byteIndex) {
    const start = byteIndex * 8;
    let byte = 0;
    for (let offset = 0; offset < 8; ++offset) {
      if (source[start + offset] > 0) {
        // First value of the group lands in the highest bit.
        byte |= 0x80 >> offset;
      }
    }
    // Signed output is the unsigned byte shifted into the int8 range.
    packed[byteIndex] = isSigned ? byte - 128 : byte;
  }

  return new Tensor(isSigned ? "int8" : "uint8", packed, [
    tensor.dims[0],
    tensor.dims[1] / 8,
  ]);
}
+181
| import type { | ||
| AutoTokenizer, | ||
| PreTrainedTokenizer, | ||
| } from "./tokenizers/tokenizers"; | ||
/** The class-name → tokenizer-class table exposed on `AutoTokenizer`. */
export type TokenizerMapping = typeof AutoTokenizer.TOKENIZER_CLASS_MAPPING;

/** Every class name that appears as a key of {@link TokenizerMapping}. */
export type SupportedTokenizerClasses = keyof TokenizerMapping;

/**
 * Resolves a tokenizer class name to the instance type of the matching class,
 * falling back to `PreTrainedTokenizer` for names that are not in the mapping.
 */
export type TokenizerClassNameMapping<T extends string> =
  T extends SupportedTokenizerClasses
    ? InstanceType<TokenizerMapping[T]>
    : PreTrainedTokenizer;

/**
 * Resolves a tokenizer config (via its `tokenizer_class` field) to the
 * instance type of the matching tokenizer class, falling back to
 * `PreTrainedTokenizer` for unknown class names.
 *
 * Both branches yield an instance type. Previously the known-class branch
 * yielded the class constructor type while the fallback yielded an instance,
 * so the alias described two different kinds of value.
 */
export type TokenizerConfigMapping<
  Config extends {
    tokenizer_class: string;
  }
> = Config["tokenizer_class"] extends SupportedTokenizerClasses
  ? InstanceType<TokenizerMapping[Config["tokenizer_class"]]>
  : PreTrainedTokenizer;

/** Union of the value types of `T`. */
type ValueOf<T> = T[keyof T];

/**
 * Signature of a factory that builds a tokenizer instance of type `M`,
 * optionally overriding parts of the tokenizer JSON and/or config.
 */
export type FromPreTrainedFn<
  M extends InstanceType<ValueOf<TokenizerMapping>>
> = (params?: {
  // TODO: types
  tokenizerJSON?: Partial<NSTokenizerJSON.Root>;
  tokenizerConfig?: Partial<NSTokenizerConfig.Root>;
}) => M;
/**
 * Types describing the tokenizer configuration object that is passed to the
 * loader as `tokenizerConfig`.
 */
export namespace NSTokenizerConfig {
  // TODO full types
  /** Top-level configuration object. */
  export type Root = {
    // --- fields that are always present ---
    tokenizer_class: string;
    model_max_length: number;
    clean_up_tokenization_spaces: boolean;
    eos_token: any;
    unk_token: any;

    // --- optional token fields (loosely typed) ---
    bos_token?: any;
    pad_token?: any;
    additional_special_tokens?: string[];
    added_tokens_decoder?: Record<string, AddedTokensDecoder>;

    // --- optional boolean switches ---
    add_bos_token?: boolean;
    add_eos_token?: boolean;
    do_lower_case?: boolean;
    legacy?: boolean | null;
    remove_space?: boolean;
    spaces_between_special_tokens?: boolean;
    split_special_tokens?: boolean;
    use_default_system_prompt?: boolean;

    // --- remaining optional fields ---
    add_prefix_space?: any;
    auto_map?: any;
    chat_template?: any;
    errors?: string;
    merges_file?: null;
    padding_side?: string;
    sp_model_kwargs?: any;
    vocab_file?: null;
  };

  /** One entry of `Root.added_tokens_decoder`. */
  export type AddedTokensDecoder = {
    content: string;
    special: boolean;
    normalized: boolean;
    single_word: boolean;
    lstrip: boolean;
    rstrip: boolean;
  };

  /** Object holding an `AutoTokenizer` list of strings and/or nulls. */
  export type AutoMap = {
    AutoTokenizer: (string | null)[];
  };

  /** A named template string. */
  export type ChatTemplateElement = {
    name: string;
    template: string;
  };

  /** Placeholder type with no declared fields. */
  export type SPModelKwargs = {};
}
/**
 * Types describing the tokenizer JSON object that is passed to the loader as
 * `tokenizerJSON`.
 */
export namespace NSTokenizerJSON {
  // TODO full types
  /** Top-level tokenizer JSON object. */
  export type Root = {
    version: string;
    // Either may be null or a settings object, so both shapes are accepted
    // (these fields were previously typed as the literal `null` only).
    truncation: Truncation | null;
    padding: Padding | null;
    added_tokens: any[];
    normalizer: any;
    pre_tokenizer: any;
    post_processor: any;
    decoder: any;
    model: any;
  };

  /** Truncation settings, as serialized by Hugging Face `tokenizers`. */
  export type Truncation = {
    // Optional: presumably absent in files written by older versions; verify.
    direction?: string;
    max_length: number;
    strategy: string;
    stride: number;
  };

  /** Padding settings, as serialized by Hugging Face `tokenizers`. */
  export type Padding = {
    // A strategy name, or a fixed target length.
    strategy: string | { Fixed: number };
    direction: string;
    pad_to_multiple_of: number | null;
    pad_id: number;
    pad_type_id: number;
    pad_token: string;
  };

  /** An added-token record, including its numeric id. */
  export type AddedToken = {
    id: number;
    content: string;
    single_word: boolean;
    lstrip: boolean;
    rstrip: boolean;
    normalized: boolean;
    special: boolean;
  };

  /** One element of a `PreTokenizer.pretokenizers` list. */
  export type PretokenizerElement = {
    type: string;
    decoders?: DecoderDecoder[];
    add_prefix_space?: boolean;
    trim_offsets?: boolean;
    use_regex?: boolean;
    individual_digits?: boolean;
  };

  /** One element of a `decoders` list. */
  export type DecoderDecoder = {
    type: string;
    pattern?: Pattern;
    content?: string;
    start?: number;
    stop?: number;
  };

  /** A pattern given as a plain string. */
  export type Pattern = {
    String: string;
  };

  /** Model section: vocabulary, merges and related options. */
  export type Model = {
    type: string;
    dropout: null;
    unk_token: any;
    continuing_subword_prefix: null;
    end_of_word_suffix: null;
    fuse_unk: boolean;
    byte_fallback: boolean;
    vocab: { [key: string]: number };
    merges: string[];
  };

  /** Normalizer section, optionally wrapping a list of normalizers. */
  export type TopLevelNormalizer = {
    type: string;
    normalizers?: NormalizerElement[];
  };

  /** One element of a `TopLevelNormalizer.normalizers` list. */
  export type NormalizerElement = {
    type: string;
    prepend?: string;
    pattern?: Pattern;
    content?: string;
  };

  /** Post-processor section. */
  export type PostProcessor = {
    type: string;
    single: Pair[];
    pair: Pair[];
    special_tokens: { [key: string]: SpecialToken };
    add_prefix_space?: boolean;
    trim_offsets?: boolean;
    use_regex?: boolean;
  };

  /** One item of a post-processor `single` / `pair` list. */
  export type Pair = {
    SpecialToken?: Sequence;
    Sequence?: Sequence;
  };

  /** Identifier plus type id used inside a {@link Pair}. */
  export type Sequence = {
    id: string;
    type_id: number;
  };

  /** A special-token entry of `PostProcessor.special_tokens`. */
  export type SpecialToken = {
    id: string;
    ids: number[];
    tokens: string[];
  };

  /** Pre-tokenizer section, optionally wrapping a list of pre-tokenizers. */
  export type PreTokenizer = {
    type: string;
    pretokenizers?: PretokenizerElement[];
  };
}
+3
-2
| { | ||
| "name": "@lenml/tokenizers", | ||
| "version": "1.0.6", | ||
| "version": "1.0.7", | ||
| "description": "a lightweight no-dependency fork of transformers.js (only tokenizers)", | ||
@@ -21,3 +21,4 @@ "source": "src/main.ts", | ||
| "dist/**.mjs*", | ||
| "dist/**.ts" | ||
| "dist/**.ts", | ||
| "src/**/*" | ||
| ], | ||
@@ -24,0 +25,0 @@ "keywords": [ |
Major refactor
Supply chain risk: The package has recently undergone a major refactor. It may be unstable, or the refactor may indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Long strings
Supply chain risk: The package contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
1891765
15.54%20
100%9435
282.29%5
25%19
90%