Big News: Socket raises $60M Series C at a $1B valuation to secure software supply chains for AI-driven development. Announcement
Sign In

@lenml/tokenizers

Package Overview
Dependencies
Maintainers
1
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@lenml/tokenizers - npm Package Compare versions

Comparing version
1.0.6
to
1.0.7
+3
src/main.ts
export * from "./TokenizerLoader";
export * as tokenizers from "./tokenizers/tokenizers";
export * from "./types";
import {
AutoTokenizer as _AutoTokenizer,
PreTrainedTokenizer,
} from "./tokenizers/tokenizers";
import { NSTokenizerConfig, NSTokenizerJSON } from "./types";
/**
* In-memory tokenizer assets, as accepted by `TokenizerLoader.fromPreTrained`.
*/
interface ITokenizerModelJsonData {
/** Tokenizer definition object (a possibly partial `NSTokenizerJSON.Root`). */
tokenizerJSON: Partial<NSTokenizerJSON.Root>;
/** Tokenizer configuration object (a possibly partial `NSTokenizerConfig.Root`). */
tokenizerConfig: Partial<NSTokenizerConfig.Root>;
}
/**
* Locations of the tokenizer assets, as accepted by `TokenizerLoader.fromPreTrainedUrls`.
* Each URL is fetched and its response body is parsed as JSON.
*/
interface ITokenizerModelUrls {
/** URL of the tokenizer definition JSON. */
tokenizerJSON: string;
/** URL of the tokenizer configuration JSON. */
tokenizerConfig: string;
}
export class TokenizerLoader {
  /**
   * Creates a pre-trained tokenizer from the provided model data.
   *
   * The concrete class is looked up in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`
   * using `tokenizerConfig.tokenizer_class` (with a trailing "Fast" removed).
   * When the name is missing from the mapping a warning is logged and the
   * base `PreTrainedTokenizer` is used instead.
   *
   * @param {ITokenizerModelJsonData} model - The model data containing the tokenizer JSON and configuration.
   * @return {PreTrainedTokenizer} pre-trained tokenizer.
   * @throws {Error} If the tokenizer JSON or configuration is missing.
   */
  static fromPreTrained(model: ITokenizerModelJsonData): PreTrainedTokenizer {
    const { tokenizerJSON, tokenizerConfig } = model;
    if (!tokenizerJSON) {
      throw new Error("tokenizerJSON is required.");
    }
    if (!tokenizerConfig) {
      throw new Error("tokenizerConfig is required.");
    }
    // Some tokenizers are saved with the "Fast" suffix, so we remove that if present.
    const tokenizerName =
      tokenizerConfig.tokenizer_class?.replace(/Fast$/, "") ??
      "PreTrainedTokenizer";
    let cls = (_AutoTokenizer as any).TOKENIZER_CLASS_MAPPING[tokenizerName];
    if (!cls) {
      console.warn(
        `Unknown tokenizer class "${tokenizerName}", attempting to construct from base class.`
      );
      cls = PreTrainedTokenizer;
    }
    return new cls(tokenizerJSON, tokenizerConfig);
  }

  /**
   * Creates a pre-trained tokenizer from the provided model URLs.
   *
   * Both URLs are fetched in parallel and parsed as JSON. Any `tokenizerJSON` /
   * `tokenizerConfig` passed in `options` is shallow-merged over the fetched
   * object, so the override wins on conflicting top-level keys.
   *
   * @param {ITokenizerModelUrls} model - The model URLs containing the tokenizer JSON and configuration.
   * @param {Object} [options] - Optional parameters.
   * @param {any} [options.fetch] - The fetch function to use for making HTTP requests. Defaults to the global `fetch`.
   * @param {Partial<NSTokenizerJSON.Root>} [options.tokenizerJSON] - Additional tokenizer JSON data to merge with the fetched data.
   * @param {Partial<NSTokenizerConfig.Root>} [options.tokenizerConfig] - Additional tokenizer configuration data to merge with the fetched data.
   * @return {Promise<PreTrainedTokenizer>} A promise that resolves to the pre-trained tokenizer.
   * @throws {Error} If no fetch implementation is available, or if a response reports a failed HTTP status.
   */
  static async fromPreTrainedUrls(
    model: ITokenizerModelUrls,
    options?: {
      fetch?: any;
    } & Partial<ITokenizerModelJsonData>
  ): Promise<PreTrainedTokenizer> {
    const fetch =
      (options?.fetch as typeof global.fetch) ??
      globalThis.fetch?.bind(globalThis);
    if (typeof fetch !== "function") {
      throw new Error(
        "No fetch implementation available; pass one via options.fetch."
      );
    }

    // Fetch one URL and parse it as JSON, rejecting failed HTTP responses
    // instead of treating an error page as tokenizer data.
    const fetchJSON = async (url: string) => {
      const res = await fetch(url);
      // Compare against `false` explicitly so minimal custom fetch
      // implementations that do not expose `ok` keep working.
      if (res.ok === false) {
        throw new Error(
          `Failed to fetch "${url}": ${res.status} ${res.statusText}`
        );
      }
      return res.json();
    };

    const [tokenizerJSON, tokenizerConfig] = await Promise.all([
      fetchJSON(model.tokenizerJSON),
      fetchJSON(model.tokenizerConfig),
    ]);
    return TokenizerLoader.fromPreTrained({
      tokenizerJSON: {
        ...tokenizerJSON,
        ...options?.tokenizerJSON,
      },
      tokenizerConfig: {
        ...tokenizerConfig,
        ...options?.tokenizerConfig,
      },
    });
  }
}
# tokenizers.js
This code is forked from `https://github.com/huggingface/transformers`.
# What changes?
- remove onnx dependencies
- remove env dependencies (esm/hf_repo_downloader)
# License
Apache-2.0

Sorry, the diff of this file is too big to display

/**
* @file Core utility functions/classes for Transformers.js.
*
* These are only used internally, meaning an end-user shouldn't
* need to access anything here.
*
* @module utils/core
*/
/**
 * Invokes a progress callback with the given payload, if one was supplied.
 *
 * @param {Function} progress_callback Callback to notify; nothing happens when it is falsy.
 * @param {any} data Payload forwarded to the callback as its only argument.
 * @returns {void}
 * @private
 */
export function dispatchCallback(progress_callback, data) {
  if (!progress_callback) {
    return;
  }
  progress_callback(data);
}
/**
 * Builds a new object in which every value of `data` becomes a key and
 * every key becomes the corresponding value.
 *
 * When several keys share a value, the entry that comes last in
 * `Object.entries(data)` wins.
 *
 * @param {Object} data The object to invert.
 * @returns {Object} The inverted object.
 * @see https://ultimatecourses.com/blog/reverse-object-keys-and-values-in-javascript
 */
export function reverseDictionary(data) {
  const swappedPairs = [];
  for (const [key, value] of Object.entries(data)) {
    swappedPairs.push([value, key]);
  }
  return Object.fromEntries(swappedPairs);
}
/**
 * Backslash-escapes every character of `string` that has a special meaning
 * in a regular expression, so the result can be embedded in a pattern and
 * match the input literally.
 *
 * @param {string} string The string to escape.
 * @returns {string} The escaped string.
 */
export function escapeRegExp(string) {
  const specialChars = /[.*+?^${}()|[\]\\]/g;
  // In the replacement, `$&` stands for the matched character itself.
  return string.replace(specialChars, "\\$&");
}
/**
* A base class for creating callable objects.
*
* Constructing a subclass yields a function: calling `instance(...args)`
* forwards the arguments to `instance._call(...args)`, so subclasses only
* need to override `_call`.
*
* @type {new () => {(...args: any[]): any, _call(...args: any[]): any}}
*/
export const Callable = /** @type {any} */ (
class {
/**
* Creates a new instance of the Callable class.
*
* The constructor returns a function object instead of `this`; that
* function is what callers receive from `new`.
*/
constructor() {
/**
* Creates a closure that delegates to a private method '_call' with the given arguments.
* @type {any}
* @param {...any} args Zero or more arguments to pass to the '_call' method.
* @returns {*} The result of calling the '_call' method.
*/
let closure = function (...args) {
return closure._call(...args);
};
// Re-parent the function onto the prototype of the class actually being
// constructed (`new.target`), so methods defined on subclasses, including
// their `_call`, are reachable from the returned function.
return Object.setPrototypeOf(closure, new.target.prototype);
}
/**
* This method should be implemented in subclasses to provide the
* functionality of the callable object.
*
* @param {any[]} args
* @throws {Error} If the subclass does not implement the `_call` method.
*/
_call(...args) {
throw Error("Must implement _call method in subclass");
}
}
);
/**
 * Tests whether `val` belongs to the typed-array family.
 *
 * The check reads `val.prototype` and asks whether the prototype's parent was
 * created by the intrinsic `TypedArray` constructor. It is therefore the
 * typed-array constructors (e.g. `Float32Array`) that satisfy it; values
 * without a `prototype` property, such as typed-array instances, yield `false`.
 *
 * @param {*} val The value to check.
 * @returns {boolean} True if the check described above succeeds, false otherwise.
 *
 * Adapted from https://stackoverflow.com/a/71091338/13989043
 */
export function isTypedArray(val) {
  const parentProto = val?.prototype?.__proto__;
  return parentProto?.constructor?.name === "TypedArray";
}
/**
 * Tests whether a value is integral: either a `bigint`, or a number for
 * which `Number.isInteger` holds.
 *
 * @param {*} x The value to check.
 * @returns {boolean} True if the value is an integer number or a bigint, false otherwise.
 */
export function isIntegralNumber(x) {
  if (typeof x === "bigint") {
    return true;
  }
  return Number.isInteger(x);
}
/**
 * Tests whether a value is present, i.e. neither `undefined` nor `null`.
 * Other falsy values (`0`, `""`, `false`, `NaN`) count as existing.
 *
 * @param {*} x The value to check.
 * @returns {boolean} True if the value exists, false otherwise.
 */
export function exists(x) {
  return !(x === undefined || x === null);
}
/**
 * Determines the shape of a nested array by following the first element
 * at each nesting level and recording the length found there.
 *
 * Only the first element of each level is inspected; the other elements
 * are not checked for matching lengths.
 *
 * @param {any[]} arr The nested array to measure.
 * @returns {number[]} The length found at each nesting level, outermost first.
 */
export function calculateDimensions(arr) {
  const dimensions = [];
  for (let level = arr; Array.isArray(level); level = level[0]) {
    dimensions.push(level.length);
  }
  return dimensions;
}
/**
 * Object counterpart of Python's `dict.pop`: removes `key` from `obj` and
 * returns the value it held.
 *
 * A key whose value is `undefined` is treated as absent, and passing
 * `undefined` as `defaultValue` is the same as passing no default.
 *
 * @param {Object} obj The object to pop from (mutated when the key is present).
 * @param {string} key The key to pop.
 * @param {*} defaultValue The value to return if the key does not exist.
 * @returns {*} The removed value, or `defaultValue` when the key is absent.
 * @throws {Error} If the key does not exist and no default value is provided.
 */
export function pop(obj, key, defaultValue = undefined) {
  const value = obj[key];
  if (value === undefined) {
    if (defaultValue === undefined) {
      throw Error(`Key ${key} does not exist in object.`);
    }
    return defaultValue;
  }
  delete obj[key];
  return value;
}
/**
 * Concatenates the given arrays into a single, newly allocated array.
 * The inputs are left untouched and only one level is merged (nested
 * arrays inside the inputs are kept as elements).
 * Adapted from https://stackoverflow.com/a/6768642/13989043
 * @param {Array[]} arrs Arrays to merge.
 * @returns {Array} The merged array.
 */
export function mergeArrays(...arrs) {
  return [].concat(...arrs);
}
/**
 * Compute the Cartesian product of given arrays.
 *
 * For two or more arrays, each result is a flat tuple holding one element
 * from every input, in input order (the last array varies fastest). Elements
 * that are themselves arrays are kept intact as tuple members. With a single
 * array, that array is returned as-is.
 *
 * @param {...Array} a Arrays to compute the product
 * @returns {Array} Returns the computed Cartesian product as an array
 * @throws {TypeError} If called with no arrays.
 * @private
 */
export function product(...a) {
  // Cartesian product of items
  // Adapted from https://stackoverflow.com/a/43053803
  return a.reduce((acc, next, idx) =>
    acc.flatMap((d) =>
      // With no initial value the first callback runs at idx === 1, where `d`
      // is still a raw element of the first array. From idx === 2 onwards `d`
      // is a tuple built by a previous step, so it is extended instead of being
      // nested inside a new pair.
      next.map((e) => (idx === 1 ? [d, e] : [...d, e]))
    )
  );
}
/**
 * Computes the reflected offset for index `i` and window size `w`,
 * i.e. `|((i + w) mod 2w) - w|`.
 * @param {number} i The index.
 * @param {number} w The window size.
 * @returns {number} The index offset.
 */
export function calculateReflectOffset(i, w) {
  const period = 2 * w;
  const shifted = (i + w) % period;
  return Math.abs(shifted - w);
}
/**
* @file Custom data structures.
*
* These are only used internally, meaning an end-user shouldn't
* need to access anything here.
*
* @module utils/data-structures
*/
/**
 * Priority queue backed by an array-based binary heap.
 * The root lives at index `0`, and the children of the node at index `i`
 * live at indices `2i + 1` and `2i + 2`.
 *
 * Adapted from the following sources:
 * - https://stackoverflow.com/a/42919752/13989043 (original)
 * - https://github.com/belladoreai/llama-tokenizer-js (minor improvements)
 */
export class PriorityQueue {
  /**
   * Create a new PriorityQueue.
   * @param {Function} comparator Called as `comparator(a, b)`; must return `true` when `a`
   * has higher priority than `b`. The default (`a > b`) yields a max-heap.
   */
  constructor(comparator = (a, b) => a > b) {
    this._heap = [];
    this._comparator = comparator;
  }

  /**
   * Number of elements currently stored.
   */
  get size() {
    return this._heap.length;
  }

  /**
   * Check if the queue is empty.
   * @returns {boolean} `true` if the queue holds no elements, `false` otherwise.
   */
  isEmpty() {
    return this.size === 0;
  }

  /**
   * Look at the highest-priority element without removing it.
   * @returns {any} The highest-priority element (`undefined` when the queue is empty).
   */
  peek() {
    return this._heap[0];
  }

  /**
   * Add one or more elements to the queue.
   * @param {...any} values The values to push into the queue.
   * @returns {number} The new size of the queue.
   */
  push(...values) {
    return this.extend(values);
  }

  /**
   * Add every element of `values` to the queue.
   * @param {any[]} values The values to push into the queue.
   * @returns {number} The new size of the queue.
   */
  extend(values) {
    for (const value of values) {
      this._heap.push(value);
      this._siftUp();
    }
    return this.size;
  }

  /**
   * Remove and return the highest-priority element.
   * @returns {any} The removed element (`undefined` when the queue is empty).
   */
  pop() {
    const top = this.peek();
    const last = this.size - 1;
    // Move the root to the end so it can be dropped with a cheap array pop.
    if (last > 0) {
      this._swap(0, last);
    }
    this._heap.pop();
    this._siftDown();
    return top;
  }

  /**
   * Overwrite the highest-priority element with `value` and restore heap order.
   * @param {*} value The new value.
   * @returns {*} The element that was replaced.
   */
  replace(value) {
    const previousTop = this.peek();
    this._heap[0] = value;
    this._siftDown();
    return previousTop;
  }

  /**
   * Index of the parent of the node at index `i`.
   * @param {number} i The index of the node to get the parent of.
   * @returns {number} The index of the parent node.
   * @private
   */
  _parent(i) {
    return ((i + 1) >>> 1) - 1;
  }

  /**
   * Index of the left child of the node at index `i`.
   * @param {number} i The index of the node to get the left child of.
   * @returns {number} The index of the left child.
   * @private
   */
  _left(i) {
    return (i << 1) + 1;
  }

  /**
   * Index of the right child of the node at index `i`.
   * @param {number} i The index of the node to get the right child of.
   * @returns {number} The index of the right child.
   * @private
   */
  _right(i) {
    return (i + 1) << 1;
  }

  /**
   * Apply the comparator to the elements stored at indices `i` and `j`.
   * @param {number} i The index of the first element to compare.
   * @param {number} j The index of the second element to compare.
   * @returns {boolean} The comparator's verdict: `true` when the element at `i` outranks the one at `j`.
   * @private
   */
  _greater(i, j) {
    return this._comparator(this._heap[i], this._heap[j]);
  }

  /**
   * Exchange the elements at indices `i` and `j`.
   * @param {number} i The index of the first element to swap.
   * @param {number} j The index of the second element to swap.
   * @private
   */
  _swap(i, j) {
    const heap = this._heap;
    [heap[i], heap[j]] = [heap[j], heap[i]];
  }

  /**
   * Restore heap order after an append: bubble the last element up
   * while it outranks its parent.
   * @private
   */
  _siftUp() {
    let child = this.size - 1;
    while (child > 0) {
      const parent = this._parent(child);
      if (!this._greater(child, parent)) {
        break;
      }
      this._swap(child, parent);
      child = parent;
    }
  }

  /**
   * Restore heap order after the root changed: push the root down,
   * swapping it with its higher-priority child, until neither child outranks it.
   * @private
   */
  _siftDown() {
    let node = 0;
    for (;;) {
      const left = this._left(node);
      const right = this._right(node);
      const count = this.size;
      const leftOutranks = left < count && this._greater(left, node);
      if (!leftOutranks && !(right < count && this._greater(right, node))) {
        break;
      }
      const maxChild =
        right < count && this._greater(right, left) ? right : left;
      this._swap(node, maxChild);
      node = maxChild;
    }
  }
}
/**
 * A trie structure to efficiently store and search for strings.
 *
 * Strings are split into Unicode code points (not UTF-16 code units), so a
 * character outside the Basic Multilingual Plane occupies a single edge.
 */
export class CharTrie {
  constructor() {
    this.root = CharTrieNode.default();
  }

  /**
   * Adds one or more `texts` to the trie.
   * @param {string[]} texts The strings to add to the trie.
   */
  extend(texts) {
    for (const text of texts) {
      this.push(text);
    }
  }

  /**
   * Adds text to the trie.
   * @param {string} text The string to add to the trie.
   */
  push(text) {
    let node = this.root;
    for (const ch of text) {
      let child = node.children.get(ch);
      if (child === undefined) {
        child = CharTrieNode.default();
        node.children.set(ch, child);
      }
      node = child;
    }
    node.isLeaf = true;
  }

  /**
   * Finds every stored string that is a prefix of `text`.
   * @param {string} text The text whose prefixes are looked up.
   * @yields {string} Each stored string that `text` starts with, shortest first.
   */
  *commonPrefixSearch(text) {
    let node = this.root;
    let prefix = "";
    // Iterate by code point, exactly like `push` does. Indexing with `text[i]`
    // would split surrogate pairs and never match edges created for astral
    // characters such as emoji.
    for (const ch of text) {
      node = node.children.get(ch);
      if (node === undefined) {
        return;
      }
      prefix += ch;
      if (node.isLeaf) {
        yield prefix;
      }
    }
  }
}

/**
 * Represents a node in a character trie.
 */
class CharTrieNode {
  /**
   * Create a new CharTrieNode.
   * @param {boolean} isLeaf Whether a stored string ends at this node.
   * @param {Map<string, CharTrieNode>} children A map containing the node's children, where the key is a character and the value is a `CharTrieNode`.
   */
  constructor(isLeaf, children) {
    this.isLeaf = isLeaf;
    this.children = children;
  }

  /**
   * Returns a new `CharTrieNode` instance with default values.
   * @returns {CharTrieNode} A new `CharTrieNode` instance with `isLeaf` set to `false` and an empty `children` map.
   */
  static default() {
    return new CharTrieNode(false, new Map());
  }
}
/**
* A lattice data structure to be used for tokenization.
*
* Candidate tokens are stored as nodes that span `[pos, pos + length)` of the
* sentence. `beginNodes[p]` lists the nodes starting at position `p` and
* `endNodes[p]` lists the nodes ending at position `p`.
*/
export class TokenLattice {
/**
* Creates a new TokenLattice instance.
*
* @param {string} sentence The input sentence to be tokenized.
* @param {number} bosTokenId The beginning-of-sequence token ID.
* @param {number} eosTokenId The end-of-sequence token ID.
*/
constructor(sentence, bosTokenId, eosTokenId) {
this.sentence = sentence;
this.len = sentence.length;
this.bosTokenId = bosTokenId;
this.eosTokenId = eosTokenId;
this.nodes = [];
// One bucket per boundary position 0..len (inclusive), hence `len + 1`.
this.beginNodes = Array.from({ length: this.len + 1 }, () => []);
this.endNodes = Array.from({ length: this.len + 1 }, () => []);
// Zero-length sentinel nodes: BOS (node id 0) at position 0 and EOS (node id 1) at position `len`.
const bos = new TokenLatticeNode(this.bosTokenId, 0, 0, 0, 0.0);
const eos = new TokenLatticeNode(this.eosTokenId, 1, this.len, 0, 0.0);
// `nodes` receives copies; the originals are the ones placed in the position buckets.
this.nodes.push(bos.clone());
this.nodes.push(eos.clone());
// EOS "begins" at the end of the sentence and BOS "ends" at its start, so
// every path through the lattice runs from BOS to EOS.
this.beginNodes[this.len].push(eos);
this.endNodes[0].push(bos);
}
/**
* Inserts a new token node into the token lattice.
*
* @param {number} pos The starting position of the token.
* @param {number} length The length of the token.
* @param {number} score The score of the token.
* @param {number} tokenId The token ID of the token.
*/
insert(pos, length, score, tokenId) {
// The node id is the node's index in `this.nodes`.
const nodeId = this.nodes.length;
const node = new TokenLatticeNode(tokenId, nodeId, pos, length, score);
this.beginNodes[pos].push(node);
this.endNodes[pos + length].push(node);
this.nodes.push(node);
}
/**
* Implements the Viterbi algorithm to compute the most likely sequence of tokens.
*
* @returns {TokenLatticeNode[]} The nodes on the highest-scoring path, in sentence order and
* without the BOS/EOS sentinels. An empty array is returned when some position has no node
* starting at it, or when a node has no predecessor ending where it starts.
*/
viterbi() {
const len = this.len;
let pos = 0;
// Forward pass: for every node starting at `pos`, pick the best-scoring predecessor ending at `pos`.
while (pos <= len) {
if (this.beginNodes[pos].length == 0) {
return [];
}
for (let rnode of this.beginNodes[pos]) {
rnode.prev = null;
let bestScore = 0.0;
let bestNode = null;
for (let lnode of this.endNodes[pos]) {
// Score of the path reaching `rnode` through `lnode`.
const score = lnode.backtraceScore + rnode.score;
if (bestNode === null || score > bestScore) {
// A copy is stored, capturing `lnode`'s `prev` and `backtraceScore` as they are right now.
bestNode = lnode.clone();
bestScore = score;
}
}
if (bestNode !== null) {
rnode.prev = bestNode;
rnode.backtraceScore = bestScore;
} else {
return [];
}
}
++pos;
}
const results = [];
// The first node starting at `len` is the EOS sentinel added by the constructor.
const root = this.beginNodes[len][0];
const prev = root.prev;
if (prev === null) {
return [];
}
// Backward pass: follow `prev` links from EOS; the loop stops at the node
// without a predecessor (BOS), which is therefore not part of the result.
let node = prev.clone();
while (node.prev !== null) {
results.push(node.clone());
const n = node.clone();
node = n.prev.clone();
}
results.reverse();
return results;
}
/**
* Extracts the text covered by a node.
*
* @param {TokenLatticeNode} node
* @returns {string} The slice of the sentence from `node.pos` to `node.pos + node.length`.
*/
piece(node) {
return this.sentence.slice(node.pos, node.pos + node.length);
}
/**
* @returns {Array} The text pieces of the nodes returned by `viterbi()`, in order.
*/
tokens() {
const nodes = this.viterbi();
return nodes.map((x) => this.piece(x));
}
/**
* @returns {Array} The token IDs of the nodes returned by `viterbi()`, in order.
*/
tokenIds() {
const nodes = this.viterbi();
return nodes.map((x) => x.tokenId);
}
}
/**
 * A candidate token inside a token lattice, together with the bookkeeping
 * fields (`prev`, `backtraceScore`) used while searching for the best path.
 */
class TokenLatticeNode {
  /**
   * @param {number} tokenId The ID of the token associated with this node.
   * @param {number} nodeId The ID of this node.
   * @param {number} pos The starting position of the token in the sentence.
   * @param {number} length The length of the token.
   * @param {number} score The score associated with the token.
   */
  constructor(tokenId, nodeId, pos, length, score) {
    this.tokenId = tokenId;
    this.nodeId = nodeId;
    this.pos = pos;
    this.length = length;
    this.score = score;
    // Path-search state; starts out unlinked with a zero accumulated score.
    this.prev = null;
    this.backtraceScore = 0.0;
  }

  /**
   * Creates a copy of this node. The copy is shallow: `prev` refers to the
   * same node object as the original's `prev`.
   * @returns {TokenLatticeNode} A clone of this node.
   */
  clone() {
    const { tokenId, nodeId, pos, length, score, prev, backtraceScore } = this;
    const copy = new TokenLatticeNode(tokenId, nodeId, pos, length, score);
    copy.prev = prev;
    copy.backtraceScore = backtraceScore;
    return copy;
  }
}
/**
 * Downloads a file with the global `fetch` and returns its bytes.
 *
 * Only locations starting with "http" are supported. The request URL is the
 * plain concatenation `path_or_repo_id + filename`; no separator is inserted.
 *
 * @param {string} path_or_repo_id Base URL of the file; must start with "http".
 * @param {string} filename The name of the file, appended to `path_or_repo_id`.
 * @param {boolean} [fatal=true] Whether to throw an error if the response is not OK.
 * @param {PretrainedOptions} [options] An object containing optional parameters (currently unused).
 *
 * @throws Will throw an error if the response is not OK and `fatal` is true,
 * or if `path_or_repo_id` does not start with "http".
 * @returns {Promise} A Promise that resolves with the file content as an `ArrayBuffer`,
 * or with `null` when the response is not OK and `fatal` is false.
 */
export async function getModelFile(
  path_or_repo_id,
  filename,
  fatal = true,
  options = {}
) {
  if (!path_or_repo_id.startsWith("http")) {
    throw new Error(
      "Filesystem not supported, please implement your own file reading logic."
    );
  }

  const response = await fetch(path_or_repo_id + filename);
  if (response.ok) {
    return response.arrayBuffer();
  }
  if (fatal) {
    throw new Error(`File not found at ${path_or_repo_id}${filename}`);
  }
  return null;
}
/**
 * Loads a file through `getModelFile` and parses its content as UTF-8 encoded JSON.
 *
 * @param {string} modelPath The location of the file, forwarded to `getModelFile`.
 * @param {string} fileName The name of the file to fetch.
 * @param {boolean} [fatal=true] Whether to throw an error if the file is not found.
 * @param {PretrainedOptions} [options] An object containing optional parameters, forwarded to `getModelFile`.
 * @returns {Promise<Object>} The parsed JSON data, or an empty object when the file
 * was not found and `fatal` is false.
 * @throws Will throw an error if the file is not found and `fatal` is true.
 */
export async function getModelJSON(
  modelPath,
  fileName,
  fatal = true,
  options = {}
) {
  const buffer = await getModelFile(modelPath, fileName, fatal, options);
  // `null` signals a missing file in non-fatal mode.
  if (buffer === null) {
    return {};
  }
  const text = new TextDecoder("utf-8").decode(buffer);
  return JSON.parse(text);
}
/**
* @file Helper module for mathematical processing.
*
* These functions and classes are only used internally,
* meaning an end-user shouldn't need to access anything here.
*
* @module utils/maths
*/
/**
* @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array} TypedArray
* @typedef {BigInt64Array | BigUint64Array} BigTypedArray
* @typedef {TypedArray | BigTypedArray} AnyTypedArray
*/
/**
 * Resizes channel-first image data with bilinear interpolation.
 *
 * The input is read as `in_channels` consecutive planes of
 * `in_height * in_width` values; the output uses the same layout with
 * `out_height * out_width` values per plane and is allocated with the
 * input's own constructor.
 *
 * `mode` and `align_corners` are accepted but not used yet.
 *
 * @param {TypedArray} input
 */
export function interpolate_data(
  input,
  [in_channels, in_height, in_width],
  [out_height, out_width],
  mode = "bilinear",
  align_corners = false
) {
  // TODO use mode and align_corners
  const x_scale = out_width / in_width;
  const y_scale = out_height / in_height;

  // @ts-ignore
  const out_img = new input.constructor(out_height * out_width * in_channels);

  // Number of values in one input / output channel plane
  const inPlane = in_height * in_width;
  const outPlane = out_height * out_width;

  for (let row = 0; row < out_height; ++row) {
    // Source row coordinate (pixel centres aligned) and its two neighbouring
    // rows, clamped to the image bounds.
    const srcY = (row + 0.5) / y_scale - 0.5;
    const yFloor = Math.floor(srcY);
    const yHigh = Math.min(yFloor + 1, in_height - 1);
    const yLow = Math.max(yFloor, 0);
    const t = srcY - yLow;
    const lowRowStart = yLow * in_width;
    const highRowStart = yHigh * in_width;

    for (let col = 0; col < out_width; ++col) {
      // Same for the source column coordinate.
      const srcX = (col + 0.5) / x_scale - 0.5;
      const xFloor = Math.floor(srcX);
      const xHigh = Math.min(xFloor + 1, in_width - 1);
      const xLow = Math.max(xFloor, 0);
      const s = srcX - xLow;

      // Bilinear weights of the four neighbours
      const w1 = (1 - s) * (1 - t);
      const w2 = s * (1 - t);
      const w3 = (1 - s) * t;
      const w4 = s * t;

      // Offsets of the four neighbours inside one channel plane
      const idx1 = lowRowStart + xLow;
      const idx2 = lowRowStart + xHigh;
      const idx3 = highRowStart + xLow;
      const idx4 = highRowStart + xHigh;

      const outOffset = row * out_width + col;
      for (let k = 0; k < in_channels; ++k) {
        const cOffset = k * inPlane;
        out_img[k * outPlane + outOffset] =
          w1 * input[cOffset + idx1] +
          w2 * input[cOffset + idx2] +
          w3 * input[cOffset + idx3] +
          w4 * input[cOffset + idx4];
      }
    }
  }
  return out_img;
}
/**
 * Reorders the axes of a flat, row-major `AnyTypedArray`.
 *
 * Output axis `i` corresponds to input axis `axes[i]`. The result is a new
 * array created with the input's own constructor.
 *
 * @template {AnyTypedArray} T
 * @param {T} array The flat data to permute.
 * @param {number[]} dims The shape of `array`.
 * @param {number[]} axes The input axis to place at each output position.
 * @returns {[T, number[]]} The permuted array and the new shape.
 */
export function permute_data(array, dims, axes) {
  const rank = axes.length;

  // Shape of the result and the row-major strides that go with it
  const shape = new Array(rank);
  const stride = new Array(rank);
  let running = 1;
  for (let i = rank - 1; i >= 0; --i) {
    stride[i] = running;
    shape[i] = dims[axes[i]];
    running *= shape[i];
  }

  // For every input axis, the stride it has in the output layout
  const invStride = axes.map((_, i) => stride[axes.indexOf(i)]);

  // @ts-ignore
  const permutedData = new array.constructor(array.length);

  for (let src = 0; src < array.length; ++src) {
    // Peel the input coordinates off the flat index (last axis first) and
    // accumulate the matching output offset.
    let dst = 0;
    let rest = src;
    for (let j = dims.length - 1; j >= 0; --j) {
      const extent = dims[j];
      dst += (rest % extent) * invStride[j];
      rest = Math.floor(rest / extent);
    }
    permutedData[dst] = array[src];
  }

  return [permutedData, shape];
}
/**
 * Applies the softmax function to an array of numbers.
 * The maximum is subtracted from every element before exponentiation.
 * @template {TypedArray|number[]} T
 * @param {T} arr The array of numbers to compute the softmax of.
 * @returns {T} The softmax array (same array kind as the input).
 */
export function softmax(arr) {
  const [maxVal] = max(arr);

  // Shifted exponentials
  const exps = arr.map((x) => Math.exp(x - maxVal));

  // Normalisation constant
  let sumExps = 0;
  for (const value of exps) {
    sumExps += value;
  }

  const softmaxArr = exps.map((x) => x / sumExps);
  return /** @type {T} */ (softmaxArr);
}
/**
 * Calculates the logarithm of the softmax function for the input array.
 *
 * Computed directly as `x - max - log(sum(exp(x - max)))` rather than as
 * `log(softmax(x))`: taking the logarithm of an already-normalised
 * probability yields `-Infinity` as soon as that probability underflows to
 * zero, whereas this form stays finite.
 *
 * @template {TypedArray|number[]} T
 * @param {T} arr The input array to calculate the log_softmax function for.
 * @returns {T} The resulting log_softmax array.
 * @throws {Error} If the array is empty (raised by `max`).
 */
export function log_softmax(arr) {
  // Shift by the maximum so the largest exponent is exp(0) = 1
  const maxVal = max(arr)[0];

  // Sum of the shifted exponentials
  let sumExps = 0;
  for (let i = 0; i < arr.length; ++i) {
    sumExps += Math.exp(arr[i] - maxVal);
  }
  const logSum = Math.log(sumExps);

  // log(softmax(x)) = (x - max) - log(sum(exp(x - max)))
  const logSoftmaxArr = arr.map((x) => x - maxVal - logSum);
  return /** @type {T} */ (logSoftmaxArr);
}
/**
 * Computes the dot product of two arrays: the sum of the element-wise
 * products over the indices of `arr1`.
 * @param {number[]} arr1 The first array.
 * @param {number[]} arr2 The second array.
 * @returns {number} The dot product of arr1 and arr2.
 */
export function dot(arr1, arr2) {
  const count = arr1.length;
  let total = 0;
  let i = 0;
  while (i < count) {
    total += arr1[i] * arr2[i];
    ++i;
  }
  return total;
}
/**
 * Ranks the items of an iterable from largest to smallest value.
 * @param {any[]|TypedArray} items The values to rank.
 * @param {number|null} [top_k=0] How many entries to keep; `0`, a negative number or `null` keeps all of them.
 * @returns {[number, any][]} `[index, value]` pairs sorted by descending value, where `index`
 * is the position of the value in `items`.
 */
export function getTopItems(items, top_k = 0) {
  // Pair every value with its original position, then rank by value.
  const ranked = Array.from(items, (value, index) => [index, value]);
  ranked.sort((a, b) => b[1] - a[1]);

  const keepAll = top_k === null || !(top_k > 0);
  return keepAll ? ranked : ranked.slice(0, top_k);
}
/**
 * Computes the cosine similarity between two arrays: their dot product
 * divided by the product of their magnitudes.
 *
 * @param {number[]} arr1 The first array.
 * @param {number[]} arr2 The second array.
 * @returns {number} The cosine similarity between the two arrays.
 */
export function cos_sim(arr1, arr2) {
  const numerator = dot(arr1, arr2);
  const denominator = magnitude(arr1) * magnitude(arr2);
  return numerator / denominator;
}
/**
 * Computes the Euclidean length of an array: the square root of the sum of
 * its squared elements.
 * @param {number[]} arr The array to calculate the magnitude of.
 * @returns {number} The magnitude of the array.
 */
export function magnitude(arr) {
  let sumOfSquares = 0;
  arr.forEach((value) => {
    sumOfSquares += value * value;
  });
  return Math.sqrt(sumOfSquares);
}
/**
 * Finds the smallest element of an array and where it occurs.
 * When the minimum occurs more than once, the first occurrence is reported.
 * @param {number[]|TypedArray} arr array of numbers.
 * @returns {number[]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
 * @throws {Error} If array is empty.
 */
export function min(arr) {
  const count = arr.length;
  if (count === 0) throw Error("Array must not be empty");

  let smallest = arr[0];
  let indexOfMin = 0;
  for (let i = 1; i < count; ++i) {
    const candidate = arr[i];
    if (candidate < smallest) {
      smallest = candidate;
      indexOfMin = i;
    }
  }
  return [smallest, indexOfMin];
}
/**
 * Finds the largest element of an array and where it occurs.
 * When the maximum occurs more than once, the first occurrence is reported.
 * The value is passed through `Number(...)` before being returned, so
 * elements of BigInt typed arrays come back as plain numbers.
 * @param {number[]|AnyTypedArray} arr array of numbers.
 * @returns {[number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
 * @throws {Error} If array is empty.
 */
export function max(arr) {
  const count = arr.length;
  if (count === 0) throw Error("Array must not be empty");

  let largest = arr[0];
  let indexOfMax = 0;
  for (let i = 1; i < count; ++i) {
    const candidate = arr[i];
    if (candidate > largest) {
      largest = candidate;
      indexOfMax = i;
    }
  }
  return [Number(largest), indexOfMax];
}
/**
 * Tests whether a number is a positive power of two.
 * @param {number} number The value to test.
 * @returns {boolean} `true` if `number` is greater than zero and has exactly one bit set.
 */
function isPowerOfTwo(number) {
  if (!(number > 0)) {
    return false;
  }
  // Subtracting one flips the lowest set bit and every bit below it, so the
  // AND is zero only when that lowest set bit was the only one.
  return (number & (number - 1)) === 0;
}
/**
* Implementation of Radix-4 FFT.
*
* P2FFT class provides functionality for performing Fast Fourier Transform on arrays
* which are a power of two in length.
* Code adapted from https://www.npmjs.com/package/fft.js
*/
class P2FFT {
/**
* @param {number} size The size of the input array. Must be a power of two larger than 1.
* @throws {Error} FFT size must be a power of two larger than 1.
*/
constructor(size) {
this.size = size | 0; // convert to a 32-bit signed integer
if (this.size <= 1 || !isPowerOfTwo(this.size))
throw new Error("FFT size must be a power of two larger than 1");
this._csize = size << 1;
this.table = new Float64Array(this.size * 2);
for (let i = 0; i < this.table.length; i += 2) {
const angle = (Math.PI * i) / this.size;
this.table[i] = Math.cos(angle);
this.table[i + 1] = -Math.sin(angle);
}
// Find size's power of two
let power = 0;
for (let t = 1; this.size > t; t <<= 1) ++power;
// Calculate initial step's width:
// * If we are full radix-4, it is 2x smaller to give inital len=8
// * Otherwise it is the same as `power` to give len=4
this._width = power % 2 === 0 ? power - 1 : power;
// Pre-compute bit-reversal patterns
this._bitrev = new Int32Array(1 << this._width);
for (let j = 0; j < this._bitrev.length; ++j) {
this._bitrev[j] = 0;
for (let shift = 0; shift < this._width; shift += 2) {
const revShift = this._width - shift - 2;
this._bitrev[j] |= ((j >>> shift) & 3) << revShift;
}
}
}
/**
* Create a complex number array with size `2 * size`
*
* @returns {Float64Array} A complex number array with size `2 * size`
*/
createComplexArray() {
return new Float64Array(this._csize);
}
/**
* Converts a complex number representation stored in a Float64Array to an array of real numbers.
*
* @param {Float64Array} complex The complex number representation to be converted.
* @param {number[]} [storage] An optional array to store the result in.
* @returns {number[]} An array of real numbers representing the input complex number representation.
*/
fromComplexArray(complex, storage) {
const res = storage || new Array(complex.length >>> 1);
for (let i = 0; i < complex.length; i += 2) res[i >>> 1] = complex[i];
return res;
}
/**
* Convert a real-valued input array to a complex-valued output array.
* @param {Float64Array} input The real-valued input array.
* @param {Float64Array} [storage] Optional buffer to store the output array.
* @returns {Float64Array} The complex-valued output array.
*/
toComplexArray(input, storage) {
const res = storage || this.createComplexArray();
for (let i = 0; i < res.length; i += 2) {
res[i] = input[i >>> 1];
res[i + 1] = 0;
}
return res;
}
/**
* Completes the spectrum by adding its mirrored negative frequency components.
* @param {Float64Array} spectrum The input spectrum.
* @returns {void}
*/
completeSpectrum(spectrum) {
const size = this._csize;
const half = size >>> 1;
for (let i = 2; i < half; i += 2) {
spectrum[size - i] = spectrum[i];
spectrum[size - i + 1] = -spectrum[i + 1];
}
}
/**
* Performs a Fast Fourier Transform (FFT) on the given input data and stores the result in the output buffer.
*
* @param {Float64Array} out The output buffer to store the result.
* @param {Float64Array} data The input data to transform.
*
* @throws {Error} Input and output buffers must be different.
*
* @returns {void}
*/
transform(out, data) {
if (out === data)
throw new Error("Input and output buffers must be different");
this._transform4(out, data, 1 /* DONE */);
}
/**
* Performs a real-valued forward FFT on the given input buffer and stores the result in the given output buffer.
* The input buffer must contain real values only, while the output buffer will contain complex values. The input and
* output buffers must be different.
*
* @param {Float64Array} out The output buffer.
* @param {Float64Array} data The input buffer containing real values.
*
* @throws {Error} If the input and output buffers are the same.
*/
realTransform(out, data) {
if (out === data)
throw new Error("Input and output buffers must be different");
this._realTransform4(out, data, 1 /* DONE */);
}
/**
* Performs an inverse FFT transformation on the given `data` array, and stores the result in `out`.
* The `out` array must be a different buffer than the `data` array. The `out` array will contain the
* result of the transformation. The `data` array will not be modified.
*
* @param {Float64Array} out The output buffer for the transformed data.
* @param {Float64Array} data The input data to transform.
* @throws {Error} If `out` and `data` refer to the same buffer.
* @returns {void}
*/
inverseTransform(out, data) {
if (out === data)
throw new Error("Input and output buffers must be different");
this._transform4(out, data, -1 /* DONE */);
for (let i = 0; i < out.length; ++i) out[i] /= this.size;
}
/**
 * Radix-4 FFT kernel for interleaved complex data.
 *
 * The first pass reads `data` in bit-reversed order and writes small butterflies into `out`;
 * every later pass combines values already stored in `out`, in place.
 *
 * @param {Float64Array} out The output buffer for the transformed data.
 * @param {Float64Array} data The input buffer of data to be transformed.
 * @param {number} inv Direction sign, not a scale factor: `transform` passes `1` and
 * `inverseTransform` passes `-1`. It multiplies the imaginary part of each table entry and the
 * difference term `T3`.
 * @returns {void}
 */
_transform4(out, data, inv) {
// radix-4 implementation
const size = this._csize;
// Initial step (permute and transform)
const width = this._width;
let step = 1 << width;
// Number of floats written per initial butterfly (4 or 8, see the branches below)
let len = (size / step) << 1;
let outOff;
let t;
const bitrev = this._bitrev;
if (len === 4) {
// Two complex values per group: radix-2 butterflies
for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
const off = bitrev[t];
this._singleTransform2(data, out, outOff, off, step);
}
} else {
// len === 8
// Four complex values per group: radix-4 butterflies
for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
const off = bitrev[t];
this._singleTransform4(data, out, outOff, off, step, inv);
}
}
// Loop through steps in decreasing order
for (step >>= 2; step >= 2; step >>= 2) {
len = (size / step) << 1;
const quarterLen = len >>> 2;
// Loop through offsets in the data
for (outOff = 0; outOff < size; outOff += len) {
// Full case
// `i` advances by 2 from an even start, so `- 1` does not change the iteration count
const limit = outOff + quarterLen - 1;
for (let i = outOff, k = 0; i < limit; i += 2, k += step) {
// Float offsets of the four complex inputs, each a quarter of `len` apart
const A = i;
const B = A + quarterLen;
const C = B + quarterLen;
const D = C + quarterLen;
// Original values
const Ar = out[A];
const Ai = out[A + 1];
const Br = out[B];
const Bi = out[B + 1];
const Cr = out[C];
const Ci = out[C + 1];
const Dr = out[D];
const Di = out[D + 1];
// Multiply B, C and D by the table entries at k, 2k and 3k (read as [re, im] pairs);
// `inv` flips the sign of the imaginary part
const tableBr = this.table[k];
const tableBi = inv * this.table[k + 1];
const MBr = Br * tableBr - Bi * tableBi;
const MBi = Br * tableBi + Bi * tableBr;
const tableCr = this.table[2 * k];
const tableCi = inv * this.table[2 * k + 1];
const MCr = Cr * tableCr - Ci * tableCi;
const MCi = Cr * tableCi + Ci * tableCr;
const tableDr = this.table[3 * k];
const tableDi = inv * this.table[3 * k + 1];
const MDr = Dr * tableDr - Di * tableDi;
const MDi = Dr * tableDi + Di * tableDr;
// Pre-Final values
const T0r = Ar + MCr;
const T0i = Ai + MCi;
const T1r = Ar - MCr;
const T1i = Ai - MCi;
const T2r = MBr + MDr;
const T2i = MBi + MDi;
const T3r = inv * (MBr - MDr);
const T3i = inv * (MBi - MDi);
// Final values
out[A] = T0r + T2r;
out[A + 1] = T0i + T2i;
out[B] = T1r + T3i;
out[B + 1] = T1i - T3r;
out[C] = T0r - T2r;
out[C + 1] = T0i - T2i;
out[D] = T1r - T3i;
out[D + 1] = T1i + T3r;
}
}
}
}
/**
* Performs a radix-2 implementation of a discrete Fourier transform on a given set of data.
*
* @param {Float64Array} data The input buffer of data to be transformed.
* @param {Float64Array} out The output buffer for the transformed data.
* @param {number} outOff The offset at which to write the output data.
* @param {number} off The offset at which to begin reading the input data.
* @param {number} step The step size for indexing the input data.
* @returns {void}
*/
_singleTransform2(data, out, outOff, off, step) {
// radix-2 implementation
// NOTE: Only called for len=4
const evenR = data[off];
const evenI = data[off + 1];
const oddR = data[off + step];
const oddI = data[off + step + 1];
out[outOff] = evenR + oddR;
out[outOff + 1] = evenI + oddI;
out[outOff + 2] = evenR - oddR;
out[outOff + 3] = evenI - oddI;
}
/**
* Performs radix-4 transformation on input data of length 8
*
* @param {Float64Array} data Input data array of length 8
* @param {Float64Array} out Output data array of length 8
* @param {number} outOff Index of output array to start writing from
* @param {number} off Index of input array to start reading from
* @param {number} step Step size between elements in input array
* @param {number} inv Scaling factor for inverse transform
*
* @returns {void}
*/
_singleTransform4(data, out, outOff, off, step, inv) {
// radix-4
// NOTE: Only called for len=8
const step2 = step * 2;
const step3 = step * 3;
// Original values
const Ar = data[off];
const Ai = data[off + 1];
const Br = data[off + step];
const Bi = data[off + step + 1];
const Cr = data[off + step2];
const Ci = data[off + step2 + 1];
const Dr = data[off + step3];
const Di = data[off + step3 + 1];
// Pre-Final values
const T0r = Ar + Cr;
const T0i = Ai + Ci;
const T1r = Ar - Cr;
const T1i = Ai - Ci;
const T2r = Br + Dr;
const T2i = Bi + Di;
const T3r = inv * (Br - Dr);
const T3i = inv * (Bi - Di);
// Final values
out[outOff] = T0r + T2r;
out[outOff + 1] = T0i + T2i;
out[outOff + 2] = T1r + T3i;
out[outOff + 3] = T1i - T3r;
out[outOff + 4] = T0r - T2r;
out[outOff + 5] = T0i - T2i;
out[outOff + 6] = T1r - T3i;
out[outOff + 7] = T1i + T3r;
}
/**
 * Real input radix-4 implementation.
 *
 * The first pass reads real samples from `data` (offsets and step from the complex layout are
 * halved with `>>> 1`) and writes complex butterflies into `out`; the remaining passes are the
 * same in-place combination steps used by the complex kernel.
 *
 * @param {Float64Array} out Output array for the transformed data
 * @param {Float64Array} data Input array of real data to be transformed
 * @param {number} inv Direction sign, not a scale factor: `realTransform` passes `1`. It
 * multiplies the imaginary part of each table entry and the difference term `T3`.
 */
_realTransform4(out, data, inv) {
// Real input radix-4 implementation
const size = this._csize;
// Initial step (permute and transform)
const width = this._width;
let step = 1 << width;
// Number of floats written per initial butterfly (4 or 8, see the branches below)
let len = (size / step) << 1;
let outOff;
let t;
const bitrev = this._bitrev;
if (len === 4) {
for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
const off = bitrev[t];
// Halve offset and step: `data` holds one float per sample rather than two
this._singleRealTransform2(data, out, outOff, off >>> 1, step >>> 1);
}
} else {
// len === 8
for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
const off = bitrev[t];
// Halve offset and step: `data` holds one float per sample rather than two
this._singleRealTransform4(
data,
out,
outOff,
off >>> 1,
step >>> 1,
inv
);
}
}
// TODO: Optimize once https://github.com/indutny/fft.js/issues/25 is fixed
// Loop through steps in decreasing order
for (step >>= 2; step >= 2; step >>= 2) {
len = (size / step) << 1;
const quarterLen = len >>> 2;
// Loop through offsets in the data
for (outOff = 0; outOff < size; outOff += len) {
// Full case
// `i` advances by 2 from an even start, so `- 1` does not change the iteration count
const limit = outOff + quarterLen - 1;
for (let i = outOff, k = 0; i < limit; i += 2, k += step) {
// Float offsets of the four complex inputs, each a quarter of `len` apart
const A = i;
const B = A + quarterLen;
const C = B + quarterLen;
const D = C + quarterLen;
// Original values
const Ar = out[A];
const Ai = out[A + 1];
const Br = out[B];
const Bi = out[B + 1];
const Cr = out[C];
const Ci = out[C + 1];
const Dr = out[D];
const Di = out[D + 1];
// Multiply B, C and D by the table entries at k, 2k and 3k (read as [re, im] pairs);
// `inv` flips the sign of the imaginary part
const tableBr = this.table[k];
const tableBi = inv * this.table[k + 1];
const MBr = Br * tableBr - Bi * tableBi;
const MBi = Br * tableBi + Bi * tableBr;
const tableCr = this.table[2 * k];
const tableCi = inv * this.table[2 * k + 1];
const MCr = Cr * tableCr - Ci * tableCi;
const MCi = Cr * tableCi + Ci * tableCr;
const tableDr = this.table[3 * k];
const tableDi = inv * this.table[3 * k + 1];
const MDr = Dr * tableDr - Di * tableDi;
const MDi = Dr * tableDi + Di * tableDr;
// Pre-Final values
const T0r = Ar + MCr;
const T0i = Ai + MCi;
const T1r = Ar - MCr;
const T1i = Ai - MCi;
const T2r = MBr + MDr;
const T2i = MBi + MDi;
const T3r = inv * (MBr - MDr);
const T3i = inv * (MBi - MDi);
// Final values
out[A] = T0r + T2r;
out[A + 1] = T0i + T2i;
out[B] = T1r + T3i;
out[B + 1] = T1i - T3r;
out[C] = T0r - T2r;
out[C + 1] = T0i - T2i;
out[D] = T1r - T3i;
out[D + 1] = T1i + T3r;
}
}
}
}
/**
* Performs a single real input radix-2 transformation on the provided data
*
* @param {Float64Array} data The input data array
* @param {Float64Array} out The output data array
* @param {number} outOff The output offset
* @param {number} off The input offset
* @param {number} step The step
*
* @returns {void}
*/
_singleRealTransform2(data, out, outOff, off, step) {
// radix-2 implementation
// NOTE: Only called for len=4
const evenR = data[off];
const oddR = data[off + step];
out[outOff] = evenR + oddR;
out[outOff + 1] = 0;
out[outOff + 2] = evenR - oddR;
out[outOff + 3] = 0;
}
/**
* Computes a single real-valued transform using radix-4 algorithm.
* This method is only called for len=8.
*
* @param {Float64Array} data The input data array.
* @param {Float64Array} out The output data array.
* @param {number} outOff The offset into the output array.
* @param {number} off The offset into the input array.
* @param {number} step The step size for the input array.
* @param {number} inv The value of inverse.
*/
_singleRealTransform4(data, out, outOff, off, step, inv) {
// radix-4
// NOTE: Only called for len=8
const step2 = step * 2;
const step3 = step * 3;
// Original values
const Ar = data[off];
const Br = data[off + step];
const Cr = data[off + step2];
const Dr = data[off + step3];
// Pre-Final values
const T0r = Ar + Cr;
const T1r = Ar - Cr;
const T2r = Br + Dr;
const T3r = inv * (Br - Dr);
// Final values
out[outOff] = T0r + T2r;
out[outOff + 1] = 0;
out[outOff + 2] = T1r;
out[outOff + 3] = -T3r;
out[outOff + 4] = T0r - T2r;
out[outOff + 5] = 0;
out[outOff + 6] = T1r;
out[outOff + 7] = T3r;
}
}
/**
 * FFT for lengths that are not a power of two, implemented with the chirp-z transform
 * on top of a power-of-two `P2FFT`.
 *
 * For more information, see: https://math.stackexchange.com/questions/77118/non-power-of-2-ffts/77156#77156
 */
class NP2FFT {
  /**
   * Precomputes the chirp sequences and allocates the scratch buffers.
   * @param {number} fft_length The length of the FFT
   */
  constructor(fft_length) {
    // Float offsets delimiting the slice of the chirp used in `_transform`
    const sliceStart = 2 * (fft_length - 1);
    const chirpLength = 2 * (2 * fft_length - 1);
    // Scratch buffer length: smallest power of two that is >= chirpLength
    const paddedLength = 2 ** Math.ceil(Math.log2(chirpLength));
    this.bufferSize = paddedLength;
    this._a = sliceStart;

    // Chirp and its conjugate (the latter zero-padded up to the scratch length)
    const chirp = new Float64Array(chirpLength);
    const conjugatedChirp = new Float64Array(paddedLength);
    this._chirpBuffer = new Float64Array(paddedLength);
    this._buffer1 = new Float64Array(paddedLength);
    this._buffer2 = new Float64Array(paddedLength);
    this._outBuffer1 = new Float64Array(paddedLength);
    this._outBuffer2 = new Float64Array(paddedLength);

    // Base of the complex exponentiation
    const theta = (-2 * Math.PI) / fft_length;
    const baseR = Math.cos(theta);
    const baseI = Math.sin(theta);

    const pairCount = chirpLength >> 1;
    for (let n = 0; n < pairCount; ++n) {
      // Exponent applied to the base for this entry
      const exponent = (n + 1 - fft_length) ** 2 / 2.0;
      // Raise the base to `exponent` in polar form
      const modulus = Math.sqrt(baseR ** 2 + baseI ** 2) ** exponent;
      const argument = exponent * Math.atan2(baseI, baseR);
      // Back to rectangular form, stored as an interleaved [re, im] pair
      const re = 2 * n;
      const im = re + 1;
      chirp[re] = modulus * Math.cos(argument);
      chirp[im] = modulus * Math.sin(argument);
      // Conjugate
      conjugatedChirp[re] = chirp[re];
      conjugatedChirp[im] = -chirp[im];
    }
    this._slicedChirpBuffer = chirp.subarray(sliceStart, chirpLength);

    // Power-of-two FFT over `paddedLength / 2` complex numbers; the spectrum of the
    // conjugated chirp is computed once here and reused by every transform
    this._f = new P2FFT(paddedLength >> 1);
    this._f.transform(this._chirpBuffer, conjugatedChirp);
  }

  /**
   * Shared implementation of `transform` and `realTransform`.
   *
   * Steps: multiply the input by the chirp slice, FFT, multiply by the precomputed chirp
   * spectrum, inverse FFT, then multiply the result (read at an offset of `this._a` floats)
   * by the chirp slice again.
   *
   * @param {Float64Array} output Receives the interleaved complex result.
   * @param {Float64Array} input Real samples when `real` is truthy, interleaved complex pairs otherwise.
   * @param {boolean} real Whether `input` holds real samples.
   * @returns {void}
   */
  _transform(output, input, real) {
    const scaled = this._buffer1;
    const product = this._buffer2;
    const spectrum = this._outBuffer1;
    const convolved = this._outBuffer2;
    const chirpSpectrum = this._chirpBuffer;
    const chirpSlice = this._slicedChirpBuffer;
    const shift = this._a;

    if (real) {
      // Real sample times complex chirp
      for (let re = 0; re < chirpSlice.length; re += 2) {
        const im = re + 1;
        const sample = input[re >> 1];
        scaled[re] = sample * chirpSlice[re];
        scaled[im] = sample * chirpSlice[im];
      }
    } else {
      // Complex sample times complex chirp
      for (let re = 0; re < chirpSlice.length; re += 2) {
        const im = re + 1;
        scaled[re] = input[re] * chirpSlice[re] - input[im] * chirpSlice[im];
        scaled[im] = input[re] * chirpSlice[im] + input[im] * chirpSlice[re];
      }
    }

    this._f.transform(spectrum, scaled);

    // Pointwise complex product with the chirp spectrum
    for (let re = 0; re < chirpSpectrum.length; re += 2) {
      const im = re + 1;
      product[re] = spectrum[re] * chirpSpectrum[re] - spectrum[im] * chirpSpectrum[im];
      product[im] = spectrum[re] * chirpSpectrum[im] + spectrum[im] * chirpSpectrum[re];
    }

    this._f.inverseTransform(convolved, product);

    // Final multiplication by the chirp slice
    for (let re = 0; re < convolved.length; re += 2) {
      const a_real = convolved[re + shift];
      const a_imag = convolved[re + shift + 1];
      const b_real = chirpSlice[re];
      const b_imag = chirpSlice[re + 1];
      output[re] = a_real * b_real - a_imag * b_imag;
      output[re + 1] = a_real * b_imag + a_imag * b_real;
    }
  }

  /**
   * Transform interleaved complex input.
   * @param {Float64Array} output Receives the interleaved complex result.
   * @param {Float64Array} input Interleaved complex input.
   * @returns {void}
   */
  transform(output, input) {
    this._transform(output, input, false);
  }

  /**
   * Transform real-valued input.
   * @param {Float64Array} output Receives the interleaved complex result.
   * @param {Float64Array} input Real-valued input samples.
   * @returns {void}
   */
  realTransform(output, input) {
    this._transform(output, input, true);
  }
}
/**
 * Facade that picks the FFT implementation matching the requested length:
 * `P2FFT` when the length is a power of two, `NP2FFT` otherwise.
 */
export class FFT {
  /**
   * @param {number} fft_length The length of the FFT.
   */
  constructor(fft_length) {
    const powerOfTwo = isPowerOfTwo(fft_length);
    this.fft_length = fft_length;
    this.isPowerOfTwo = powerOfTwo;
    if (!powerOfTwo) {
      const backend = new NP2FFT(fft_length);
      this.fft = backend;
      // The chirp-z implementation reports the buffer size it needs
      this.outputBufferSize = backend.bufferSize;
    } else {
      this.fft = new P2FFT(fft_length);
      // Two floats per point
      this.outputBufferSize = 2 * fft_length;
    }
  }

  /**
   * Forward transform of real-valued input; delegates to the selected implementation.
   * @param {Float64Array} out The output buffer.
   * @param {Float64Array} input The real-valued input samples.
   * @returns {void}
   */
  realTransform(out, input) {
    this.fft.realTransform(out, input);
  }

  /**
   * Forward transform of complex input; delegates to the selected implementation.
   * @param {Float64Array} out The output buffer.
   * @param {Float64Array} input The input data.
   * @returns {void}
   */
  transform(out, input) {
    this.fft.transform(out, input);
  }
}
/**
 * Performs median filter on the provided data. Padding is done by mirroring the data
 * around its first and last elements (the edge elements themselves are not repeated).
 *
 * The result has the same length and the same container type as `data`.
 *
 * @param {AnyTypedArray} data The input array. Plain number arrays are accepted as well.
 * @param {number} windowSize The window size; must be a positive odd integer.
 * @returns {AnyTypedArray} A new array holding the median of the window centred on each element.
 * @throws {Error} If `windowSize` is not a positive odd integer.
 */
export function medianFilter(data, windowSize) {
  if (
    !Number.isInteger(windowSize) ||
    windowSize % 2 === 0 ||
    windowSize <= 0
  ) {
    throw new Error("Window size must be a positive odd number");
  }
  const length = data.length;
  // @ts-ignore
  const outputArray = new data.constructor(length);
  // @ts-ignore
  const buffer = new data.constructor(windowSize); // Reusable array for storing values
  const halfWindowSize = Math.floor(windowSize / 2);

  // Mirroring repeats with this period, so reducing an index modulo the period keeps it
  // in range even when the window is wider than the data.
  const period = 2 * (length - 1);

  // Typed arrays sort numerically by default, but a plain Array sorts by string
  // representation ("100" < "9"), so it needs an explicit comparator. A three-way
  // comparison (rather than `a - b`) also works for bigint elements.
  const needsComparator = Array.isArray(buffer);
  const ascending = (a, b) => (a < b ? -1 : a > b ? 1 : 0);

  for (let i = 0; i < length; ++i) {
    let valuesIndex = 0;
    for (let j = -halfWindowSize; j <= halfWindowSize; ++j) {
      let index = i + j;
      if (index < 0 || index >= length) {
        if (period === 0) {
          // Single-element input: every mirrored position is that element
          index = 0;
        } else {
          index = ((index % period) + period) % period;
          if (index >= length) {
            index = period - index;
          }
        }
      }
      buffer[valuesIndex++] = data[index];
    }
    if (needsComparator) {
      buffer.sort(ascending);
    } else {
      buffer.sort();
    }
    outputArray[i] = buffer[halfWindowSize];
  }
  return outputArray;
}
/**
 * Round a number to a fixed number of decimal places.
 *
 * The value is scaled by `10^decimals`, passed through `Math.round`, and scaled back.
 *
 * @param {number} num The number to round
 * @param {number} decimals The number of decimals
 * @returns {number} The rounded number
 */
export function round(num, decimals) {
  const scale = Math.pow(10, decimals);
  const scaled = Math.round(num * scale);
  return scaled / scale;
}
/**
 * Round to the nearest integer, resolving exact halves towards the nearest even integer
 * ("bankers' rounding", the default rounding mode in python).
 * For example, 1.5 rounds to 2 and 2.5 rounds to 2.
 *
 * @param {number} x The number to round
 * @returns {number} The rounded number
 */
export function bankers_round(x) {
  const nearest = Math.round(x);
  const isTie = Math.abs(x) % 1 === 0.5;
  if (!isTie) {
    return nearest;
  }
  // Math.round resolves ties upwards; step back by one when that landed on an odd integer
  return nearest % 2 === 0 ? nearest : nearest - 1;
}
/**
* @file Helper module for `Tensor` processing.
*
* These functions and classes are only used internally,
* meaning an end-user shouldn't need to access anything here.
*
* @module utils/tensor
*/
import { interpolate_data, permute_data } from "./maths.js";
// Maps each supported tensor data type name to the array constructor used to hold its data.
// The object is frozen, so entries cannot be added, removed or replaced at runtime.
const DataTypeMap = Object.freeze({
float32: Float32Array,
float64: Float64Array,
string: Array, // string[]
int8: Int8Array,
uint8: Uint8Array,
int16: Int16Array,
uint16: Uint16Array,
int32: Int32Array,
uint32: Uint32Array,
// 64-bit integer types are backed by BigInt typed arrays
int64: BigInt64Array,
uint64: BigUint64Array,
// Booleans share the byte-sized unsigned container
bool: Uint8Array,
});
/**
* @typedef {keyof typeof DataTypeMap} DataType
* @typedef {import('./maths.js').AnyTypedArray | any[]} DataArray
*/
// NOTE: Placeholder kept only to facilitate git merges; no real implementation is wired in yet.
// It is an empty class: its constructor ignores every argument and instances carry no own properties.
const ONNXTensor = class {};
/**
 * An n-dimensional array: a flat `data` buffer interpreted in row-major order through `dims`.
 *
 * Instances are wrapped in a Proxy so that integer keys index the first dimension
 * (`tensor[0]`, `tensor["-1"]`, ...).
 */
export class Tensor {
  /** @type {number[]} Dimensions of the tensor. */
  dims;
  /** @type {DataType} Type of the tensor. */
  type;
  /** @type {DataArray} The data stored in the tensor. */
  data;
  /** @type {number} The number of elements in the tensor. */
  size;

  /**
   * Create a new Tensor or copy an existing Tensor.
   *
   * Accepts either `(type, data, dims)` or a single existing tensor, which is copied
   * shallowly (the data buffer is shared). When `dims` is omitted the tensor is
   * one-dimensional with `data.length` elements.
   *
   * @param {[DataType, DataArray, number[]]|[import('onnxruntime-common').Tensor]} args
   */
  constructor(...args) {
    const source = args[0];
    if (source instanceof ONNXTensor || source instanceof Tensor) {
      // Create shallow copy
      Object.assign(this, source);
    } else {
      // Create new tensor.
      // The fields are populated here directly: `ONNXTensor` in this file is an empty
      // placeholder class whose constructor ignores its arguments, so copying from a fresh
      // instance of it would leave `type`, `data`, `dims` and `size` undefined.
      const [type, data, dims] = args;
      this.type = type;
      this.data = data;
      this.dims = dims ?? [data.length];
      this.size = this.dims.reduce((a, b) => a * b, 1);
    }
    return new Proxy(this, {
      get: (obj, key) => {
        if (typeof key === "string") {
          let index = Number(key);
          if (Number.isInteger(index)) {
            // key is an integer (i.e., index)
            return obj._getitem(index);
          }
        }
        // @ts-ignore
        return obj[key];
      },
      set: (obj, key, value) => {
        // TODO allow setting of data
        // @ts-ignore
        obj[key] = value;
        // A `set` trap must report success explicitly. Returning the assigned value would
        // make assignments of falsy values (0, "", false, null) throw in strict mode.
        return true;
      },
    });
  }

  /**
   * Returns an iterator object for iterating over the tensor data in row-major order.
   * If the tensor has more than one dimension, the iterator will yield subarrays.
   * @returns {Iterator} An iterator object for iterating over the tensor data in row-major order.
   */
  *[Symbol.iterator]() {
    const [iterLength, ...iterDims] = this.dims;
    if (iterDims.length > 0) {
      const iterSize = iterDims.reduce((a, b) => a * b);
      for (let i = 0; i < iterLength; ++i) {
        yield this._subarray(i, iterSize, iterDims);
      }
    } else {
      yield* this.data;
    }
  }

  /**
   * Index into a Tensor object.
   * @param {number} index The index to access.
   * @returns {Tensor} The data at the specified index.
   */
  _getitem(index) {
    const [iterLength, ...iterDims] = this.dims;
    index = safeIndex(index, iterLength);
    if (iterDims.length > 0) {
      const iterSize = iterDims.reduce((a, b) => a * b);
      return this._subarray(index, iterSize, iterDims);
    } else {
      // One-dimensional tensor: wrap the single element in a zero-dimensional tensor
      return new Tensor(this.type, [this.data[index]], iterDims);
    }
  }

  /**
   * @param {number|bigint} item The item to search for in the tensor
   * @returns {number} The index of the first occurrence of item in the tensor data, or -1 if absent.
   */
  indexOf(item) {
    for (let index = 0; index < this.data.length; ++index) {
      // Note: == instead of === so we can match Ints with BigInts
      if (this.data[index] == item) {
        return index;
      }
    }
    return -1;
  }

  /**
   * Build the sub-tensor at position `index` of the first dimension.
   * @param {number} index Position along the first dimension.
   * @param {number} iterSize Number of elements in one sub-tensor.
   * @param {any} iterDims Dimensions of the sub-tensor.
   * @returns {Tensor}
   */
  _subarray(index, iterSize, iterDims) {
    const o1 = index * iterSize;
    const o2 = (index + 1) * iterSize;
    // We use subarray if available (typed array), otherwise we use slice (normal array)
    const data =
      "subarray" in this.data
        ? this.data.subarray(o1, o2)
        : this.data.slice(o1, o2);
    return new Tensor(this.type, data, iterDims);
  }

  /**
   * Returns the value of this tensor as a standard JavaScript Number. This only works
   * for tensors with one element. For other cases, see `Tensor.tolist()`.
   * @returns {number|bigint} The value of this tensor as a standard JavaScript Number.
   * @throws {Error} If the tensor does not have exactly one element.
   */
  item() {
    if (this.data.length !== 1) {
      throw new Error(
        `a Tensor with ${this.data.length} elements cannot be converted to Scalar`
      );
    }
    return this.data[0];
  }

  /**
   * Convert tensor data to a n-dimensional JS list
   * @returns {Array}
   */
  tolist() {
    return reshape(this.data, this.dims);
  }

  /**
   * Return a new Tensor with the sigmoid function applied to each element.
   * @returns {Tensor} The tensor with the sigmoid function applied.
   */
  sigmoid() {
    return this.clone().sigmoid_();
  }

  /**
   * Applies the sigmoid function to the tensor in place.
   * @returns {Tensor} Returns `this`.
   */
  sigmoid_() {
    for (let i = 0; i < this.data.length; ++i) {
      this.data[i] = 1 / (1 + Math.exp(-this.data[i]));
    }
    return this;
  }

  /**
   * Return a new Tensor with every element multiplied by a constant.
   * @param {number} val The value to multiply by.
   * @returns {Tensor} The new tensor.
   */
  mul(val) {
    return this.clone().mul_(val);
  }

  /**
   * Multiply the tensor by a constant in place.
   * @param {number} val The value to multiply by.
   * @returns {Tensor} Returns `this`.
   */
  mul_(val) {
    for (let i = 0; i < this.data.length; ++i) {
      this.data[i] *= val;
    }
    return this;
  }

  /**
   * Return a new Tensor with every element added by a constant.
   * @param {number} val The value to add by.
   * @returns {Tensor} The new tensor.
   */
  add(val) {
    return this.clone().add_(val);
  }

  /**
   * Add the tensor by a constant in place.
   * @param {number} val The value to add by.
   * @returns {Tensor} Returns `this`.
   */
  add_(val) {
    for (let i = 0; i < this.data.length; ++i) {
      this.data[i] += val;
    }
    return this;
  }

  /**
   * Return a copy of this tensor with its own data buffer and its own `dims` array.
   * @returns {Tensor} The cloned tensor.
   */
  clone() {
    return new Tensor(this.type, this.data.slice(), this.dims.slice());
  }

  /**
   * Copy a rectangular region of the tensor into a new tensor.
   *
   * One entry is consumed per dimension:
   * - `null` / `undefined` keeps the whole dimension;
   * - a number selects a single index and drops that dimension from the result;
   * - a `[start, end]` pair keeps the half-open range, clamped to the dimension bounds.
   *
   * @param {...(number|number[]|null)} slices e.g. `0, [1, 3], null, [0, 3]`
   * @returns {Tensor} A new tensor holding a copy of the selected elements.
   * @throws {Error} If an entry is not one of the forms above or a range has `start > end`.
   */
  slice(...slices) {
    // This allows for slicing with ranges and numbers
    let newTensorDims = [];
    let newOffsets = [];
    // slices is an array of numbers or arrays of numbers
    // e.g., slices = [0, [1, 3], null, [0, 3]]
    for (let sliceIndex = 0; sliceIndex < this.dims.length; ++sliceIndex) {
      let slice = slices[sliceIndex];
      if (slice === null || slice === undefined) {
        // null or undefined means take the whole dimension
        newOffsets.push([0, this.dims[sliceIndex]]);
        newTensorDims.push(this.dims[sliceIndex]);
      } else if (typeof slice === "number") {
        slice = safeIndex(slice, this.dims[sliceIndex], sliceIndex);
        // A number means take a single element
        newOffsets.push([slice, slice + 1]);
      } else if (Array.isArray(slice) && slice.length === 2) {
        // An array of length 2 means take a range of elements
        if (slice[0] > slice[1]) {
          throw new Error(`Invalid slice: ${slice}`);
        }
        let offsets = [
          Math.max(slice[0], 0),
          Math.min(slice[1], this.dims[sliceIndex]),
        ];
        newOffsets.push(offsets);
        newTensorDims.push(offsets[1] - offsets[0]);
      } else {
        throw new Error(`Invalid slice: ${slice}`);
      }
    }
    // Extent of the selection along every original dimension (single indices have extent 1)
    let newDims = newOffsets.map(([start, end]) => end - start);
    let newBufferSize = newDims.reduce((a, b) => a * b);
    // Allocate memory
    // @ts-ignore
    let data = new this.data.constructor(newBufferSize);
    // Precompute strides
    const stride = this.stride();
    for (let i = 0; i < newBufferSize; ++i) {
      // Decompose the flat output index into per-dimension coordinates (last dimension
      // first), shift by the slice start and convert to a flat index into the source
      let originalIndex = 0;
      for (let j = newDims.length - 1, num = i; j >= 0; --j) {
        const size = newDims[j];
        originalIndex += ((num % size) + newOffsets[j][0]) * stride[j];
        num = Math.floor(num / size);
      }
      data[i] = this.data[originalIndex];
    }
    return new Tensor(this.type, data, newTensorDims);
  }

  /**
   * Return a permuted version of this Tensor, according to the provided dimensions.
   * @param {...number} dims Dimensions to permute.
   * @returns {Tensor} The permuted tensor.
   */
  permute(...dims) {
    return permute(this, dims);
  }

  // TODO: implement transpose. For now (backwards compatibility), it's just an alias for permute()
  transpose(...dims) {
    return this.permute(...dims);
  }

  // TODO add .max() and .min() methods
  /**
   * Returns the sum of each row of the input tensor in the given dimension dim.
   *
   * Implemented as `norm(1, dim, keepdim)`; that computation does not take absolute values,
   * so for `p = 1` it is a plain sum.
   *
   * @param {number} [dim=null] The dimension or dimensions to reduce. If `null`, all dimensions are reduced.
   * @param {boolean} keepdim Whether the output tensor has `dim` retained or not.
   * @returns The summed tensor
   */
  sum(dim = null, keepdim = false) {
    return this.norm(1, dim, keepdim);
  }

  /**
   * Returns the matrix norm or vector norm of a given tensor.
   * @param {number|string} [p='fro'] The order of norm
   * @param {number} [dim=null] Specifies which dimension of the tensor to calculate the norm across.
   * If dim is None, the norm will be calculated across all dimensions of input.
   * @param {boolean} [keepdim=false] Whether the output tensors have dim retained or not.
   * @returns {Tensor} The norm of the tensor.
   * @throws {Error} If `p` is a string other than `'fro'`.
   */
  norm(p = "fro", dim = null, keepdim = false) {
    if (p === "fro") {
      // NOTE: Since we only support integer dims, Frobenius norm produces the same result as p=2.
      p = 2;
    } else if (typeof p === "string") {
      throw Error(`Unsupported norm: ${p}`);
    }
    if (dim === null) {
      // @ts-ignore
      let val = this.data.reduce((a, b) => a + b ** p, 0) ** (1 / p);
      return new Tensor(this.type, [val], []);
    }
    // Negative indexing
    dim = safeIndex(dim, this.dims.length);
    // Calculate the shape of the resulting array after summation
    const resultDims = this.dims.slice(); // Copy the original dimensions
    resultDims[dim] = 1; // Remove the specified axis
    // Create a new array to store the accumulated values
    // @ts-ignore
    const result = new this.data.constructor(this.data.length / this.dims[dim]);
    if (Array.isArray(result)) {
      // Typed arrays start zero-filled, but a plain Array starts with empty slots, and
      // accumulating into `undefined` would yield NaN.
      result.fill(0);
    }
    // Iterate over the data array
    for (let i = 0; i < this.data.length; ++i) {
      // Calculate the index in the resulting array
      let resultIndex = 0;
      for (
        let j = this.dims.length - 1, num = i, resultMultiplier = 1;
        j >= 0;
        --j
      ) {
        const size = this.dims[j];
        if (j !== dim) {
          const index = num % size;
          resultIndex += index * resultMultiplier;
          resultMultiplier *= resultDims[j];
        }
        num = Math.floor(num / size);
      }
      // Accumulate the value at the current index
      result[resultIndex] += this.data[i] ** p;
    }
    if (p !== 1) {
      for (let i = 0; i < result.length; ++i) {
        result[i] = result[i] ** (1 / p);
      }
    }
    if (!keepdim) {
      resultDims.splice(dim, 1);
    }
    return new Tensor(this.type, result, resultDims);
  }

  /**
   * Performs `L_p` normalization of inputs over specified dimension. Operates in place.
   * @param {number} [p=2] The exponent value in the norm formulation
   * @param {number} [dim=1] The dimension to reduce
   * @returns {Tensor} `this` for operation chaining.
   */
  normalize_(p = 2.0, dim = 1) {
    dim = safeIndex(dim, this.dims.length);
    const norm = this.norm(p, dim, true);
    for (let i = 0; i < this.data.length; ++i) {
      // Calculate the index in the resulting array
      let resultIndex = 0;
      for (
        let j = this.dims.length - 1, num = i, resultMultiplier = 1;
        j >= 0;
        --j
      ) {
        const size = this.dims[j];
        if (j !== dim) {
          const index = num % size;
          resultIndex += index * resultMultiplier;
          resultMultiplier *= this.dims[j];
        }
        num = Math.floor(num / size);
      }
      // Divide by normalized value
      this.data[i] /= norm.data[resultIndex];
    }
    return this;
  }

  /**
   * Performs `L_p` normalization of inputs over specified dimension.
   * @param {number} [p=2] The exponent value in the norm formulation
   * @param {number} [dim=1] The dimension to reduce
   * @returns {Tensor} The normalized tensor.
   */
  normalize(p = 2.0, dim = 1) {
    return this.clone().normalize_(p, dim);
  }

  /**
   * Compute and return the stride of this tensor.
   * Stride is the jump necessary to go from one element to the next one in the specified dimension dim.
   * @returns {number[]} The stride of this tensor.
   */
  stride() {
    return dimsToStride(this.dims);
  }

  /**
   * Returns a tensor with all specified dimensions of input of size 1 removed.
   *
   * NOTE: The returned tensor shares the storage with the input tensor, so changing the contents of one will change the contents of the other.
   * If you would like a copy, use `tensor.clone()` before squeezing.
   *
   * @param {number} [dim=null] If given, the input will be squeezed only in the specified dimensions.
   * @returns The squeezed tensor
   */
  squeeze(dim = null) {
    return new Tensor(this.type, this.data, calc_squeeze_dims(this.dims, dim));
  }

  /**
   * In-place version of @see {@link Tensor.squeeze}
   */
  squeeze_(dim = null) {
    this.dims = calc_squeeze_dims(this.dims, dim);
    return this;
  }

  /**
   * Returns a new tensor with a dimension of size one inserted at the specified position.
   *
   * NOTE: The returned tensor shares the same underlying data with this tensor.
   *
   * @param {number} dim The index at which to insert the singleton dimension
   * @returns The unsqueezed tensor
   */
  unsqueeze(dim = null) {
    return new Tensor(
      this.type,
      this.data,
      calc_unsqueeze_dims(this.dims, dim)
    );
  }

  /**
   * In-place version of @see {@link Tensor.unsqueeze}
   */
  unsqueeze_(dim = null) {
    this.dims = calc_unsqueeze_dims(this.dims, dim);
    return this;
  }

  /**
   * In-place version of @see {@link Tensor.flatten}
   */
  flatten_(start_dim = 0, end_dim = -1) {
    // TODO validate inputs
    // Resolve a negative `end_dim` relative to the number of dimensions
    end_dim = (end_dim + this.dims.length) % this.dims.length;
    let dimsToKeepBefore = this.dims.slice(0, start_dim);
    let dimsToFlatten = this.dims.slice(start_dim, end_dim + 1);
    let dimsToKeepAfter = this.dims.slice(end_dim + 1);
    this.dims = [
      ...dimsToKeepBefore,
      dimsToFlatten.reduce((a, b) => a * b, 1),
      ...dimsToKeepAfter,
    ];
    return this;
  }

  /**
   * Flattens input by reshaping it into a one-dimensional tensor.
   * If `start_dim` or `end_dim` are passed, only dimensions starting with `start_dim`
   * and ending with `end_dim` are flattened. The order of elements in input is unchanged.
   * @param {number} start_dim the first dim to flatten
   * @param {number} end_dim the last dim to flatten
   * @returns The flattened tensor.
   */
  flatten(start_dim = 0, end_dim = -1) {
    return this.clone().flatten_(start_dim, end_dim);
  }

  /**
   * Returns a new tensor with the same data as the `self` tensor but of a different `shape`.
   * At most one dimension may be given as `-1`; it is inferred from the number of elements.
   * @param {...number} dims the desired size
   * @returns {Tensor} The tensor with the same data but different shape
   * @throws {Error} If more than one dimension is `-1`.
   */
  view(...dims) {
    // TODO: validate dims
    let inferredIndex = -1;
    for (let i = 0; i < dims.length; ++i) {
      if (dims[i] === -1) {
        if (inferredIndex !== -1) {
          throw new Error("Only one dimension can be inferred");
        }
        inferredIndex = i;
      }
    }
    if (inferredIndex !== -1) {
      // Some dimension must be inferred
      const productOther = dims.reduce((product, curr, index) => {
        return index !== inferredIndex ? product * curr : product;
      }, 1);
      dims[inferredIndex] = this.data.length / productOther;
    }
    return new Tensor(this.type, this.data, dims); // NOTE: uses same underlying storage
  }

  /**
   * Negate every element in place.
   * @returns {Tensor} Returns `this`.
   */
  neg_() {
    for (let i = 0; i < this.data.length; ++i) {
      this.data[i] = -this.data[i];
    }
    return this;
  }

  /**
   * Return a new Tensor with every element negated.
   * @returns {Tensor} The new tensor.
   */
  neg() {
    return this.clone().neg_();
  }

  /**
   * In-place version of @see {@link Tensor.clamp}
   */
  clamp_(min, max) {
    for (let i = 0; i < this.data.length; ++i) {
      this.data[i] = Math.min(Math.max(this.data[i], min), max);
    }
    return this;
  }

  /**
   * Clamps all elements in input into the range [ min, max ]
   * @param {number} min lower-bound of the range to be clamped to
   * @param {number} max upper-bound of the range to be clamped to
   * @returns the output tensor.
   */
  clamp(min, max) {
    return this.clone().clamp_(min, max);
  }

  /**
   * In-place version of @see {@link Tensor.round}
   */
  round_() {
    for (let i = 0; i < this.data.length; ++i) {
      this.data[i] = Math.round(this.data[i]);
    }
    return this;
  }

  /**
   * Rounds elements of input to the nearest integer (using `Math.round`).
   * @returns the output tensor.
   */
  round() {
    return this.clone().round_();
  }

  /**
   * Performs Tensor dtype conversion.
   * @param {DataType} type The desired data type.
   * @returns {Tensor} The converted tensor.
   * @throws {Error} If `type` is not a supported data type.
   */
  to(type) {
    // If the self Tensor already has the correct dtype, then self is returned.
    if (this.type === type) return this;
    // Otherwise, the returned tensor is a copy of self with the desired dtype.
    if (!DataTypeMap.hasOwnProperty(type)) {
      throw new Error(`Unsupported type: ${type}`);
    }
    // @ts-ignore
    return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
  }
}
/**
* This creates a nested array of a given type and depth (see examples).
*
* @example
* NestArray<string, 1>; // string[]
* @example
* NestArray<number, 2>; // number[][]
* @example
* NestArray<string, 3>; // string[][][] etc.
* @template T
* @template {number} Depth
* @template {never[]} [Acc=[]]
* @typedef {Acc['length'] extends Depth ? T : NestArray<T[], Depth, [...Acc, never]>} NestArray
*/
/**
 * Reshapes a 1-dimensional array into an n-dimensional array, according to the provided dimensions.
 *
 * The input is read in row-major order. The result is always built from plain arrays, even when
 * `data` is a typed array. An empty shape (`[]`) describes a scalar and returns the single element.
 *
 * @example
 * reshape([10 ], [1 ]); // Type: number[] Value: [10]
 * reshape([1, 2, 3, 4 ], [2, 2 ]); // Type: number[][] Value: [[1, 2], [3, 4]]
 * reshape([1, 2, 3, 4, 5, 6, 7, 8], [2, 2, 2]); // Type: number[][][] Value: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 * reshape([1, 2, 3, 4, 5, 6, 7, 8], [4, 2 ]); // Type: number[][] Value: [[1, 2], [3, 4], [5, 6], [7, 8]]
 * reshape([7 ], [ ]); // Type: number Value: 7
 * @param {T[]|DataArray} data The input array to reshape.
 * @param {DIM} dimensions The target shape/dimensions.
 * @template T
 * @template {[number]|number[]} DIM
 * @returns {NestArray<T, DIM["length"]>} The reshaped array.
 * @throws {Error} If the number of elements does not match the product of `dimensions`.
 */
function reshape(data, dimensions) {
  const totalElements = data.length;
  // Seed the product with 1 so that an empty shape has size 1 instead of making
  // `reduce` throw on an empty array.
  const dimensionSize = dimensions.reduce((a, b) => a * b, 1);
  if (totalElements !== dimensionSize) {
    throw Error(
      `cannot reshape array of size ${totalElements} into shape (${dimensions})`
    );
  }

  const rank = dimensions.length;
  // Row-major strides: how far to move in `data` for one step along each dimension
  const strides = new Array(rank);
  for (let i = rank - 1, stride = 1; i >= 0; --i) {
    strides[i] = stride;
    stride *= dimensions[i];
  }

  // Build the nesting top-down so that every level has exactly `dimensions[depth]`
  // entries, including levels whose size is zero.
  /** @type {(depth: number, offset: number) => any} */
  const build = (depth, offset) => {
    if (depth === rank) {
      return data[offset];
    }
    const length = dimensions[depth];
    const step = strides[depth];
    const nested = new Array(length);
    for (let i = 0; i < length; ++i) {
      nested[i] = build(depth + 1, offset + i * step);
    }
    return nested;
  };
  return build(0, 0);
}
/**
 * Builds a new tensor from `tensor` with its axes rearranged.
 * The data and dims are handed to `permute_data`, whose two results become the
 * data and dims of the returned tensor; the dtype is carried over.
 * @param {any} tensor The input tensor to permute.
 * @param {Array} axes The axes to permute the tensor along.
 * @returns {Tensor} The permuted tensor.
 */
export function permute(tensor, axes) {
  const permuted = permute_data(tensor.data, tensor.dims, axes);
  const newData = permuted[0];
  const newDims = permuted[1];
  return new Tensor(tensor.type, newData, newDims);
}
/**
 * Resizes a tensor to the given spatial size.
 * Height and width are read from the last two dims and the channel count from the
 * third-to-last dim (1 when there is none). The result always has dims
 * `[channels, out_height, out_width]`.
 * @param {Tensor} input The input tensor to interpolate. Data must be channel-first (i.e., [c, h, w])
 * @param {number[]} size The output size of the image, as `[out_height, out_width]`
 * @param {string} mode The interpolation mode
 * @param {boolean} align_corners Whether to align corners.
 * @returns {Tensor} The interpolated tensor.
 */
export function interpolate(
  input,
  [out_height, out_width],
  mode = "bilinear",
  align_corners = false
) {
  // Input image dimensions, read from the trailing axes.
  const sourceDims = input.dims;
  const in_channels = sourceDims.at(-3) ?? 1;
  const in_height = sourceDims.at(-2);
  const in_width = sourceDims.at(-1);
  const sourceShape = [in_channels, in_height, in_width];
  const targetShape = [in_channels, out_height, out_width];
  const resized = interpolate_data(
    /** @type {import('./maths.js').TypedArray}*/ (input.data),
    sourceShape,
    [out_height, out_width],
    mode,
    align_corners
  );
  return new Tensor(input.type, resized, targetShape);
}
/**
 * Performs attention-mask-weighted mean pooling of the last hidden state over the sequence axis.
 * For every batch entry and embedding index, the hidden values are multiplied by the
 * mask value of their position, summed, and divided by the sum of the mask values.
 * No further normalization is applied. If a mask row sums to 0 the division yields NaN.
 * @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
 * @param {Tensor} attention_mask Tensor of shape [batchSize, seqLength]
 * @returns {Tensor} Returns a new Tensor of shape [batchSize, embedDim].
 */
export function mean_pooling(last_hidden_state, attention_mask) {
  const [batchSize, seqLength, embedDim] = last_hidden_state.dims;
  const shape = [batchSize, embedDim];
  // Allocate the output with the same array class as the input data.
  // @ts-ignore
  const pooled = new last_hidden_state.data.constructor(batchSize * embedDim);
  const hidden = last_hidden_state.data;
  const mask = attention_mask.data;
  let writePos = 0;
  for (let b = 0; b < batchSize; ++b) {
    const hiddenBase = b * embedDim * seqLength;
    const maskBase = b * seqLength;
    for (let e = 0; e < embedDim; ++e) {
      const columnStart = hiddenBase + e;
      let weightedSum = 0;
      let weightTotal = 0;
      // Walk the sequence axis; consecutive tokens are `embedDim` elements apart.
      for (let s = 0; s < seqLength; ++s) {
        // Number(...) also converts BigInt mask entries.
        const weight = Number(mask[maskBase + s]);
        weightTotal += weight;
        weightedSum += hidden[columnStart + s * embedDim] * weight;
      }
      pooled[writePos++] = weightedSum / weightTotal;
    }
  }
  return new Tensor(last_hidden_state.type, pooled, shape);
}
/**
 * Apply Layer Normalization over the last dimension of a 2D tensor.
 * Each row is shifted by its mean and divided by `(std + eps)`, where mean and std are
 * taken along dimension 1 with `correction = 0`.
 * @param {Tensor} input The input tensor. Must be 2D: [batchSize, featureDim].
 * @param {number[]} normalized_shape Shape to normalize over; must be exactly `[input.dims[1]]`.
 * @param {Object} options The options for the layer normalization
 * @param {number} [options.eps=1e-5] A value added to the denominator for numerical stability.
 * @returns {Tensor} The normalized tensor, with the same type and dims as `input`.
 * @throws {Error} If `input` is not 2D, or if `normalized_shape` is not `[input.dims[1]]`.
 */
export function layer_norm(input, normalized_shape, { eps = 1e-5 } = {}) {
  if (input.dims.length !== 2) {
    throw new Error("`layer_norm` currently only supports 2D input.");
  }
  const [batchSize, featureDim] = input.dims;
  // Reject when EITHER condition fails. With `&&`, a wrong-sized 1-element shape
  // (and a multi-element shape with a matching first entry) slipped through.
  if (normalized_shape.length !== 1 || normalized_shape[0] !== featureDim) {
    throw new Error(
      "`normalized_shape` must be a 1D array with shape `[input.dims[1]]`."
    );
  }
  // Per-row statistics along dim 1 (correction = 0, keepdim = true).
  const [std, mean] = std_mean(input, 1, 0, true);
  // Allocate the output with the same array class as the input data.
  // @ts-ignore
  const returnedData = new input.data.constructor(input.data.length);
  for (let i = 0; i < batchSize; ++i) {
    const offset = i * featureDim;
    for (let j = 0; j < featureDim; ++j) {
      const offset2 = offset + j;
      returnedData[offset2] =
        (input.data[offset2] - mean.data[i]) / (std.data[i] + eps);
    }
  }
  return new Tensor(input.type, returnedData, input.dims);
}
/**
 * Helper function to calculate new dimensions when performing a squeeze operation.
 * The input array is never modified; a new array is returned.
 *  - `dim === null`: every size-1 dimension is removed.
 *  - `dim` is a number: that dimension is removed only if its size is 1.
 *  - `dim` is an array: each listed dimension is removed only if its size is 1.
 *  - anything else: an unchanged copy is returned.
 * @param {number[]} dims The dimensions of the tensor.
 * @param {number|number[]|null} dim The dimension(s) to squeeze.
 * @returns The new dimensions.
 * @private
 */
function calc_squeeze_dims(dims, dim) {
  const squeezed = dims.slice();
  if (dim === null) {
    return squeezed.filter((size) => size !== 1);
  }
  if (typeof dim === "number") {
    if (squeezed[dim] === 1) {
      squeezed.splice(dim, 1);
    }
    return squeezed;
  }
  if (Array.isArray(dim)) {
    // Drop an axis only when it is both listed and of size 1.
    return squeezed.filter((size, axis) => !(size === 1 && dim.includes(axis)));
  }
  return squeezed;
}
/**
 * Helper function to calculate new dimensions when performing an unsqueeze operation.
 * Returns a new array equal to `dims` with a 1 inserted at position `dim`;
 * `dims` itself is not modified.
 * @param {number[]} dims The dimensions of the tensor.
 * @param {number} dim The dimension to unsqueeze. Negative values count from the end.
 * @returns The new dimensions.
 * @private
 */
function calc_unsqueeze_dims(dims, dim) {
  // Validate against length + 1, because inserting after the last
  // dimension (i.e. dim = -1) is allowed. Out-of-range values make `safeIndex` throw.
  const insertAt = safeIndex(dim, dims.length + 1);
  const before = dims.slice(0, insertAt);
  const after = dims.slice(insertAt);
  return [...before, 1, ...after];
}
/**
 * Resolves a possibly negative index against an array of the given size.
 * Valid inputs lie in `[-size, size - 1]`; negative ones are mapped onto `[0, size - 1]`.
 * @param {number} index The index that will be used.
 * @param {number} size The size of the array.
 * @param {number} [dimension=null] The dimension that the index is for (optional); only used in the error message.
 * @returns {number} The index, guaranteed to be non-negative and less than `size`.
 *
 * @throws {Error} If the index is out of range.
 * @private
 */
function safeIndex(index, size, dimension = null) {
  const outOfRange = index < -size || index >= size;
  if (outOfRange) {
    const dimensionLabel = dimension === null ? "" : " " + dimension;
    throw new Error(
      `IndexError: index ${index} is out of bounds for dimension${dimensionLabel} with size ${size}`
    );
  }
  // Negative indexing: wrap around so that the result is a positive index.
  return index < 0 ? ((index % size) + size) % size : index;
}
/**
 * Concatenates an array of tensors along a specified dimension.
 * The result takes its dtype, array class and non-concatenated dims from the first tensor;
 * the shapes of the other tensors are not validated.
 * @param {Tensor[]} tensors The array of tensors to concatenate.
 * @param {number} dim The dimension to concatenate along. Negative values count from the end.
 * @returns {Tensor} The concatenated tensor.
 */
export function cat(tensors, dim = 0) {
  const first = tensors[0];
  dim = safeIndex(dim, first.dims.length);
  // TODO do validation of shapes
  // Output shape: the first tensor's dims, with the extents along `dim` summed over all inputs.
  const resultDims = first.dims.slice();
  let concatExtent = 0;
  for (const t of tensors) {
    concatExtent += t.dims[dim];
  }
  resultDims[dim] = concatExtent;
  let resultSize = 1;
  for (const extent of resultDims) {
    resultSize *= extent;
  }
  // @ts-ignore
  const result = new first.data.constructor(resultSize);
  // Output tensor has the same type as the first input.
  const resultType = first.type;
  if (dim === 0) {
    // Special case for performance: along the outermost dimension the buffers
    // can simply be copied one after another.
    let writeOffset = 0;
    for (const t of tensors) {
      result.set(t.data, writeOffset);
      writeOffset += t.data.length;
    }
    return new Tensor(resultType, result, resultDims);
  }
  // General case: map every source element to its position in the result.
  let dimOffset = 0; // how far along `dim` the earlier tensors already reach
  for (const source of tensors) {
    const sourceData = source.data;
    const sourceDims = source.dims;
    for (let flat = 0; flat < sourceData.length; ++flat) {
      // Decompose `flat` into per-axis coordinates (last axis first) and
      // re-compose them using the result's extents.
      let target = 0;
      let remaining = flat;
      let stride = 1;
      for (let axis = sourceDims.length - 1; axis >= 0; --axis) {
        const extent = sourceDims[axis];
        let coord = remaining % extent;
        if (axis === dim) {
          coord += dimOffset;
        }
        target += coord * stride;
        stride *= resultDims[axis];
        remaining = Math.floor(remaining / extent);
      }
      result[target] = sourceData[flat];
    }
    dimOffset += sourceDims[dim];
  }
  return new Tensor(resultType, result, resultDims);
}
/**
 * Stack an array of tensors along a new dimension.
 * Every tensor is first unsqueezed at `dim`, then the results are concatenated along `dim`.
 * @param {Tensor[]} tensors The array of tensors to stack.
 * @param {number} dim The dimension to stack along.
 * @returns {Tensor} The stacked tensor.
 */
export function stack(tensors, dim = 0) {
  // TODO do validation of shapes
  // NOTE: stack expects each tensor to be equal size
  const expanded = [];
  for (const t of tensors) {
    expanded.push(t.unsqueeze(dim));
  }
  return cat(expanded, dim);
}
/**
 * Calculates the standard deviation and mean over the dimension specified by dim.
 * dim can be a single dimension or `null` to reduce over all dimensions.
 * @param {Tensor} input the input tensor
 * @param {number|null} dim the dimension to reduce. If `null`, all dimensions are reduced and both results are scalars (empty dims).
 * @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
 * @param {boolean} keepdim whether the output tensors keep the reduced dimension (with size 1) or not.
 * @returns {Tensor[]} A tuple of (std, mean) tensors.
 */
export function std_mean(input, dim = null, correction = 1, keepdim = false) {
  if (dim === null) {
    // Reduce over all dimensions.
    const elementCount = input.data.length;
    // @ts-ignore
    const total = input.data.reduce((acc, v) => acc + v, 0);
    const average = total / elementCount;
    // @ts-ignore
    const squaredDeviation = input.data.reduce((acc, v) => acc + (v - average) ** 2, 0);
    const deviation = Math.sqrt(squaredDeviation / (elementCount - correction));
    const scalarMean = new Tensor(input.type, [average], [/* scalar */]);
    const scalarStd = new Tensor(input.type, [deviation], [/* scalar */]);
    return [scalarStd, scalarMean];
  }
  // Negative indexing
  dim = safeIndex(dim, input.dims.length);
  const meanTensor = mean(input, dim, keepdim);
  // Shape of the reduction: the original dims with the reduced axis collapsed to 1.
  const resultDims = input.dims.slice();
  resultDims[dim] = 1;
  const reducedExtent = input.dims[dim];
  // Accumulator for the squared deviations, one slot per output element.
  // @ts-ignore
  const result = new input.data.constructor(input.data.length / reducedExtent);
  const rank = input.dims.length;
  for (let flat = 0; flat < input.data.length; ++flat) {
    // Find the output slot for this element: decompose `flat` into coordinates
    // (last axis first) and skip the reduced axis when re-composing.
    let target = 0;
    let remaining = flat;
    let stride = 1;
    for (let axis = rank - 1; axis >= 0; --axis) {
      const extent = input.dims[axis];
      if (axis !== dim) {
        target += (remaining % extent) * stride;
        stride *= resultDims[axis];
      }
      remaining = Math.floor(remaining / extent);
    }
    result[target] += (input.data[flat] - meanTensor.data[target]) ** 2;
  }
  // Turn each sum of squared deviations into a standard deviation.
  const divisor = reducedExtent - correction;
  for (let k = 0; k < result.length; ++k) {
    result[k] = Math.sqrt(result[k] / divisor);
  }
  if (!keepdim) {
    resultDims.splice(dim, 1);
  }
  const stdTensor = new Tensor(input.type, result, resultDims);
  return [stdTensor, meanTensor];
}
/**
 * Returns the mean of the input tensor along the given dimension, or over all elements.
 * @param {Tensor} input the input tensor.
 * @param {number|null} dim the dimension to reduce. If `null`, all dimensions are reduced and the result is a scalar (empty dims).
 * @param {boolean} keepdim whether the output tensor keeps the reduced dimension (with size 1) or not.
 * @returns A new tensor with means taken along the specified dimension.
 */
export function mean(input, dim = null, keepdim = false) {
  if (dim === null) {
    // Reduce over all dimensions.
    // @ts-ignore
    const total = input.data.reduce((acc, v) => acc + v, 0);
    const average = total / input.data.length;
    return new Tensor(input.type, [average], [/* scalar */]);
  }
  // Negative indexing
  dim = safeIndex(dim, input.dims.length);
  // Shape of the reduction: the original dims with the reduced axis collapsed to 1.
  const resultDims = input.dims.slice();
  resultDims[dim] = 1;
  const reducedExtent = input.dims[dim];
  // Accumulator for the sums, one slot per output element.
  // @ts-ignore
  const result = new input.data.constructor(input.data.length / reducedExtent);
  const rank = input.dims.length;
  for (let flat = 0; flat < input.data.length; ++flat) {
    // Find the output slot for this element: decompose `flat` into coordinates
    // (last axis first) and skip the reduced axis when re-composing.
    let target = 0;
    let remaining = flat;
    let stride = 1;
    for (let axis = rank - 1; axis >= 0; --axis) {
      const extent = input.dims[axis];
      if (axis !== dim) {
        target += (remaining % extent) * stride;
        stride *= resultDims[axis];
      }
      remaining = Math.floor(remaining / extent);
    }
    result[target] += input.data[flat];
  }
  // Sums become means; dividing by 1 would be a no-op, so it is skipped.
  if (reducedExtent !== 1) {
    for (let k = 0; k < result.length; ++k) {
      result[k] = result[k] / reducedExtent;
    }
  }
  if (!keepdim) {
    resultDims.splice(dim, 1);
  }
  return new Tensor(input.type, result, resultDims);
}
/**
 * Measures similarity between two temporal sequences (e.g., input audio and output tokens
 * to generate token-level timestamps).
 *
 * Fills a cumulative cost table of size (output_length + 1) x (input_length + 1) together with
 * a table recording which neighbour each cell was reached from, then walks back from the
 * bottom-right corner to recover the alignment path.
 * @param {Tensor} matrix 2D tensor whose dims are read as [output_length, input_length].
 * @returns {number[][]} `[text_indices, time_indices]`: the row and column indices along the path, in forward order.
 */
export function dynamicTimeWarping(matrix) {
  const [output_length, input_length] = matrix.dims;
  const rows = output_length + 1;
  const cols = input_length + 1;
  const outputShape = [rows, cols];
  const cost = new Tensor(
    "float32",
    new Float32Array(rows * cols).fill(Infinity),
    outputShape
  );
  const trace = new Tensor(
    "float32",
    new Float32Array(rows * cols).fill(-1),
    outputShape
  );
  // same as `cost[0][0] = 0`;
  cost[0].data[0] = 0;
  for (let col = 1; col < cols; ++col) {
    for (let row = 1; row < rows; ++row) {
      const diagonal = cost[row - 1][col - 1].item();
      const above = cost[row - 1][col].item();
      const left = cost[row][col - 1].item();
      // Step codes: 0 = diagonal, 1 = from the row above, 2 = from the column to the left.
      // A neighbour is only chosen when strictly cheaper than both others; otherwise `left` wins.
      let best = left;
      let step = 2;
      if (diagonal < above && diagonal < left) {
        best = diagonal;
        step = 0;
      } else if (above < diagonal && above < left) {
        best = above;
        step = 1;
      }
      cost[row].data[col] = matrix[row - 1][col - 1].item() + best;
      trace[row].data[col] = step;
    }
  }
  // backtrace
  // Force the borders to lead back to the origin.
  // @ts-ignore
  trace.data.fill(2, 0, cols); // trace[0, :] = 2
  for (let row = 0; row < rows; ++row) {
    // trace[:, 0] = 1
    trace[row].data[0] = 1;
  }
  const text_indices = [];
  const time_indices = [];
  let textPos = output_length;
  let timePos = input_length;
  while (textPos > 0 || timePos > 0) {
    text_indices.push(textPos - 1);
    time_indices.push(timePos - 1);
    const step = trace[textPos][timePos].item();
    if (step === 0) {
      --textPos;
      --timePos;
    } else if (step === 1) {
      --textPos;
    } else if (step === 2) {
      --timePos;
    } else {
      throw new Error(
        `Internal error in dynamic time warping. Unexpected trace[${textPos}, ${timePos}]. Please file a bug report.`
      );
    }
  }
  // The path was collected end-to-start.
  text_indices.reverse();
  time_indices.reverse();
  return [text_indices, time_indices];
}
/**
 * Computes, for each dimension, the product of the sizes of all dimensions after it.
 * The last dimension therefore gets 1 (e.g. [2, 3, 4] -> [12, 4, 1]).
 * @param {number[]} dims The dimensions.
 * @returns {number[]} A new array with one entry per dimension.
 */
function dimsToStride(dims) {
  const reversed = [];
  let running = 1;
  // Walk from the last dimension to the first, accumulating the product.
  for (let axis = dims.length - 1; axis >= 0; --axis) {
    reversed.push(running);
    running *= dims[axis];
  }
  return reversed.reverse();
}
/**
 * Returns a tensor filled with the scalar value 1, with the shape defined by the argument size.
 * The result is an "int64" tensor backed by a `BigInt64Array`, so its elements are the BigInt `1n`.
 * @param {number[]} size A sequence of integers defining the shape of the output tensor.
 */
export function ones(size) {
  // Number of elements = product of all extents (1 for an empty shape).
  let numElements = 1;
  for (const extent of size) {
    numElements *= extent;
  }
  const filled = new BigInt64Array(numElements);
  filled.fill(1n);
  return new Tensor("int64", filled, size);
}
/**
 * Returns a tensor filled with the scalar value 1, with the same size as input.
 * Only the dims of `tensor` are used; the result is whatever `ones` produces for them.
 * @param {Tensor} tensor The size of input will determine size of the output tensor.
 * @returns The ones tensor.
 */
export function ones_like(tensor) {
  const { dims } = tensor;
  return ones(dims);
}
/**
 * Quantizes the embeddings tensor to binary or unsigned binary precision.
 * Each element becomes one bit (1 if the value is greater than 0, else 0) and every 8
 * consecutive bits are packed into one byte, first element in the most significant bit.
 * For 'binary' each packed byte is additionally shifted down by 128 and stored as int8.
 * @param {Tensor} tensor The tensor to quantize. Must be 2D with a last dimension divisible by 8.
 * @param {'binary'|'ubinary'} precision The precision to use for quantization.
 * @returns {Tensor} The quantized tensor, of dims [tensor.dims[0], tensor.dims[1] / 8] and dtype "int8" ('binary') or "uint8" ('ubinary').
 * @throws {Error} If the tensor is not 2D, its last dimension is not a multiple of 8, or the precision is unknown.
 */
export function quantize_embeddings(tensor, precision) {
  const rank = tensor.dims.length;
  if (rank !== 2) {
    throw new Error("The tensor must have 2 dimensions");
  }
  const lastDim = tensor.dims.at(-1);
  if (lastDim % 8 !== 0) {
    throw new Error("The last dimension of the tensor must be a multiple of 8");
  }
  if (precision !== "binary" && precision !== "ubinary") {
    throw new Error("The precision must be either 'binary' or 'ubinary'");
  }
  const signed = precision === "binary";
  const dtype = signed ? "int8" : "uint8";
  // Typed array class that stores the packed bits.
  const PackedArray = signed ? Int8Array : Uint8Array;
  const inputData = tensor.data;
  const outputData = new PackedArray(inputData.length / 8);
  for (let pos = 0; pos < inputData.length; ++pos) {
    // Which output byte, and which bit inside it, this element maps to.
    const byteIndex = Math.floor(pos / 8);
    const bitPosition = pos % 8;
    // Only positive values contribute a set bit.
    if (inputData[pos] > 0) {
      outputData[byteIndex] |= 1 << (7 - bitPosition);
    }
    // Once per byte (on its first bit) apply the -128 offset of the signed format;
    // the int8 storage wraps around, so later bits are unaffected.
    if (signed && bitPosition === 0) {
      outputData[byteIndex] -= 128;
    }
  }
  return new Tensor(dtype, outputData, [tensor.dims[0], tensor.dims[1] / 8]);
}
import type {
AutoTokenizer,
PreTrainedTokenizer,
} from "./tokenizers/tokenizers";
/** Type of the static `TOKENIZER_CLASS_MAPPING` lookup table on `AutoTokenizer`. */
export type TokenizerMapping = typeof AutoTokenizer.TOKENIZER_CLASS_MAPPING;
/** Union of the keys of `TOKENIZER_CLASS_MAPPING`. */
export type SupportedTokenizerClasses = keyof TokenizerMapping;
/**
 * Resolves a class name `T` to a tokenizer instance type: the instance type of the
 * mapped class when `T` is a supported key, otherwise `PreTrainedTokenizer`.
 */
export type TokenizerClassNameMapping<T extends string> =
T extends SupportedTokenizerClasses
? InstanceType<TokenizerMapping[T]>
: PreTrainedTokenizer;
/**
 * Resolves a config object type to a tokenizer type through its `tokenizer_class` field.
 * NOTE(review): the matching branch yields the mapped class (constructor) type, whereas
 * `TokenizerClassNameMapping` wraps the same lookup in `InstanceType` and the fallback here
 * is `PreTrainedTokenizer` itself. Confirm whether this asymmetry is intended.
 */
export type TokenizerConfigMapping<
Config extends {
tokenizer_class: string;
}
> = Config["tokenizer_class"] extends SupportedTokenizerClasses
? TokenizerMapping[Config["tokenizer_class"]]
: PreTrainedTokenizer;
/** Union of all property value types of `T`. */
type ValueOf<T> = T[keyof T];
/**
 * Signature of a factory that returns a tokenizer instance `M` (an instance of one of the
 * mapped classes). The `params` object and both of its fields are optional, and each
 * field is a `Partial` of the corresponding root type.
 */
export type FromPreTrainedFn<
M extends InstanceType<ValueOf<TokenizerMapping>>
> = (params?: {
// TODO: types
tokenizerJSON?: Partial<NSTokenizerJSON.Root>;
tokenizerConfig?: Partial<NSTokenizerConfig.Root>;
}) => M;
/**
 * Types describing the tokenizer configuration object (`tokenizerConfig`).
 * Presumably mirrors the contents of a Hugging Face `tokenizer_config.json` file (TODO confirm).
 */
export namespace NSTokenizerConfig {
// TODO full types
/**
 * Top-level configuration object. Many fields are typed `any` for now;
 * only the fields without `?` are required.
 */
export type Root = {
add_prefix_space?: any;
bos_token?: any;
clean_up_tokenization_spaces: boolean;
eos_token: any;
model_max_length: number;
// Name used to pick the tokenizer class.
tokenizer_class: string;
unk_token: any;
// Typed `any`; the `ChatTemplateElement` type below is not referenced here.
chat_template?: any;
add_bos_token?: boolean;
add_eos_token?: boolean;
added_tokens_decoder?: { [key: string]: AddedTokensDecoder };
legacy?: boolean | null;
merges_file?: null;
pad_token?: any;
// Typed `any`; the `SPModelKwargs` type below is not referenced here.
sp_model_kwargs?: any;
spaces_between_special_tokens?: boolean;
use_default_system_prompt?: boolean;
vocab_file?: null;
// Typed `any`; the `AutoMap` type below is not referenced here.
auto_map?: any;
do_lower_case?: boolean;
padding_side?: string;
remove_space?: boolean;
additional_special_tokens?: string[];
errors?: string;
split_special_tokens?: boolean;
};
/** Value type of the entries in `Root.added_tokens_decoder`. */
export type AddedTokensDecoder = {
content: string;
lstrip: boolean;
normalized: boolean;
rstrip: boolean;
single_word: boolean;
special: boolean;
};
/** Presumably the shape of `Root.auto_map` (not referenced by `Root`; TODO confirm). */
export type AutoMap = {
AutoTokenizer: Array<null | string>;
};
/** Presumably one named entry of `Root.chat_template` (not referenced by `Root`; TODO confirm). */
export type ChatTemplateElement = {
name: string;
template: string;
};
/** Placeholder with no declared fields; presumably for `Root.sp_model_kwargs` (TODO confirm). */
export type SPModelKwargs = {};
}
/**
 * Types describing the tokenizer definition object (`tokenizerJSON`).
 * Presumably mirrors the contents of a Hugging Face `tokenizer.json` file (TODO confirm).
 */
export namespace NSTokenizerJSON {
// TODO full types
/**
 * Top-level tokenizer definition. The pipeline sections are typed `any` for now;
 * the more specific types declared below in this namespace are not referenced from here.
 */
export type Root = {
version: string;
truncation: null;
padding: null;
added_tokens: any[];
normalizer: any;
pre_tokenizer: any;
post_processor: any;
decoder: any;
model: any;
};
/** Presumably one entry of `Root.added_tokens` (not referenced by `Root`; TODO confirm). */
export type AddedToken = {
id: number;
content: string;
single_word: boolean;
lstrip: boolean;
rstrip: boolean;
normalized: boolean;
special: boolean;
};
/** Element type of `PreTokenizer.pretokenizers`. */
export type PretokenizerElement = {
type: string;
decoders?: DecoderDecoder[];
add_prefix_space?: boolean;
trim_offsets?: boolean;
use_regex?: boolean;
individual_digits?: boolean;
};
/** Element type of `PretokenizerElement.decoders`. */
export type DecoderDecoder = {
type: string;
pattern?: Pattern;
content?: string;
start?: number;
stop?: number;
};
/** Pattern wrapper with a single `String` field. */
export type Pattern = {
String: string;
};
/** Presumably the shape of `Root.model` (not referenced by `Root`; TODO confirm). */
export type Model = {
type: string;
dropout: null;
unk_token: any;
continuing_subword_prefix: null;
end_of_word_suffix: null;
fuse_unk: boolean;
byte_fallback: boolean;
vocab: { [key: string]: number };
merges: string[];
};
/** Presumably the shape of `Root.normalizer` (not referenced by `Root`; TODO confirm). */
export type TopLevelNormalizer = {
type: string;
normalizers?: NormalizerElement[];
};
/** Element type of `TopLevelNormalizer.normalizers`. */
export type NormalizerElement = {
type: string;
prepend?: string;
pattern?: Pattern;
content?: string;
};
/** Presumably the shape of `Root.post_processor` (not referenced by `Root`; TODO confirm). */
export type PostProcessor = {
type: string;
single: Pair[];
pair: Pair[];
special_tokens: { [key: string]: SpecialToken };
add_prefix_space?: boolean;
trim_offsets?: boolean;
use_regex?: boolean;
};
/** Element type of `PostProcessor.single` and `PostProcessor.pair`; both fields are optional. */
export type Pair = {
SpecialToken?: Sequence;
Sequence?: Sequence;
};
/** Payload type of both fields of `Pair`. */
export type Sequence = {
id: string;
type_id: number;
};
/** Value type of the entries in `PostProcessor.special_tokens`. */
export type SpecialToken = {
id: string;
ids: number[];
tokens: string[];
};
/** Presumably the shape of `Root.pre_tokenizer` (not referenced by `Root`; TODO confirm). */
export type PreTokenizer = {
type: string;
pretokenizers?: PretokenizerElement[];
};
}
+3
-2
{
"name": "@lenml/tokenizers",
"version": "1.0.6",
"version": "1.0.7",
"description": "a lightweight no-dependency fork of transformers.js (only tokenizers)",

@@ -21,3 +21,4 @@ "source": "src/main.ts",

"dist/**.mjs*",
"dist/**.ts"
"dist/**.ts",
"src/**/*"
],

@@ -24,0 +25,0 @@ "keywords": [