@lenml/tokenizers - npm Package Compare versions

Comparing version

1.0.6

1.0.7

+3

src/main.ts

		export * from "./TokenizerLoader";
		export * as tokenizers from "./tokenizers/tokenizers";
		export * from "./types";

+81

src/TokenizerLoader.ts

		import {
		AutoTokenizer as _AutoTokenizer,
		PreTrainedTokenizer,
		} from "./tokenizers/tokenizers";
		import { NSTokenizerConfig, NSTokenizerJSON } from "./types";

		/**
		* Already-loaded tokenizer data, as consumed by `TokenizerLoader.fromPreTrained`.
		*/
		interface ITokenizerModelJsonData {
		// Tokenizer definition object; passed as the first constructor argument of the tokenizer class.
		tokenizerJSON: Partial<NSTokenizerJSON.Root>;
		// Tokenizer configuration; its `tokenizer_class` field is used to pick the tokenizer class.
		tokenizerConfig: Partial<NSTokenizerConfig.Root>;
		}
		/**
		* Locations of the two JSON documents fetched by `TokenizerLoader.fromPreTrainedUrls`.
		*/
		interface ITokenizerModelUrls {
		// URL passed to `fetch` to retrieve the tokenizer JSON.
		tokenizerJSON: string;
		// URL passed to `fetch` to retrieve the tokenizer configuration JSON.
		tokenizerConfig: string;
		}

		export class TokenizerLoader {
		  /**
		   * Creates a pre-trained tokenizer from already-loaded model data.
		   *
		   * The tokenizer class is looked up by `tokenizerConfig.tokenizer_class` (with a
		   * trailing "Fast" removed). When the name is missing or unknown, the base
		   * `PreTrainedTokenizer` class is used instead.
		   *
		   * @param {ITokenizerModelJsonData} model - The model data containing the tokenizer JSON and configuration.
		   * @return {PreTrainedTokenizer} pre-trained tokenizer.
		   * @throws {Error} If the tokenizer JSON or configuration is missing.
		   */
		  static fromPreTrained(model: ITokenizerModelJsonData): PreTrainedTokenizer {
		    const { tokenizerJSON, tokenizerConfig } = model;
		    if (!tokenizerJSON) {
		      throw new Error("tokenizerJSON is required.");
		    }
		    if (!tokenizerConfig) {
		      throw new Error("tokenizerConfig is required.");
		    }
		    // Some tokenizers are saved with the "Fast" suffix, so we remove that if present.
		    const tokenizerName =
		      tokenizerConfig.tokenizer_class?.replace(/Fast$/, "") ??
		      "PreTrainedTokenizer";

		    let cls = (_AutoTokenizer as any).TOKENIZER_CLASS_MAPPING[tokenizerName];
		    if (!cls) {
		      console.warn(
		        `Unknown tokenizer class "${tokenizerName}", attempting to construct from base class.`
		      );
		      cls = PreTrainedTokenizer;
		    }
		    return new cls(tokenizerJSON, tokenizerConfig);
		  }

		  /**
		   * Creates a pre-trained tokenizer by downloading its JSON and configuration.
		   *
		   * Both documents are fetched in parallel. Values supplied through
		   * `options.tokenizerJSON` / `options.tokenizerConfig` are shallow-merged over
		   * the fetched data (the supplied values win).
		   *
		   * @param {ITokenizerModelUrls} model - The model URLs containing the tokenizer JSON and configuration.
		   * @param {Object} [options] - Optional parameters.
		   * @param {any} [options.fetch] - The fetch function to use for making HTTP requests. Defaults to `globalThis.fetch`.
		   * @param {Partial<NSTokenizerJSON.Root>} [options.tokenizerJSON] - Additional tokenizer JSON data to merge with the fetched data.
		   * @param {Partial<NSTokenizerConfig.Root>} [options.tokenizerConfig] - Additional tokenizer configuration data to merge with the fetched data.
		   * @return {Promise<PreTrainedTokenizer>} A promise that resolves to the pre-trained tokenizer.
		   * @throws {Error} If no fetch implementation is available, or a response reports a failed HTTP status.
		   */
		  static async fromPreTrainedUrls(
		    model: ITokenizerModelUrls,
		    options?: {
		      fetch?: any;
		    } & Partial<ITokenizerModelJsonData>
		  ): Promise<PreTrainedTokenizer> {
		    const fetchFn =
		      (options?.fetch as typeof globalThis.fetch | undefined) ??
		      globalThis.fetch?.bind(globalThis);
		    if (typeof fetchFn !== "function") {
		      throw new Error(
		        "No fetch implementation available; pass one via options.fetch."
		      );
		    }

		    // Fetch one JSON document and fail loudly on an HTTP error instead of
		    // handing an error page to the tokenizer constructor.
		    const fetchJson = async (url: string, label: string) => {
		      const res = await fetchFn(url);
		      // Compare against `false` so minimal custom fetch responses that do not
		      // expose `ok` keep working as before.
		      if (res.ok === false) {
		        throw new Error(
		          `Failed to fetch ${label} from "${url}": ${res.status} ${res.statusText}`
		        );
		      }
		      return res.json();
		    };

		    const [tokenizerJSON, tokenizerConfig] = await Promise.all([
		      fetchJson(model.tokenizerJSON, "tokenizerJSON"),
		      fetchJson(model.tokenizerConfig, "tokenizerConfig"),
		    ]);
		    return TokenizerLoader.fromPreTrained({
		      tokenizerJSON: {
		        ...tokenizerJSON,
		        ...options?.tokenizerJSON,
		      },
		      tokenizerConfig: {
		        ...tokenizerConfig,
		        ...options?.tokenizerConfig,
		      },
		    });
		  }
		}

+10

src/tokenizers/readme.md

		# tokenizers.js

		This code is forked from `https://github.com/huggingface/transformers`.

		# What changed?
		- Removed ONNX dependencies
		- Removed env dependencies (esm/hf_repo_downloader)

		# License
		Apache-2.0

src/tokenizers/tokenizers.js

Sorry, the diff of this file is too big to display

+177

src/tokenizers/utils/core.js

		/**
		* @file Core utility functions/classes for Transformers.js.
		*
		* These are only used internally, meaning an end-user shouldn't
		* need to access anything here.
		*
		* @module utils/core
		*/

		/**
		 * Invokes a progress callback with the given payload when a callback was supplied.
		 * A falsy callback (e.g. `null` or `undefined`) is silently ignored.
		 *
		 * @param {Function} progress_callback The progress callback function to dispatch.
		 * @param {any} data The data to pass to the progress callback function.
		 * @returns {void}
		 * @private
		 */
		export function dispatchCallback(progress_callback, data) {
		  if (!progress_callback) {
		    return;
		  }
		  progress_callback(data);
		}

		/**
		 * Builds a new object in which every value of `data` becomes a key and the
		 * corresponding key becomes its value. When several keys share a value, the
		 * last one encountered wins.
		 *
		 * @param {Object} data The object to reverse.
		 * @returns {Object} The reversed object.
		 * @see https://ultimatecourses.com/blog/reverse-object-keys-and-values-in-javascript
		 */
		export function reverseDictionary(data) {
		  const swappedPairs = [];
		  for (const [key, value] of Object.entries(data)) {
		    swappedPairs.push([value, key]);
		  }
		  return Object.fromEntries(swappedPairs);
		}

		/**
		 * Prefixes every regular-expression metacharacter in a string with a backslash,
		 * so the result can be embedded in a pattern and matched literally.
		 *
		 * @param {string} string The string to escape.
		 * @returns {string} The escaped string.
		 */
		export function escapeRegExp(string) {
		  const metaCharacters = /[.*+?^${}()\|[\]\\]/g;
		  // "$&" in the replacement stands for the whole matched substring.
		  const escaped = string.replace(metaCharacters, "\\$&");
		  return escaped;
		}

		/**
		* A base class for creating callable objects.
		* Instances can be invoked like functions; each invocation is forwarded to the
		* instance's `_call` method.
		*
		* @type {new () => {(...args: any[]): any, _call(...args: any[]): any}}
		*/
		export const Callable = /** @type {any} */ (
		class {
		/**
		* Creates a new instance of the Callable class.
		* The constructor returns a function object instead of the default `this`; that
		* function's prototype is set to `new.target.prototype`, so methods defined on the
		* class being constructed are reachable from it.
		*/
		constructor() {
		/**
		* Creates a closure that delegates to a private method '_call' with the given arguments.
		* @type {any}
		* @param {...any} args Zero or more arguments to pass to the '_call' method.
		* @returns {*} The result of calling the '_call' method.
		*/
		let closure = function (...args) {
		return closure._call(...args);
		};
		// Re-parent the function onto the constructed class's prototype and return it as the instance.
		return Object.setPrototypeOf(closure, new.target.prototype);
		}

		/**
		* This method should be implemented in subclasses to provide the
		* functionality of the callable object. The base implementation always throws.
		*
		* @param {any[]} args
		* @throws {Error} If the subclass does not implement the `_call` method.
		*/
		_call(...args) {
		throw Error("Must implement _call method in subclass");
		}
		}
		);

		/**
		 * Checks whether a value is a typed-array constructor (e.g. `Float32Array`).
		 *
		 * The check looks at the constructor recorded on the parent of `val.prototype`
		 * and compares its name with "TypedArray". Values without a `prototype`
		 * property, which includes typed-array instances, therefore yield `false`.
		 *
		 * @param {*} val The value to check.
		 * @returns {boolean} True if the parent prototype's constructor is named "TypedArray", false otherwise.
		 *
		 * Adapted from https://stackoverflow.com/a/71091338/13989043
		 */
		export function isTypedArray(val) {
		  const parentPrototype = val?.prototype?.__proto__;
		  const parentConstructorName = parentPrototype?.constructor?.name;
		  return parentConstructorName === "TypedArray";
		}

		/**
		 * Check if a value is an integer.
		 *
		 * Both integral `number` values and any `bigint` are accepted.
		 *
		 * @param {*} x The value to check.
		 * @returns {boolean} True if the value is an integral number or a bigint, false otherwise.
		 */
		export function isIntegralNumber(x) {
		  return Number.isInteger(x) || typeof x === "bigint";
		}

		/**
		 * Tells whether a value is present, i.e. neither `undefined` nor `null`.
		 * Falsy values such as `0`, `""` and `false` count as existing.
		 *
		 * @param {*} x The value to check.
		 * @returns {boolean} True if the value exists, false otherwise.
		 */
		export function exists(x) {
		  const isMissing = x === undefined || x === null;
		  return !isMissing;
		}

		/**
		 * Derives the shape of a nested array by following the first element at each
		 * nesting level and recording the length found there. Only the first element
		 * of each level is inspected.
		 *
		 * @param {any[]} arr The nested array to calculate dimensions for.
		 * @returns {number[]} An array containing the dimensions of the input array.
		 */
		export function calculateDimensions(arr) {
		  const shape = [];
		  for (let level = arr; Array.isArray(level); level = level[0]) {
		    shape.push(level.length);
		  }
		  return shape;
		}

		/**
		 * Removes a key from an object and returns its value, mimicking Python's
		 * `dict.pop()`. A key whose value is `undefined` is treated as absent.
		 *
		 * @param {Object} obj The object to pop from.
		 * @param {string} key The key to pop.
		 * @param {*} defaultValue The default value to return if the key does not exist.
		 * @returns {*} The value of the popped key.
		 * @throws {Error} If the key does not exist and no default value is provided.
		 */
		export function pop(obj, key, defaultValue = undefined) {
		  const found = obj[key];
		  if (found === undefined) {
		    if (defaultValue === undefined) {
		      throw Error(`Key ${key} does not exist in object.`);
		    }
		    return defaultValue;
		  }
		  delete obj[key];
		  return found;
		}

		/**
		 * Concatenates any number of arrays into a single, newly allocated array.
		 * Adapted from https://stackoverflow.com/a/6768642/13989043
		 * @param {Array[]} arrs Arrays to merge.
		 * @returns {Array} The merged array.
		 */
		export function mergeArrays(...arrs) {
		  const merged = [].concat(...arrs);
		  return merged;
		}

		/**
		 * Compute the Cartesian product of given arrays.
		 *
		 * The arrays are combined pairwise from left to right, so each step emits
		 * two-element `[left, right]` pairs. With more than two inputs the left element
		 * is itself a pair from the previous step (the result is nested, not flattened).
		 * A single input is returned as-is; calling with no arguments throws.
		 *
		 * @param {...Array} a Arrays to compute the product
		 * @returns {Array} Returns the computed Cartesian product as an array
		 * @private
		 */
		export function product(...a) {
		  // Adapted from https://stackoverflow.com/a/43053803
		  return a.reduce((combined, next) => {
		    const pairs = [];
		    combined.forEach((left) => {
		      next.forEach((right) => {
		        pairs.push([left, right]);
		      });
		    });
		    return pairs;
		  });
		}

		/**
		 * Computes `|((i + w) mod 2w) - w|` for an index `i` and window size `w`.
		 * @param {number} i The index.
		 * @param {number} w The window size.
		 * @returns {number} The index offset.
		 */
		export function calculateReflectOffset(i, w) {
		  const period = 2 * w;
		  const shifted = (i + w) % period;
		  return Math.abs(shifted - w);
		}

+420

src/tokenizers/utils/data-structures.js

		/**
		* @file Custom data structures.
		*
		* These are only used internally, meaning an end-user shouldn't
		* need to access anything here.
		*
		* @module utils/data-structures
		*/

		/**
		 * Efficient Heap-based Implementation of a Priority Queue.
		 * It uses an array-based binary heap, where the root is at index `0`, and the
		 * children of node `i` are located at indices `2i + 1` and `2i + 2`, respectively.
		 *
		 * Adapted from the following sources:
		 * - https://stackoverflow.com/a/42919752/13989043 (original)
		 * - https://github.com/belladoreai/llama-tokenizer-js (minor improvements)
		 */
		export class PriorityQueue {
		  /**
		   * Create a new PriorityQueue.
		   * @param {Function} comparator Comparator function to determine priority;
		   * `comparator(a, b)` must return true when `a` should be served before `b`.
		   * Defaults to a MaxHeap.
		   */
		  constructor(comparator = (a, b) => a > b) {
		    this._heap = [];
		    this._comparator = comparator;
		  }

		  /**
		   * The size of the queue
		   */
		  get size() {
		    return this._heap.length;
		  }

		  /**
		   * Check if the queue is empty.
		   * @returns {boolean} `true` if the queue is empty, `false` otherwise.
		   */
		  isEmpty() {
		    return this.size === 0;
		  }

		  /**
		   * Return the element with the highest priority in the queue.
		   * @returns {any} The highest priority element in the queue (`undefined` when empty).
		   */
		  peek() {
		    return this._heap[0];
		  }

		  /**
		   * Add one or more elements to the queue.
		   * @param {...any} values The values to push into the queue.
		   * @returns {number} The new size of the queue.
		   */
		  push(...values) {
		    return this.extend(values);
		  }

		  /**
		   * Add multiple elements to the queue.
		   * @param {any[]} values The values to push into the queue.
		   * @returns {number} The new size of the queue.
		   */
		  extend(values) {
		    for (const value of values) {
		      this._heap.push(value);
		      this._siftUp();
		    }
		    return this.size;
		  }

		  /**
		   * Remove and return the element with the highest priority in the queue.
		   * @returns {any} The element with the highest priority in the queue (`undefined` when empty).
		   */
		  pop() {
		    const poppedValue = this.peek();
		    const bottom = this.size - 1;
		    if (bottom > 0) {
		      // Move the root to the end so it can be removed in O(1).
		      this._swap(0, bottom);
		    }
		    this._heap.pop();
		    this._siftDown();
		    return poppedValue;
		  }

		  /**
		   * Replace the element with the highest priority in the queue with a new value.
		   * @param {*} value The new value.
		   * @returns {*} The replaced value.
		   */
		  replace(value) {
		    const replacedValue = this.peek();
		    this._heap[0] = value;
		    this._siftDown();
		    return replacedValue;
		  }

		  /**
		   * Compute the index for the parent of the node at index `i`.
		   * @param {number} i The index of the node to get the parent of.
		   * @returns {number} The index of the parent node.
		   * @private
		   */
		  _parent(i) {
		    return ((i + 1) >>> 1) - 1;
		  }

		  /**
		   * Compute the index for the left child of the node at index `i`.
		   * @param {number} i The index of the node to get the left child of.
		   * @returns {number} The index of the left child.
		   * @private
		   */
		  _left(i) {
		    return (i << 1) + 1;
		  }

		  /**
		   * Compute the index for the right child of the node at index `i`.
		   * @param {number} i The index of the node to get the right child of.
		   * @returns {number} The index of the right child.
		   * @private
		   */
		  _right(i) {
		    return (i + 1) << 1;
		  }

		  /**
		   * Check if the element at index `i` has priority over the element at index `j`
		   * according to the comparator.
		   * @param {number} i The index of the first element to compare.
		   * @param {number} j The index of the second element to compare.
		   * @returns {boolean} The comparator's result for the elements at `i` and `j`.
		   * @private
		   */
		  _greater(i, j) {
		    return this._comparator(this._heap[i], this._heap[j]);
		  }

		  /**
		   * Swap the elements at indices `i` and `j`.
		   * @param {number} i The index of the first element to swap.
		   * @param {number} j The index of the second element to swap.
		   * @private
		   */
		  _swap(i, j) {
		    const temp = this._heap[i];
		    this._heap[i] = this._heap[j];
		    this._heap[j] = temp;
		  }

		  /**
		   * Maintain the heap property by updating positions in the heap,
		   * starting at the last element and moving up the heap.
		   * @private
		   */
		  _siftUp() {
		    let node = this.size - 1;
		    while (node > 0 && this._greater(node, this._parent(node))) {
		      const parent = this._parent(node);
		      this._swap(node, parent);
		      node = parent;
		    }
		  }

		  /**
		   * Maintain the heap property by updating positions in the heap,
		   * starting at the first element and moving down the heap.
		   * @private
		   */
		  _siftDown() {
		    let node = 0;
		    while (
		      (this._left(node) < this.size && this._greater(this._left(node), node)) ||
		      (this._right(node) < this.size && this._greater(this._right(node), node))
		    ) {
		      // Swap with whichever child has the higher priority.
		      const maxChild =
		        this._right(node) < this.size &&
		        this._greater(this._right(node), this._left(node))
		          ? this._right(node)
		          : this._left(node);
		      this._swap(node, maxChild);
		      node = maxChild;
		    }
		  }
		}

		/**
		 * A trie structure to efficiently store and search for strings.
		 * Keys are stored one Unicode code point per level.
		 */
		export class CharTrie {
		  constructor() {
		    this.root = CharTrieNode.default();
		  }

		  /**
		   * Adds one or more `texts` to the trie.
		   * @param {string[]} texts The strings to add to the trie.
		   */
		  extend(texts) {
		    for (const text of texts) {
		      this.push(text);
		    }
		  }

		  /**
		   * Adds text to the trie.
		   * @param {string} text The string to add to the trie.
		   */
		  push(text) {
		    let node = this.root;
		    for (const ch of text) {
		      let child = node.children.get(ch);
		      if (child === undefined) {
		        child = CharTrieNode.default();
		        node.children.set(ch, child);
		      }
		      node = child;
		    }
		    node.isLeaf = true;
		  }

		  /**
		   * Searches the trie for all stored strings that are a prefix of `text`.
		   *
		   * The text is walked by code point, the same way `push` stores it, so entries
		   * containing characters outside the Basic Multilingual Plane (surrogate pairs)
		   * are matched correctly.
		   *
		   * @param {string} text The text whose prefixes are looked up.
		   * @yields {string} Each stored string that is a prefix of `text`, shortest first.
		   */
		  *commonPrefixSearch(text) {
		    let node = this.root;
		    if (node === undefined) return;
		    let prefix = "";
		    for (const ch of text) {
		      node = node.children.get(ch);
		      // No stored string continues along this path.
		      if (node === undefined) return;
		      prefix += ch;
		      if (node.isLeaf) {
		        yield prefix;
		      }
		    }
		  }
		}

		/**
		 * Represents a node in a character trie.
		 */
		class CharTrieNode {
		  /**
		   * Create a new CharTrieNode.
		   * @param {boolean} isLeaf Whether a stored string ends at this node.
		   * @param {Map<string, CharTrieNode>} children A map containing the node's children, where the key is a character and the value is a `CharTrieNode`.
		   */
		  constructor(isLeaf, children) {
		    this.isLeaf = isLeaf;
		    this.children = children;
		  }

		  /**
		   * Returns a new `CharTrieNode` instance with default values.
		   * @returns {CharTrieNode} A new `CharTrieNode` instance with `isLeaf` set to `false` and an empty `children` map.
		   */
		  static default() {
		    return new CharTrieNode(false, new Map());
		  }
		}

		/**
		 * A lattice data structure to be used for tokenization.
		 *
		 * Candidate tokens are stored as nodes indexed by the position where they begin
		 * and the position where they end; `viterbi()` then selects the highest-scoring
		 * chain of nodes covering the whole sentence.
		 */
		export class TokenLattice {
		  /**
		   * Creates a new TokenLattice instance.
		   *
		   * @param {string} sentence The input sentence to be tokenized.
		   * @param {number} bosTokenId The beginning-of-sequence token ID.
		   * @param {number} eosTokenId The end-of-sequence token ID.
		   */
		  constructor(sentence, bosTokenId, eosTokenId) {
		    this.sentence = sentence;
		    this.len = sentence.length;
		    this.bosTokenId = bosTokenId;
		    this.eosTokenId = eosTokenId;
		    this.nodes = [];
		    this.beginNodes = Array.from({ length: this.len + 1 }, () => []);
		    this.endNodes = Array.from({ length: this.len + 1 }, () => []);

		    // Zero-length sentinel nodes: BOS ends at position 0, EOS begins at `len`.
		    const bos = new TokenLatticeNode(this.bosTokenId, 0, 0, 0, 0.0);
		    const eos = new TokenLatticeNode(this.eosTokenId, 1, this.len, 0, 0.0);
		    this.nodes.push(bos.clone());
		    this.nodes.push(eos.clone());
		    this.beginNodes[this.len].push(eos);
		    this.endNodes[0].push(bos);
		  }

		  /**
		   * Inserts a new token node into the token lattice.
		   *
		   * @param {number} pos The starting position of the token.
		   * @param {number} length The length of the token.
		   * @param {number} score The score of the token.
		   * @param {number} tokenId The token ID of the token.
		   */
		  insert(pos, length, score, tokenId) {
		    const nodeId = this.nodes.length;
		    const node = new TokenLatticeNode(tokenId, nodeId, pos, length, score);
		    this.beginNodes[pos].push(node);
		    this.endNodes[pos + length].push(node);
		    this.nodes.push(node);
		  }

		  /**
		   * Implements the Viterbi algorithm to compute the most likely sequence of tokens.
		   *
		   * @returns {TokenLatticeNode[]} The array of nodes representing the most likely
		   * sequence of tokens (BOS/EOS excluded). Empty when some position has no node
		   * beginning at it or no node ending at it.
		   */
		  viterbi() {
		    const len = this.len;
		    for (let pos = 0; pos <= len; ++pos) {
		      if (this.beginNodes[pos].length === 0) {
		        return [];
		      }
		      for (const rnode of this.beginNodes[pos]) {
		        rnode.prev = null;
		        let bestScore = 0.0;
		        let bestNode = null;
		        // Pick the best-scoring predecessor among nodes ending where `rnode` begins.
		        for (const lnode of this.endNodes[pos]) {
		          const score = lnode.backtraceScore + rnode.score;
		          if (bestNode === null || score > bestScore) {
		            bestNode = lnode.clone();
		            bestScore = score;
		          }
		        }

		        if (bestNode === null) {
		          return [];
		        }
		        rnode.prev = bestNode;
		        rnode.backtraceScore = bestScore;
		      }
		    }

		    // Walk back from EOS; the BOS node (whose `prev` is null) is not included.
		    const results = [];
		    const root = this.beginNodes[len][0];
		    const prev = root.prev;
		    if (prev === null) {
		      return [];
		    }

		    let node = prev.clone();
		    while (node.prev !== null) {
		      results.push(node.clone());
		      node = node.prev.clone();
		    }

		    results.reverse();
		    return results;
		  }

		  /**
		   * Extracts the text covered by a node.
		   * @param {TokenLatticeNode} node
		   * @returns {string} The slice of the sentence spanned by `node`.
		   */
		  piece(node) {
		    return this.sentence.slice(node.pos, node.pos + node.length);
		  }

		  /**
		   * @returns {Array} The text pieces of the most likely sequence of tokens.
		   */
		  tokens() {
		    const nodes = this.viterbi();
		    return nodes.map((x) => this.piece(x));
		  }

		  /**
		   * @returns {Array} The token IDs of the most likely sequence of tokens.
		   */
		  tokenIds() {
		    const nodes = this.viterbi();
		    return nodes.map((x) => x.tokenId);
		  }
		}
		class TokenLatticeNode {
		  /**
		   * Represents a node in a token lattice for a given sentence.
		   * @param {number} tokenId The ID of the token associated with this node.
		   * @param {number} nodeId The ID of this node.
		   * @param {number} pos The starting position of the token in the sentence.
		   * @param {number} length The length of the token.
		   * @param {number} score The score associated with the token.
		   */
		  constructor(tokenId, nodeId, pos, length, score) {
		    this.tokenId = tokenId;
		    this.nodeId = nodeId;
		    this.pos = pos;
		    this.length = length;
		    this.score = score;
		    this.prev = null;
		    this.backtraceScore = 0.0;
		  }

		  /**
		   * Returns a clone of this node. The `prev` link is copied by reference.
		   * @returns {TokenLatticeNode} A clone of this node.
		   */
		  clone() {
		    const n = new TokenLatticeNode(
		      this.tokenId,
		      this.nodeId,
		      this.pos,
		      this.length,
		      this.score
		    );
		    n.prev = this.prev;
		    n.backtraceScore = this.backtraceScore;
		    return n;
		  }
		}

+66

src/tokenizers/utils/hub.js

		/**
		 * Downloads a file with the Fetch API and resolves with its raw bytes.
		 *
		 * Only locations starting with "http" are supported; the request URL is the
		 * plain concatenation `path_or_repo_id + filename`. Any other location is
		 * rejected because no filesystem access is implemented here.
		 *
		 * @param {string} path_or_repo_id Base URL the file lives under.
		 * @param {string} filename The name of the file, appended to `path_or_repo_id`.
		 * @param {boolean} [fatal=true] Whether to throw an error if the response is not OK.
		 * @param {PretrainedOptions} [options] Accepted for signature compatibility; not read by this function.
		 *
		 * @throws Will throw an error if the response is not OK and `fatal` is true,
		 * or if `path_or_repo_id` does not start with "http".
		 * @returns {Promise} A Promise that resolves with the file content as a buffer,
		 * or `null` when the response is not OK and `fatal` is false.
		 */
		export async function getModelFile(
		  path_or_repo_id,
		  filename,
		  fatal = true,
		  options = {}
		) {
		  if (!path_or_repo_id.startsWith("http")) {
		    throw new Error(
		      "Filesystem not supported, please implement your own file reading logic."
		    );
		  }

		  const response = await fetch(path_or_repo_id + filename);
		  if (response.ok) {
		    return response.arrayBuffer();
		  }
		  if (fatal) {
		    throw new Error(`File not found at ${path_or_repo_id}${filename}`);
		  }
		  return null;
		}

		/**
		 * Loads a file through `getModelFile` and parses its UTF-8 content as JSON.
		 *
		 * @param {string} modelPath The path to the directory containing the file.
		 * @param {string} fileName The name of the file to fetch.
		 * @param {boolean} [fatal=true] Whether to throw an error if the file is not found.
		 * @param {PretrainedOptions} [options] An object containing optional parameters; forwarded to `getModelFile`.
		 * @returns {Promise<Object>} The JSON data parsed into a JavaScript object, or an
		 * empty object when `getModelFile` resolves with `null`.
		 * @throws Will throw an error if the file is not found and `fatal` is true.
		 */
		export async function getModelJSON(
		  modelPath,
		  fileName,
		  fatal = true,
		  options = {}
		) {
		  const rawBytes = await getModelFile(modelPath, fileName, fatal, options);
		  if (rawBytes === null) {
		    // Nothing was loaded: fall back to an empty object.
		    return {};
		  }

		  const text = new TextDecoder("utf-8").decode(rawBytes);
		  return JSON.parse(text);
		}

+972

src/tokenizers/utils/maths.js

		/**
		* @file Helper module for mathematical processing.
		*
		* These functions and classes are only used internally,
		* meaning an end-user shouldn't need to access anything here.
		*
		* @module utils/maths
		*/

		/**
		* @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array} TypedArray
		* @typedef {BigInt64Array | BigUint64Array} BigTypedArray
		* @typedef {TypedArray | BigTypedArray} AnyTypedArray
		*/

		/**
		* Resizes image data from `in_height x in_width` to `out_height x out_width`
		* using bilinear interpolation, independently for each channel.
		*
		* The input is indexed as `channel * (in_height * in_width) + row * in_width + column`,
		* and the output uses the same layout with the output dimensions.
		* The `mode` and `align_corners` arguments are currently not used by the implementation.
		*
		* @param {TypedArray} input
		* The remaining parameters are, in order: the input dimensions `[in_channels, in_height, in_width]`,
		* the output dimensions `[out_height, out_width]`, `mode` (default "bilinear") and
		* `align_corners` (default false).
		* The return value is a new array created with `input`'s constructor, of length
		* `out_height * out_width * in_channels`.
		*/
		export function interpolate_data(
		input,
		[in_channels, in_height, in_width],
		[out_height, out_width],
		mode = "bilinear",
		align_corners = false
		) {
		// TODO use mode and align_corners

		// Scale factors from input size to output size
		const x_scale = out_width / in_width;
		const y_scale = out_height / in_height;

		// Output image
		// @ts-ignore
		const out_img = new input.constructor(out_height * out_width * in_channels);

		// Pre-calculate strides
		const inStride = in_height * in_width;
		const outStride = out_height * out_width;

		for (let i = 0; i < out_height; ++i) {
		for (let j = 0; j < out_width; ++j) {
		// Calculate output offset
		const outOffset = i * out_width + j;

		// Calculate input pixel coordinates
		const x = (j + 0.5) / x_scale - 0.5;
		const y = (i + 0.5) / y_scale - 0.5;

		// Calculate the four nearest input pixels
		// We also check if the input pixel coordinates are within the image bounds
		let x1 = Math.floor(x);
		let y1 = Math.floor(y);
		const x2 = Math.min(x1 + 1, in_width - 1);
		const y2 = Math.min(y1 + 1, in_height - 1);

		// Clamp the lower neighbours to the first column/row
		x1 = Math.max(x1, 0);
		y1 = Math.max(y1, 0);

		// Calculate the fractional distances between the input pixel and the four nearest pixels
		const s = x - x1;
		const t = y - y1;

		// Perform bilinear interpolation
		const w1 = (1 - s) * (1 - t);
		const w2 = s * (1 - t);
		const w3 = (1 - s) * t;
		const w4 = s * t;

		// Calculate the four nearest input pixel indices
		// (`yStride` is the row offset of y1; `xStride` is the row offset of y2)
		const yStride = y1 * in_width;
		const xStride = y2 * in_width;
		const idx1 = yStride + x1;
		const idx2 = yStride + x2;
		const idx3 = xStride + x1;
		const idx4 = xStride + x2;

		for (let k = 0; k < in_channels; ++k) {
		// Calculate channel offset
		const cOffset = k * inStride;

		// Weighted sum of the four neighbours for this channel
		out_img[k * outStride + outOffset] =
		w1 * input[cOffset + idx1] +
		w2 * input[cOffset + idx2] +
		w3 * input[cOffset + idx3] +
		w4 * input[cOffset + idx4];
		}
		}
		}

		return out_img;
		}

		/**
		 * Reorders the axes of flat, row-major data without going through a tensor wrapper.
		 *
		 * @template {AnyTypedArray} T
		 * @param {T} array The flat data to permute.
		 * @param {number[]} dims The shape of `array` before permuting.
		 * @param {number[]} axes For every output axis, the input axis it is taken from.
		 * @returns {[T, number[]]} The permuted array and the new shape.
		 */
		export function permute_data(array, dims, axes) {
		  const rank = axes.length;

		  // Shape of the result and the row-major strides that go with it.
		  const shape = new Array(rank);
		  const stride = new Array(rank);
		  let running = 1;
		  for (let axis = rank - 1; axis >= 0; --axis) {
		    stride[axis] = running;
		    shape[axis] = dims[axes[axis]];
		    running *= shape[axis];
		  }

		  // For each input axis, the output stride it maps to.
		  const invStride = axes.map((_, axis) => stride[axes.indexOf(axis)]);

		  // Same container type and length as the input.
		  // @ts-ignore
		  const permutedData = new array.constructor(array.length);

		  for (let source = 0; source < array.length; ++source) {
		    // Decompose the flat source index into per-axis coordinates (last axis
		    // first) and accumulate the destination index as we go.
		    let target = 0;
		    let remainder = source;
		    for (let d = dims.length - 1; d >= 0; --d) {
		      const extent = dims[d];
		      target += (remainder % extent) * invStride[d];
		      remainder = Math.floor(remainder / extent);
		    }
		    permutedData[target] = array[source];
		  }

		  return [permutedData, shape];
		}

		/**
		 * Compute the softmax of an array of numbers.
		 * The maximum is subtracted from every element before exponentiating.
		 *
		 * @template {TypedArray|number[]} T
		 * @param {T} arr The array of numbers to compute the softmax of.
		 * @returns {T} The softmax array.
		 */
		export function softmax(arr) {
		  // Largest element, used as the shift for the exponentials.
		  const [peak] = max(arr);

		  const shiftedExps = arr.map((value) => Math.exp(value - peak));

		  // Normalisation constant.
		  // @ts-ignore
		  const total = shiftedExps.reduce((sum, e) => sum + e, 0);

		  const normalised = shiftedExps.map((e) => e / total);
		  return /** @type {T} */ (normalised);
		}

		/**
		 * Calculates the logarithm of the softmax function for the input array.
		 *
		 * The result is computed as `x - max - log(sum(exp(x - max)))` rather than
		 * `log(softmax(x))`, so entries far below the maximum keep a finite value
		 * instead of underflowing to `log(0) = -Infinity`.
		 *
		 * @template {TypedArray|number[]} T
		 * @param {T} arr The input array to calculate the log_softmax function for.
		 * @returns {T} The resulting log_softmax array.
		 */
		export function log_softmax(arr) {
		  // Shift by the maximum so every exponent is <= 0.
		  const maxVal = max(arr)[0];

		  let sumExps = 0;
		  for (let i = 0; i < arr.length; ++i) {
		    sumExps += Math.exp(arr[i] - maxVal);
		  }
		  const logSum = Math.log(sumExps);

		  const logSoftmaxArr = arr.map((x) => x - maxVal - logSum);
		  return /** @type {T} */ (logSoftmaxArr);
		}

		/**
		 * Calculates the dot product of two arrays.
		 * The iteration length is taken from `arr1`.
		 *
		 * @param {number[]} arr1 The first array.
		 * @param {number[]} arr2 The second array.
		 * @returns {number} The dot product of arr1 and arr2.
		 */
		export function dot(arr1, arr2) {
		  let total = 0;
		  for (const [index, left] of arr1.entries()) {
		    total += left * arr2[index];
		  }
		  return total;
		}

		/**
		 * Ranks the items of an iterable by value, highest first, and optionally keeps
		 * only the first `top_k` of them.
		 *
		 * @param {any[]|TypedArray} items The items to be sorted
		 * @param {number|null} [top_k=0] The number of top items to return (default: 0 = return all)
		 * @returns {[number, any][]} `[index, value]` pairs, sorted by descending value
		 */
		export function getTopItems(items, top_k = 0) {
		  // Pair every value with its original position: [index, score].
		  const ranked = Array.from(items, (score, index) => [index, score]);
		  ranked.sort((left, right) => right[1] - left[1]);

		  // A null or non-positive top_k means "keep everything".
		  const shouldTruncate = top_k !== null && top_k > 0;
		  return shouldTruncate ? ranked.slice(0, top_k) : ranked;
		}

		/**
		 * Computes the cosine similarity between two arrays: their dot product
		 * divided by the product of their magnitudes.
		 *
		 * @param {number[]} arr1 The first array.
		 * @param {number[]} arr2 The second array.
		 * @returns {number} The cosine similarity between the two arrays.
		 */
		export function cos_sim(arr1, arr2) {
		  const numerator = dot(arr1, arr2);
		  const normA = magnitude(arr1);
		  const normB = magnitude(arr2);
		  return numerator / (normA * normB);
		}

		/**
		 * Calculates the magnitude of a given array: the square root of the sum of
		 * the squared elements.
		 * @param {number[]} arr The array to calculate the magnitude of.
		 * @returns {number} The magnitude of the array.
		 */
		export function magnitude(arr) {
		  let sumOfSquares = 0;
		  arr.forEach((value) => {
		    sumOfSquares += value * value;
		  });
		  return Math.sqrt(sumOfSquares);
		}

		/**
		 * Finds the smallest element of an array together with its position.
		 * When the minimum occurs more than once, the first occurrence is reported.
		 *
		 * @param {number[]|TypedArray} arr array of numbers.
		 * @returns {number[]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
		 * @throws {Error} If array is empty.
		 */
		export function min(arr) {
		  if (arr.length === 0) throw Error("Array must not be empty");

		  let smallest = arr[0];
		  let smallestIndex = 0;
		  arr.forEach((value, index) => {
		    if (value < smallest) {
		      smallest = value;
		      smallestIndex = index;
		    }
		  });
		  return [smallest, smallestIndex];
		}

		/**
		 * Finds the largest element of an array together with its position.
		 * When the maximum occurs more than once, the first occurrence is reported.
		 * The returned value is passed through `Number(...)`.
		 *
		 * @param {number[]|AnyTypedArray} arr array of numbers.
		 * @returns {[number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
		 * @throws {Error} If array is empty.
		 */
		export function max(arr) {
		  if (arr.length === 0) throw Error("Array must not be empty");

		  let largest = arr[0];
		  let largestIndex = 0;
		  arr.forEach((value, index) => {
		    if (value > largest) {
		      largest = value;
		      largestIndex = index;
		    }
		  });
		  return [Number(largest), largestIndex];
		}

		/**
		 * Tells whether a number is a positive power of two.
		 * @param {number} number The value to test.
		 * @returns {boolean} True when `number` is greater than 0 and has exactly one bit set.
		 */
		function isPowerOfTwo(number) {
		  if (!(number > 0)) {
		    return false;
		  }
		  // A power of two shares no set bits with its predecessor.
		  const sharedBits = number & (number - 1);
		  return sharedBits === 0;
		}

		/**
		* Implementation of Radix-4 FFT.
		*
		* P2FFT class provides functionality for performing Fast Fourier Transform on arrays
		* which are a power of two in length.
		* Code adapted from https://www.npmjs.com/package/fft.js
		*/
		class P2FFT {
		/**
		* @param {number} size The size of the input array. Must be a power of two larger than 1.
		* @throws {Error} FFT size must be a power of two larger than 1.
		*/
		constructor(size) {
		this.size = size \| 0; // convert to a 32-bit signed integer
		if (this.size <= 1 \|\| !isPowerOfTwo(this.size))
		throw new Error("FFT size must be a power of two larger than 1");

		this._csize = size << 1;

		this.table = new Float64Array(this.size * 2);
		for (let i = 0; i < this.table.length; i += 2) {
		const angle = (Math.PI * i) / this.size;
		this.table[i] = Math.cos(angle);
		this.table[i + 1] = -Math.sin(angle);
		}

		// Find size's power of two
		let power = 0;
		for (let t = 1; this.size > t; t <<= 1) ++power;

		// Calculate initial step's width:
		// * If we are full radix-4, it is 2x smaller to give inital len=8
		// * Otherwise it is the same as `power` to give len=4
		this._width = power % 2 === 0 ? power - 1 : power;

		// Pre-compute bit-reversal patterns
		this._bitrev = new Int32Array(1 << this._width);
		for (let j = 0; j < this._bitrev.length; ++j) {
		this._bitrev[j] = 0;
		for (let shift = 0; shift < this._width; shift += 2) {
		const revShift = this._width - shift - 2;
		this._bitrev[j] \|= ((j >>> shift) & 3) << revShift;
		}
		}
		}

		/**
		* Create a complex number array with size `2 * size`
		*
		* @returns {Float64Array} A complex number array with size `2 * size`
		*/
		createComplexArray() {
		return new Float64Array(this._csize);
		}

		/**
		* Converts a complex number representation stored in a Float64Array to an array of real numbers.
		*
		* @param {Float64Array} complex The complex number representation to be converted.
		* @param {number[]} [storage] An optional array to store the result in.
		* @returns {number[]} An array of real numbers representing the input complex number representation.
		*/
		fromComplexArray(complex, storage) {
		const res = storage \|\| new Array(complex.length >>> 1);
		for (let i = 0; i < complex.length; i += 2) res[i >>> 1] = complex[i];
		return res;
		}

		/**
		* Convert a real-valued input array to a complex-valued output array.
		* @param {Float64Array} input The real-valued input array.
		* @param {Float64Array} [storage] Optional buffer to store the output array.
		* @returns {Float64Array} The complex-valued output array.
		*/
		toComplexArray(input, storage) {
		const res = storage \|\| this.createComplexArray();
		for (let i = 0; i < res.length; i += 2) {
		res[i] = input[i >>> 1];
		res[i + 1] = 0;
		}
		return res;
		}

		/**
		* Completes the spectrum by adding its mirrored negative frequency components.
		* @param {Float64Array} spectrum The input spectrum.
		* @returns {void}
		*/
		completeSpectrum(spectrum) {
		const size = this._csize;
		const half = size >>> 1;
		for (let i = 2; i < half; i += 2) {
		spectrum[size - i] = spectrum[i];
		spectrum[size - i + 1] = -spectrum[i + 1];
		}
		}

		/**
		* Performs a Fast Fourier Transform (FFT) on the given input data and stores the result in the output buffer.
		*
		* @param {Float64Array} out The output buffer to store the result.
		* @param {Float64Array} data The input data to transform.
		*
		* @throws {Error} Input and output buffers must be different.
		*
		* @returns {void}
		*/
		transform(out, data) {
		if (out === data)
		throw new Error("Input and output buffers must be different");

		this._transform4(out, data, 1 /* DONE */);
		}

		/**
		* Performs a real-valued forward FFT on the given input buffer and stores the result in the given output buffer.
		* The input buffer must contain real values only, while the output buffer will contain complex values. The input and
		* output buffers must be different.
		*
		* @param {Float64Array} out The output buffer.
		* @param {Float64Array} data The input buffer containing real values.
		*
		* @throws {Error} If the input and output buffers are the same.
		*/
		realTransform(out, data) {
		if (out === data)
		throw new Error("Input and output buffers must be different");

		this._realTransform4(out, data, 1 /* DONE */);
		}

		/**
		* Performs an inverse FFT transformation on the given `data` array, and stores the result in `out`.
		* The `out` array must be a different buffer than the `data` array. The `out` array will contain the
		* result of the transformation. The `data` array will not be modified.
		*
		* @param {Float64Array} out The output buffer for the transformed data.
		* @param {Float64Array} data The input data to transform.
		* @throws {Error} If `out` and `data` refer to the same buffer.
		* @returns {void}
		*/
		inverseTransform(out, data) {
		if (out === data)
		throw new Error("Input and output buffers must be different");

		this._transform4(out, data, -1 /* DONE */);
		for (let i = 0; i < out.length; ++i) out[i] /= this.size;
		}

		/**
		* Performs a radix-4 implementation of a discrete Fourier transform on a given set of data.
		*
		* @param {Float64Array} out The output buffer for the transformed data.
		* @param {Float64Array} data The input buffer of data to be transformed.
		* @param {number} inv A scaling factor to apply to the transform.
		* @returns {void}
		*/
		_transform4(out, data, inv) {
		// radix-4 implementation

		const size = this._csize;

		// Initial step (permute and transform)
		const width = this._width;
		let step = 1 << width;
		let len = (size / step) << 1;

		let outOff;
		let t;
		const bitrev = this._bitrev;
		if (len === 4) {
		for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
		const off = bitrev[t];
		this._singleTransform2(data, out, outOff, off, step);
		}
		} else {
		// len === 8
		for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
		const off = bitrev[t];
		this._singleTransform4(data, out, outOff, off, step, inv);
		}
		}

		// Loop through steps in decreasing order
		for (step >>= 2; step >= 2; step >>= 2) {
		len = (size / step) << 1;
		const quarterLen = len >>> 2;

		// Loop through offsets in the data
		for (outOff = 0; outOff < size; outOff += len) {
		// Full case
		const limit = outOff + quarterLen - 1;
		for (let i = outOff, k = 0; i < limit; i += 2, k += step) {
		const A = i;
		const B = A + quarterLen;
		const C = B + quarterLen;
		const D = C + quarterLen;

		// Original values
		const Ar = out[A];
		const Ai = out[A + 1];
		const Br = out[B];
		const Bi = out[B + 1];
		const Cr = out[C];
		const Ci = out[C + 1];
		const Dr = out[D];
		const Di = out[D + 1];

		const tableBr = this.table[k];
		const tableBi = inv * this.table[k + 1];
		const MBr = Br * tableBr - Bi * tableBi;
		const MBi = Br * tableBi + Bi * tableBr;

		const tableCr = this.table[2 * k];
		const tableCi = inv * this.table[2 * k + 1];
		const MCr = Cr * tableCr - Ci * tableCi;
		const MCi = Cr * tableCi + Ci * tableCr;

		const tableDr = this.table[3 * k];
		const tableDi = inv * this.table[3 * k + 1];
		const MDr = Dr * tableDr - Di * tableDi;
		const MDi = Dr * tableDi + Di * tableDr;

		// Pre-Final values
		const T0r = Ar + MCr;
		const T0i = Ai + MCi;
		const T1r = Ar - MCr;
		const T1i = Ai - MCi;
		const T2r = MBr + MDr;
		const T2i = MBi + MDi;
		const T3r = inv * (MBr - MDr);
		const T3i = inv * (MBi - MDi);

		// Final values
		out[A] = T0r + T2r;
		out[A + 1] = T0i + T2i;
		out[B] = T1r + T3i;
		out[B + 1] = T1i - T3r;
		out[C] = T0r - T2r;
		out[C + 1] = T0i - T2i;
		out[D] = T1r - T3i;
		out[D + 1] = T1i + T3r;
		}
		}
		}
		}

		/**
		* Performs a radix-2 implementation of a discrete Fourier transform on a given set of data.
		*
		* @param {Float64Array} data The input buffer of data to be transformed.
		* @param {Float64Array} out The output buffer for the transformed data.
		* @param {number} outOff The offset at which to write the output data.
		* @param {number} off The offset at which to begin reading the input data.
		* @param {number} step The step size for indexing the input data.
		* @returns {void}
		*/
		_singleTransform2(data, out, outOff, off, step) {
		// radix-2 implementation
		// NOTE: Only called for len=4

		const evenR = data[off];
		const evenI = data[off + 1];
		const oddR = data[off + step];
		const oddI = data[off + step + 1];

		out[outOff] = evenR + oddR;
		out[outOff + 1] = evenI + oddI;
		out[outOff + 2] = evenR - oddR;
		out[outOff + 3] = evenI - oddI;
		}

		/**
		* Performs radix-4 transformation on input data of length 8
		*
		* @param {Float64Array} data Input data array of length 8
		* @param {Float64Array} out Output data array of length 8
		* @param {number} outOff Index of output array to start writing from
		* @param {number} off Index of input array to start reading from
		* @param {number} step Step size between elements in input array
		* @param {number} inv Scaling factor for inverse transform
		*
		* @returns {void}
		*/
		_singleTransform4(data, out, outOff, off, step, inv) {
		// radix-4
		// NOTE: Only called for len=8
		const step2 = step * 2;
		const step3 = step * 3;

		// Original values
		const Ar = data[off];
		const Ai = data[off + 1];
		const Br = data[off + step];
		const Bi = data[off + step + 1];
		const Cr = data[off + step2];
		const Ci = data[off + step2 + 1];
		const Dr = data[off + step3];
		const Di = data[off + step3 + 1];

		// Pre-Final values
		const T0r = Ar + Cr;
		const T0i = Ai + Ci;
		const T1r = Ar - Cr;
		const T1i = Ai - Ci;
		const T2r = Br + Dr;
		const T2i = Bi + Di;
		const T3r = inv * (Br - Dr);
		const T3i = inv * (Bi - Di);

		// Final values
		out[outOff] = T0r + T2r;
		out[outOff + 1] = T0i + T2i;
		out[outOff + 2] = T1r + T3i;
		out[outOff + 3] = T1i - T3r;
		out[outOff + 4] = T0r - T2r;
		out[outOff + 5] = T0i - T2i;
		out[outOff + 6] = T1r - T3i;
		out[outOff + 7] = T1i + T3r;
		}

		/**
		* Real input radix-4 implementation
		* @param {Float64Array} out Output array for the transformed data
		* @param {Float64Array} data Input array of real data to be transformed
		* @param {number} inv The scale factor used to normalize the inverse transform
		*/
		_realTransform4(out, data, inv) {
		// Real input radix-4 implementation
		const size = this._csize;

		// Initial step (permute and transform)
		const width = this._width;
		let step = 1 << width;
		let len = (size / step) << 1;

		let outOff;
		let t;
		const bitrev = this._bitrev;
		if (len === 4) {
		for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
		const off = bitrev[t];
		this._singleRealTransform2(data, out, outOff, off >>> 1, step >>> 1);
		}
		} else {
		// len === 8
		for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
		const off = bitrev[t];
		this._singleRealTransform4(
		data,
		out,
		outOff,
		off >>> 1,
		step >>> 1,
		inv
		);
		}
		}

		// TODO: Optimize once https://github.com/indutny/fft.js/issues/25 is fixed
		// Loop through steps in decreasing order
		for (step >>= 2; step >= 2; step >>= 2) {
		len = (size / step) << 1;
		const quarterLen = len >>> 2;

		// Loop through offsets in the data
		for (outOff = 0; outOff < size; outOff += len) {
		// Full case
		const limit = outOff + quarterLen - 1;
		for (let i = outOff, k = 0; i < limit; i += 2, k += step) {
		const A = i;
		const B = A + quarterLen;
		const C = B + quarterLen;
		const D = C + quarterLen;

		// Original values
		const Ar = out[A];
		const Ai = out[A + 1];
		const Br = out[B];
		const Bi = out[B + 1];
		const Cr = out[C];
		const Ci = out[C + 1];
		const Dr = out[D];
		const Di = out[D + 1];

		const tableBr = this.table[k];
		const tableBi = inv * this.table[k + 1];
		const MBr = Br * tableBr - Bi * tableBi;
		const MBi = Br * tableBi + Bi * tableBr;

		const tableCr = this.table[2 * k];
		const tableCi = inv * this.table[2 * k + 1];
		const MCr = Cr * tableCr - Ci * tableCi;
		const MCi = Cr * tableCi + Ci * tableCr;

		const tableDr = this.table[3 * k];
		const tableDi = inv * this.table[3 * k + 1];
		const MDr = Dr * tableDr - Di * tableDi;
		const MDi = Dr * tableDi + Di * tableDr;

		// Pre-Final values
		const T0r = Ar + MCr;
		const T0i = Ai + MCi;
		const T1r = Ar - MCr;
		const T1i = Ai - MCi;
		const T2r = MBr + MDr;
		const T2i = MBi + MDi;
		const T3r = inv * (MBr - MDr);
		const T3i = inv * (MBi - MDi);

		// Final values
		out[A] = T0r + T2r;
		out[A + 1] = T0i + T2i;
		out[B] = T1r + T3i;
		out[B + 1] = T1i - T3r;
		out[C] = T0r - T2r;
		out[C + 1] = T0i - T2i;
		out[D] = T1r - T3i;
		out[D + 1] = T1i + T3r;
		}
		}
		}
		}

		/**
		* Performs a single real input radix-2 transformation on the provided data
		*
		* @param {Float64Array} data The input data array
		* @param {Float64Array} out The output data array
		* @param {number} outOff The output offset
		* @param {number} off The input offset
		* @param {number} step The step
		*
		* @returns {void}
		*/
		_singleRealTransform2(data, out, outOff, off, step) {
		// radix-2 implementation
		// NOTE: Only called for len=4

		const evenR = data[off];
		const oddR = data[off + step];

		out[outOff] = evenR + oddR;
		out[outOff + 1] = 0;
		out[outOff + 2] = evenR - oddR;
		out[outOff + 3] = 0;
		}

		/**
		* Computes a single real-valued transform using radix-4 algorithm.
		* This method is only called for len=8.
		*
		* @param {Float64Array} data The input data array.
		* @param {Float64Array} out The output data array.
		* @param {number} outOff The offset into the output array.
		* @param {number} off The offset into the input array.
		* @param {number} step The step size for the input array.
		* @param {number} inv The value of inverse.
		*/
		_singleRealTransform4(data, out, outOff, off, step, inv) {
		// radix-4
		// NOTE: Only called for len=8
		const step2 = step * 2;
		const step3 = step * 3;

		// Original values
		const Ar = data[off];
		const Br = data[off + step];
		const Cr = data[off + step2];
		const Dr = data[off + step3];

		// Pre-Final values
		const T0r = Ar + Cr;
		const T1r = Ar - Cr;
		const T2r = Br + Dr;
		const T3r = inv * (Br - Dr);

		// Final values
		out[outOff] = T0r + T2r;
		out[outOff + 1] = 0;
		out[outOff + 2] = T1r;
		out[outOff + 3] = -T3r;
		out[outOff + 4] = T0r - T2r;
		out[outOff + 5] = 0;
		out[outOff + 6] = T1r;
		out[outOff + 7] = T3r;
		}
		}

		/**
		 * NP2FFT class provides functionality for performing Fast Fourier Transform on arrays
		 * which are not a power of two in length. In such cases, the chirp-z transform is used.
		 *
		 * For more information, see: https://math.stackexchange.com/questions/77118/non-power-of-2-ffts/77156#77156
		 */
		class NP2FFT {
		  /**
		   * Constructs a new NP2FFT object: precomputes the chirp sequence, the
		   * transform of its conjugate, and the scratch buffers reused by every call.
		   *
		   * @param {number} fft_length The length of the FFT
		   */
		  constructor(fft_length) {
		    // Helper variables
		    const a = 2 * (fft_length - 1);
		    const b = 2 * (2 * fft_length - 1);
		    const nextP2 = 2 ** Math.ceil(Math.log2(b));
		    this.bufferSize = nextP2;
		    this._a = a;

		    // Define buffers
		    // Compute chirp for transform
		    const chirp = new Float64Array(b);
		    const ichirp = new Float64Array(nextP2);
		    this._chirpBuffer = new Float64Array(nextP2);
		    this._buffer1 = new Float64Array(nextP2);
		    this._buffer2 = new Float64Array(nextP2);
		    this._outBuffer1 = new Float64Array(nextP2);
		    this._outBuffer2 = new Float64Array(nextP2);

		    // Compute complex exponentiation
		    const theta = (-2 * Math.PI) / fft_length;
		    const baseR = Math.cos(theta);
		    const baseI = Math.sin(theta);

		    // Precompute helper for chirp-z transform
		    for (let i = 0; i < b >> 1; ++i) {
		      // Compute complex power:
		      const e = (i + 1 - fft_length) ** 2 / 2.0;

		      // Compute the modulus and argument of the result
		      const result_mod = Math.sqrt(baseR ** 2 + baseI ** 2) ** e;
		      const result_arg = e * Math.atan2(baseI, baseR);

		      // Convert the result back to rectangular form
		      // and assign to chirp and ichirp
		      const i2 = 2 * i;
		      chirp[i2] = result_mod * Math.cos(result_arg);
		      chirp[i2 + 1] = result_mod * Math.sin(result_arg);

		      // conjugate
		      ichirp[i2] = chirp[i2];
		      ichirp[i2 + 1] = -chirp[i2 + 1];
		    }
		    // View (no copy) over chirp entries [a, b)
		    this._slicedChirpBuffer = chirp.subarray(a, b);

		    // create object to perform Fast Fourier Transforms
		    // with `nextP2` complex numbers
		    this._f = new P2FFT(nextP2 >> 1);
		    this._f.transform(this._chirpBuffer, ichirp);
		  }

		  /**
		   * Shared implementation of `transform` and `realTransform`: multiply the
		   * input by the chirp, convolve with the conjugate chirp through the
		   * power-of-two FFT, then multiply by the chirp again.
		   *
		   * @param {Float64Array} output Buffer receiving the interleaved complex result.
		   * @param {Float64Array} input Input samples: real values when `real` is true, interleaved complex otherwise.
		   * @param {boolean} real Whether `input` holds real values only.
		   * @returns {void}
		   */
		  _transform(output, input, real) {
		    const ib1 = this._buffer1;
		    const ib2 = this._buffer2;
		    const ob2 = this._outBuffer1;
		    const ob3 = this._outBuffer2;
		    const cb = this._chirpBuffer;
		    const sb = this._slicedChirpBuffer;
		    const a = this._a;

		    if (real) {
		      // Real multiplication
		      for (let j = 0; j < sb.length; j += 2) {
		        const j2 = j + 1;
		        const j3 = j >> 1;

		        const a_real = input[j3];
		        ib1[j] = a_real * sb[j];
		        ib1[j2] = a_real * sb[j2];
		      }
		    } else {
		      // Complex multiplication
		      for (let j = 0; j < sb.length; j += 2) {
		        const j2 = j + 1;
		        ib1[j] = input[j] * sb[j] - input[j2] * sb[j2];
		        ib1[j2] = input[j] * sb[j2] + input[j2] * sb[j];
		      }
		    }
		    this._f.transform(ob2, ib1);

		    // Pointwise product with the precomputed transform of the conjugate chirp
		    for (let j = 0; j < cb.length; j += 2) {
		      const j2 = j + 1;

		      ib2[j] = ob2[j] * cb[j] - ob2[j2] * cb[j2];
		      ib2[j2] = ob2[j] * cb[j2] + ob2[j2] * cb[j];
		    }
		    this._f.inverseTransform(ob3, ib2);

		    // NOTE(review): this loop runs over the whole of `ob3`, which is longer than `sb`;
		    // entries past `sb.length` read `undefined` — confirm callers only consume the first `sb.length` values.
		    for (let j = 0; j < ob3.length; j += 2) {
		      const a_real = ob3[j + a];
		      const a_imag = ob3[j + a + 1];
		      const b_real = sb[j];
		      const b_imag = sb[j + 1];

		      output[j] = a_real * b_real - a_imag * b_imag;
		      output[j + 1] = a_real * b_imag + a_imag * b_real;
		    }
		  }

		  /**
		   * Transforms interleaved complex `input` into `output`.
		   * @param {Float64Array} output The output buffer.
		   * @param {Float64Array} input The interleaved complex input.
		   * @returns {void}
		   */
		  transform(output, input) {
		    this._transform(output, input, false);
		  }

		  /**
		   * Transforms real-valued `input` into `output`.
		   * @param {Float64Array} output The output buffer.
		   * @param {Float64Array} input The real-valued input.
		   * @returns {void}
		   */
		  realTransform(output, input) {
		    this._transform(output, input, true);
		  }
		}

		/**
		 * Facade that picks the FFT implementation matching the requested length:
		 * `P2FFT` for powers of two, `NP2FFT` otherwise.
		 */
		export class FFT {
		  /**
		   * @param {number} fft_length The number of points of the transform.
		   */
		  constructor(fft_length) {
		    const powerOfTwo = isPowerOfTwo(fft_length);

		    this.fft_length = fft_length;
		    this.isPowerOfTwo = powerOfTwo;

		    if (!powerOfTwo) {
		      // The non-power-of-two implementation reports its own buffer size
		      this.fft = new NP2FFT(fft_length);
		      this.outputBufferSize = this.fft.bufferSize;
		    } else {
		      // One (re, im) pair per point
		      this.fft = new P2FFT(fft_length);
		      this.outputBufferSize = 2 * fft_length;
		    }
		  }

		  /**
		   * Forwards to the selected implementation's `realTransform`.
		   * @param {Float64Array} out The output buffer.
		   * @param {Float64Array} input The real-valued input.
		   */
		  realTransform(out, input) {
		    const impl = this.fft;
		    impl.realTransform(out, input);
		  }

		  /**
		   * Forwards to the selected implementation's `transform`.
		   * @param {Float64Array} out The output buffer.
		   * @param {Float64Array} input The interleaved complex input.
		   */
		  transform(out, input) {
		    const impl = this.fft;
		    impl.transform(out, input);
		  }
		}

		/**
		* Performs median filter on the provided data. Padding is done by mirroring the data.
		* @param {AnyTypedArray} data The input array
		* @param {number} windowSize The window size
		*/
		export function medianFilter(data, windowSize) {
		if (windowSize % 2 === 0 \|\| windowSize <= 0) {
		throw new Error("Window size must be a positive odd number");
		}

		// @ts-ignore
		const outputArray = new data.constructor(data.length);

		// @ts-ignore
		const buffer = new data.constructor(windowSize); // Reusable array for storing values

		const halfWindowSize = Math.floor(windowSize / 2);

		for (let i = 0; i < data.length; ++i) {
		let valuesIndex = 0;

		for (let j = -halfWindowSize; j <= halfWindowSize; ++j) {
		let index = i + j;
		if (index < 0) {
		index = Math.abs(index);
		} else if (index >= data.length) {
		index = 2 * (data.length - 1) - index;
		}

		buffer[valuesIndex++] = data[index];
		}

		buffer.sort();
		outputArray[i] = buffer[halfWindowSize];
		}

		return outputArray;
		}

		/**
		 * Rounds `num` to `decimals` decimal places by scaling with a power of ten,
		 * applying `Math.round`, and scaling back.
		 * @param {number} num The number to round
		 * @param {number} decimals The number of decimals
		 * @returns {number} The rounded number
		 */
		export function round(num, decimals) {
		  const scale = Math.pow(10, decimals);
		  const scaled = Math.round(num * scale);
		  return scaled / scale;
		}

		/**
		 * Rounds to the nearest integer, resolving exact `.5` ties towards the even
		 * neighbour ("bankers' rounding", the default rounding mode in python).
		 * For example, 1.5 rounds to 2 and 2.5 rounds to 2.
		 *
		 * @param {number} x The number to round
		 * @returns {number} The rounded number
		 */
		export function bankers_round(x) {
		  const nearest = Math.round(x);
		  const isTie = Math.abs(x) % 1 === 0.5;

		  // Not a tie, or the tie already landed on an even integer: nothing to adjust
		  if (!isTie || nearest % 2 === 0) {
		    return nearest;
		  }

		  // Math.round resolved the tie upwards onto an odd integer; step down to the even one
		  return nearest - 1;
		}

+1277

src/tokenizers/utils/tensor.js

		/**
		* @file Helper module for `Tensor` processing.
		*
		* These functions and classes are only used internally,
		* meaning an end-user shouldn't need to access anything here.
		*
		* @module utils/tensor
		*/

		import { interpolate_data, permute_data } from "./maths.js";

		// Frozen lookup from tensor dtype name to the array constructor used to store
		// values of that dtype.
		const DataTypeMap = Object.freeze({
		float32: Float32Array,
		float64: Float64Array,
		string: Array, // string[]
		int8: Int8Array,
		uint8: Uint8Array,
		int16: Int16Array,
		uint16: Uint16Array,
		int32: Int32Array,
		uint32: Uint32Array,
		int64: BigInt64Array,
		uint64: BigUint64Array,
		bool: Uint8Array,
		});

		/**
		* @typedef {keyof typeof DataTypeMap} DataType
		* @typedef {import('./maths.js').AnyTypedArray | any[]} DataArray
		*/

		// NOTE: Just to facilitate git merge, this class is not used yet.
		// It is an empty placeholder: it declares no constructor, so any arguments
		// passed to `new ONNXTensor(...)` are ignored and the instance has no fields.
		const ONNXTensor = class {};

		export class Tensor {
		/** @type {number[]} Dimensions of the tensor. */
		dims;

		/** @type {DataType} Type of the tensor. */
		type;

		/** @type {DataArray} The data stored in the tensor. */
		data;

		/** @type {number} The number of elements in the tensor. */
		size;

		/**
		* Create a new Tensor or copy an existing Tensor.
		* @param {[DataType, DataArray, number[]]\|[import('onnxruntime-common').Tensor]} args
		*/
		constructor(...args) {
		if (args[0] instanceof ONNXTensor) {
		// Create shallow copy
		Object.assign(this, args[0]);
		} else {
		// Create new tensor
		Object.assign(
		this,
		new ONNXTensor(
		/** @type {DataType} */ (args[0]),
		/** @type {Exclude<import('./maths.js').AnyTypedArray, Uint8ClampedArray>} */ (
		args[1]
		),
		args[2]
		)
		);
		}

		return new Proxy(this, {
		get: (obj, key) => {
		if (typeof key === "string") {
		let index = Number(key);
		if (Number.isInteger(index)) {
		// key is an integer (i.e., index)
		return obj._getitem(index);
		}
		}
		// @ts-ignore
		return obj[key];
		},
		set: (obj, key, value) => {
		// TODO allow setting of data

		// @ts-ignore
		return (obj[key] = value);
		},
		});
		}

		/**
		* Returns an iterator object for iterating over the tensor data in row-major order.
		* If the tensor has more than one dimension, the iterator will yield subarrays.
		* @returns {Iterator} An iterator object for iterating over the tensor data in row-major order.
		*/
		*[Symbol.iterator]() {
		const [iterLength, ...iterDims] = this.dims;

		if (iterDims.length > 0) {
		const iterSize = iterDims.reduce((a, b) => a * b);
		for (let i = 0; i < iterLength; ++i) {
		yield this._subarray(i, iterSize, iterDims);
		}
		} else {
		yield* this.data;
		}
		}

		/**
		* Index into a Tensor object.
		* @param {number} index The index to access.
		* @returns {Tensor} The data at the specified index.
		*/
		_getitem(index) {
		const [iterLength, ...iterDims] = this.dims;

		index = safeIndex(index, iterLength);

		if (iterDims.length > 0) {
		const iterSize = iterDims.reduce((a, b) => a * b);
		return this._subarray(index, iterSize, iterDims);
		} else {
		return new Tensor(this.type, [this.data[index]], iterDims);
		}
		}

		/**
		* @param {number\|bigint} item The item to search for in the tensor
		* @returns {number} The index of the first occurrence of item in the tensor data.
		*/
		indexOf(item) {
		for (let index = 0; index < this.data.length; ++index) {
		// Note: == instead of === so we can match Ints with BigInts
		if (this.data[index] == item) {
		return index;
		}
		}
		return -1;
		}

		/**
		* @param {number} index
		* @param {number} iterSize
		* @param {any} iterDims
		* @returns {Tensor}
		*/
		_subarray(index, iterSize, iterDims) {
		const o1 = index * iterSize;
		const o2 = (index + 1) * iterSize;

		// We use subarray if available (typed array), otherwise we use slice (normal array)
		const data =
		"subarray" in this.data
		? this.data.subarray(o1, o2)
		: this.data.slice(o1, o2);
		return new Tensor(this.type, data, iterDims);
		}

		/**
		* Returns the value of this tensor as a standard JavaScript Number. This only works
		* for tensors with one element. For other cases, see `Tensor.tolist()`.
		* @returns {number\|bigint} The value of this tensor as a standard JavaScript Number.
		* @throws {Error} If the tensor has more than one element.
		*/
		item() {
		if (this.data.length !== 1) {
		throw new Error(
		`a Tensor with ${this.data.length} elements cannot be converted to Scalar`
		);
		}
		return this.data[0];
		}

		/**
		* Convert tensor data to a n-dimensional JS list
		* @returns {Array}
		*/
		tolist() {
		return reshape(this.data, this.dims);
		}

		/**
		* Return a new Tensor with the sigmoid function applied to each element.
		* @returns {Tensor} The tensor with the sigmoid function applied.
		*/
		sigmoid() {
		return this.clone().sigmoid_();
		}

		/**
		* Applies the sigmoid function to the tensor in place.
		* @returns {Tensor} Returns `this`.
		*/
		sigmoid_() {
		for (let i = 0; i < this.data.length; ++i) {
		this.data[i] = 1 / (1 + Math.exp(-this.data[i]));
		}
		return this;
		}

		/**
		* Return a new Tensor with every element multiplied by a constant.
		* @param {number} val The value to multiply by.
		* @returns {Tensor} The new tensor.
		*/
		mul(val) {
		return this.clone().mul_(val);
		}

		/**
		* Multiply the tensor by a constant in place.
		* @param {number} val The value to multiply by.
		* @returns {Tensor} Returns `this`.
		*/
		mul_(val) {
		for (let i = 0; i < this.data.length; ++i) {
		this.data[i] *= val;
		}
		return this;
		}

		/**
		* Return a new Tensor with every element added by a constant.
		* @param {number} val The value to add by.
		* @returns {Tensor} The new tensor.
		*/
		add(val) {
		return this.clone().add_(val);
		}

		/**
		* Add the tensor by a constant in place.
		* @param {number} val The value to add by.
		* @returns {Tensor} Returns `this`.
		*/
		add_(val) {
		for (let i = 0; i < this.data.length; ++i) {
		this.data[i] += val;
		}
		return this;
		}
		clone() {
		return new Tensor(this.type, this.data.slice(), this.dims.slice());
		}

		slice(...slices) {
		// This allows for slicing with ranges and numbers
		let newTensorDims = [];
		let newOffsets = [];

		// slices is an array of numbers or arrays of numbers
		// e.g., slices = [0, [1, 3], null, [0, 3]]
		for (let sliceIndex = 0; sliceIndex < this.dims.length; ++sliceIndex) {
		let slice = slices[sliceIndex];

		if (slice === null \|\| slice === undefined) {
		// null or undefined means take the whole dimension
		newOffsets.push([0, this.dims[sliceIndex]]);
		newTensorDims.push(this.dims[sliceIndex]);
		} else if (typeof slice === "number") {
		slice = safeIndex(slice, this.dims[sliceIndex], sliceIndex);

		// A number means take a single element
		newOffsets.push([slice, slice + 1]);
		} else if (Array.isArray(slice) && slice.length === 2) {
		// An array of length 2 means take a range of elements

		if (slice[0] > slice[1]) {
		throw new Error(`Invalid slice: ${slice}`);
		}

		let offsets = [
		Math.max(slice[0], 0),
		Math.min(slice[1], this.dims[sliceIndex]),
		];

		newOffsets.push(offsets);
		newTensorDims.push(offsets[1] - offsets[0]);
		} else {
		throw new Error(`Invalid slice: ${slice}`);
		}
		}

		let newDims = newOffsets.map(([start, end]) => end - start);
		let newBufferSize = newDims.reduce((a, b) => a * b);

		// Allocate memory
		// @ts-ignore
		let data = new this.data.constructor(newBufferSize);

		// Precompute strides
		const stride = this.stride();

		for (let i = 0; i < newBufferSize; ++i) {
		let originalIndex = 0;
		for (let j = newDims.length - 1, num = i; j >= 0; --j) {
		const size = newDims[j];
		originalIndex += ((num % size) + newOffsets[j][0]) * stride[j];
		num = Math.floor(num / size);
		}
		data[i] = this.data[originalIndex];
		}
		return new Tensor(this.type, data, newTensorDims);
		}

		/**
		* Return a permuted version of this Tensor, according to the provided dimensions.
		* @param {...number} dims Dimensions to permute.
		* @returns {Tensor} The permuted tensor.
		*/
		permute(...dims) {
		return permute(this, dims);
		}

		// TODO: implement transpose. For now (backwards compatibility), it's just an alias for permute()
		transpose(...dims) {
		return this.permute(...dims);
		}

		// TODO add .max() and .min() methods

		/**
		* Returns the sum of each row of the input tensor in the given dimension dim.
		*
		* @param {number} [dim=null] The dimension or dimensions to reduce. If `null`, all dimensions are reduced.
		* @param {boolean} keepdim Whether the output tensor has `dim` retained or not.
		* @returns The summed tensor
		*/
		sum(dim = null, keepdim = false) {
		return this.norm(1, dim, keepdim);
		}

		/**
		* Returns the matrix norm or vector norm of a given tensor.
		* @param {number\|string} [p='fro'] The order of norm
		* @param {number} [dim=null] Specifies which dimension of the tensor to calculate the norm across.
		* If dim is None, the norm will be calculated across all dimensions of input.
		* @param {boolean} [keepdim=false] Whether the output tensors have dim retained or not.
		* @returns {Tensor} The norm of the tensor.
		*/
		norm(p = "fro", dim = null, keepdim = false) {
		if (p === "fro") {
		// NOTE: Since we only support integer dims, Frobenius norm produces the same result as p=2.
		p = 2;
		} else if (typeof p === "string") {
		throw Error(`Unsupported norm: ${p}`);
		}

		if (dim === null) {
		// @ts-ignore
		let val = this.data.reduce((a, b) => a + b p, 0) (1 / p);
		return new Tensor(this.type, [val], []);
		}

		// Negative indexing
		dim = safeIndex(dim, this.dims.length);

		// Calculate the shape of the resulting array after summation
		const resultDims = this.dims.slice(); // Copy the original dimensions
		resultDims[dim] = 1; // Remove the specified axis

		// Create a new array to store the accumulated values
		// @ts-ignore
		const result = new this.data.constructor(this.data.length / this.dims[dim]);

		// Iterate over the data array
		for (let i = 0; i < this.data.length; ++i) {
		// Calculate the index in the resulting array
		let resultIndex = 0;

		for (
		let j = this.dims.length - 1, num = i, resultMultiplier = 1;
		j >= 0;
		--j
		) {
		const size = this.dims[j];
		if (j !== dim) {
		const index = num % size;
		resultIndex += index * resultMultiplier;
		resultMultiplier *= resultDims[j];
		}
		num = Math.floor(num / size);
		}

		// Accumulate the value at the current index
		result[resultIndex] += this.data[i] ** p;
		}

		if (p !== 1) {
		for (let i = 0; i < result.length; ++i) {
		result[i] = result[i] ** (1 / p);
		}
		}

		if (!keepdim) {
		resultDims.splice(dim, 1);
		}

		return new Tensor(this.type, result, resultDims);
		}

		/**
		* Performs `L_p` normalization of inputs over specified dimension. Operates in place.
		* @param {number} [p=2] The exponent value in the norm formulation
		* @param {number} [dim=1] The dimension to reduce
		* @returns {Tensor} `this` for operation chaining.
		*/
		normalize_(p = 2.0, dim = 1) {
		dim = safeIndex(dim, this.dims.length);

		const norm = this.norm(p, dim, true);

		for (let i = 0; i < this.data.length; ++i) {
		// Calculate the index in the resulting array
		let resultIndex = 0;

		for (
		let j = this.dims.length - 1, num = i, resultMultiplier = 1;
		j >= 0;
		--j
		) {
		const size = this.dims[j];
		if (j !== dim) {
		const index = num % size;
		resultIndex += index * resultMultiplier;
		resultMultiplier *= this.dims[j];
		}
		num = Math.floor(num / size);
		}

		// Divide by normalized value
		this.data[i] /= norm.data[resultIndex];
		}

		return this;
		}

		/**
		* Performs `L_p` normalization of inputs over specified dimension.
		* @param {number} [p=2] The exponent value in the norm formulation
		* @param {number} [dim=1] The dimension to reduce
		* @returns {Tensor} The normalized tensor.
		*/
		normalize(p = 2.0, dim = 1) {
		return this.clone().normalize_(p, dim);
		}

		/**
		* Compute and return the stride of this tensor.
		* Stride is the jump necessary to go from one element to the next one in the specified dimension dim.
		* @returns {number[]} The stride of this tensor.
		*/
		stride() {
		return dimsToStride(this.dims);
		}

		/**
		* Returns a tensor with all specified dimensions of input of size 1 removed.
		*
		* NOTE: The returned tensor shares the storage with the input tensor, so changing the contents of one will change the contents of the other.
		* If you would like a copy, use `tensor.clone()` before squeezing.
		*
		* @param {number} [dim=null] If given, the input will be squeezed only in the specified dimensions.
		* @returns The squeezed tensor
		*/
		squeeze(dim = null) {
		return new Tensor(this.type, this.data, calc_squeeze_dims(this.dims, dim));
		}

		/**
		* In-place version of @see {@link Tensor.squeeze}
		*/
		squeeze_(dim = null) {
		this.dims = calc_squeeze_dims(this.dims, dim);
		return this;
		}

		/**
		* Returns a new tensor with a dimension of size one inserted at the specified position.
		*
		* NOTE: The returned tensor shares the same underlying data with this tensor.
		*
		* @param {number} dim The index at which to insert the singleton dimension
		* @returns The unsqueezed tensor
		*/
		unsqueeze(dim = null) {
		return new Tensor(
		this.type,
		this.data,
		calc_unsqueeze_dims(this.dims, dim)
		);
		}

		/**
		* In-place version of @see {@link Tensor.unsqueeze}
		*/
		unsqueeze_(dim = null) {
		this.dims = calc_unsqueeze_dims(this.dims, dim);
		return this;
		}

		/**
		* In-place version of @see {@link Tensor.flatten}
		*/
		flatten_(start_dim = 0, end_dim = -1) {
		// TODO validate inputs
		end_dim = (end_dim + this.dims.length) % this.dims.length;

		let dimsToKeepBefore = this.dims.slice(0, start_dim);
		let dimsToFlatten = this.dims.slice(start_dim, end_dim + 1);
		let dimsToKeepAfter = this.dims.slice(end_dim + 1);

		this.dims = [
		...dimsToKeepBefore,
		dimsToFlatten.reduce((a, b) => a * b, 1),
		...dimsToKeepAfter,
		];
		return this;
		}

		/**
		* Flattens input by reshaping it into a one-dimensional tensor.
		* If `start_dim` or `end_dim` are passed, only dimensions starting with `start_dim`
		* and ending with `end_dim` are flattened. The order of elements in input is unchanged.
		* @param {number} start_dim the first dim to flatten
		* @param {number} end_dim the last dim to flatten
		* @returns The flattened tensor.
		*/
		flatten(start_dim = 0, end_dim = -1) {
		return this.clone().flatten_(start_dim, end_dim);
		}

		/**
		* Returns a new tensor with the same data as the `self` tensor but of a different `shape`.
		* @param {...number} dims the desired size
		* @returns {Tensor} The tensor with the same data but different shape
		*/
		view(...dims) {
		// TODO: validate dims
		let inferredIndex = -1;
		for (let i = 0; i < dims.length; ++i) {
		if (dims[i] === -1) {
		if (inferredIndex !== -1) {
		throw new Error("Only one dimension can be inferred");
		}
		inferredIndex = i;
		}
		}

		if (inferredIndex !== -1) {
		// Some dimension must be inferred
		const productOther = dims.reduce((product, curr, index) => {
		return index !== inferredIndex ? product * curr : product;
		}, 1);

		dims[inferredIndex] = this.data.length / productOther;
		}
		return new Tensor(this.type, this.data, dims); // NOTE: uses same underlying storage
		}

		neg_() {
		for (let i = 0; i < this.data.length; ++i) {
		this.data[i] = -this.data[i];
		}
		return this;
		}
		neg() {
		return this.clone().neg_();
		}

		/**
		* In-place version of @see {@link Tensor.clamp}
		*/
		clamp_(min, max) {
		for (let i = 0; i < this.data.length; ++i) {
		this.data[i] = Math.min(Math.max(this.data[i], min), max);
		}
		return this;
		}

		/**
		* Clamps all elements in input into the range [ min, max ]
		* @param {number} min lower-bound of the range to be clamped to
		* @param {number} max upper-bound of the range to be clamped to
		* @returns the output tensor.
		*/
		clamp(min, max) {
		return this.clone().clamp_(min, max);
		}

		/**
		* In-place version of @see {@link Tensor.round}
		*/
		round_() {
		for (let i = 0; i < this.data.length; ++i) {
		this.data[i] = Math.round(this.data[i]);
		}
		return this;
		}

		/**
		* Rounds elements of input to the nearest integer.
		* @returns the output tensor.
		*/
		round() {
		return this.clone().round_();
		}

		/**
		* Performs Tensor dtype conversion.
		* @param {DataType} type The desired data type.
		* @returns {Tensor} The converted tensor.
		*/
		to(type) {
		// If the self Tensor already has the correct dtype, then self is returned.
		if (this.type === type) return this;

		// Otherwise, the returned tensor is a copy of self with the desired dtype.
		if (!DataTypeMap.hasOwnProperty(type)) {
		throw new Error(`Unsupported type: ${type}`);
		}
		// @ts-ignore
		return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
		}
		}

		/**
		* This creates a nested array of a given type and depth (see examples).
		*
		* @example
		* NestArray<string, 1>; // string[]
		* @example
		* NestArray<number, 2>; // number[][]
		* @example
		* NestArray<string, 3>; // string[][][] etc.
		* @template T
		* @template {number} Depth
		* @template {never[]} [Acc=[]]
		* @typedef {Acc['length'] extends Depth ? T : NestArray<T[], Depth, [...Acc, never]>} NestArray
		*/

		/**
		* Reshapes a 1-dimensional array into an n-dimensional array, according to the provided dimensions.
		*
		* An empty `dimensions` list denotes a scalar shape: the input must then hold exactly one
		* element, and that element is returned un-nested.
		*
		* @example
		* reshape([10 ], [1 ]); // Type: number[] Value: [10]
		* reshape([1, 2, 3, 4 ], [2, 2 ]); // Type: number[][] Value: [[1, 2], [3, 4]]
		* reshape([1, 2, 3, 4, 5, 6, 7, 8], [2, 2, 2]); // Type: number[][][] Value: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
		* reshape([1, 2, 3, 4, 5, 6, 7, 8], [4, 2 ]); // Type: number[][] Value: [[1, 2], [3, 4], [5, 6], [7, 8]]
		* @param {T[]|DataArray} data The input array to reshape.
		* @param {DIM} dimensions The target shape/dimensions.
		* @template T
		* @template {[number]|number[]} DIM
		* @returns {NestArray<T, DIM["length"]>} The reshaped array.
		* @throws {Error} If the number of elements does not match the product of `dimensions`.
		*/
		function reshape(data, dimensions) {
		  const totalElements = data.length;
		  // Seed the product with 1 so that an empty `dimensions` list is valid
		  // instead of making `reduce` throw on an empty array.
		  const dimensionSize = dimensions.reduce((a, b) => a * b, 1);

		  if (totalElements !== dimensionSize) {
		    throw Error(
		      `cannot reshape array of size ${totalElements} into shape (${dimensions})`
		    );
		  }

		  /** @type {any} */
		  let reshapedArray = data;

		  // Group from the innermost dimension outwards: each pass chunks the current
		  // sequence into runs of `dimensions[i]` items.
		  for (let i = dimensions.length - 1; i >= 0; i--) {
		    reshapedArray = reshapedArray.reduce(
		      (acc, val) => {
		        const lastArray = acc[acc.length - 1];

		        if (lastArray.length < dimensions[i]) {
		          lastArray.push(val);
		        } else {
		          acc.push([val]);
		        }

		        return acc;
		      },
		      [[]]
		    );
		  }

		  // The outermost pass wraps everything in one extra array; unwrap it.
		  return reshapedArray[0];
		}

		/**
		* Rearranges the axes of a tensor.
		* @param {any} tensor The tensor to permute; its `type`, `data` and `dims` are read.
		* @param {Array} axes The axis order, forwarded to `permute_data`.
		* @returns {Tensor} A new tensor built from the permuted data and shape.
		*/
		export function permute(tensor, axes) {
		  const { data, dims } = tensor;
		  const [reorderedData, reorderedDims] = permute_data(data, dims, axes);
		  return new Tensor(tensor.type, reorderedData, reorderedDims);
		}

		/**
		* Interpolates a Tensor to the given size.
		* @param {Tensor} input The input tensor to interpolate. Data must be channel-first (i.e., [c, h, w])
		* @param {number[]} size The output size of the image, as `[out_height, out_width]`
		* @param {string} mode The interpolation mode
		* @param {boolean} align_corners Whether to align corners.
		* @returns {Tensor} The interpolated tensor, with dims `[channels, out_height, out_width]`.
		*/
		export function interpolate(
		  input,
		  [out_height, out_width],
		  mode = "bilinear",
		  align_corners = false
		) {
		  const { dims } = input;

		  // The last three dims are read as [channels, height, width];
		  // a missing channel dim counts as a single channel.
		  const in_channels = dims.at(-3) ?? 1;
		  const sourceShape = [in_channels, dims.at(-2), dims.at(-1)];

		  const resampled = interpolate_data(
		    /** @type {import('./maths.js').TypedArray}*/ (input.data),
		    sourceShape,
		    [out_height, out_width],
		    mode,
		    align_corners
		  );

		  return new Tensor(input.type, resampled, [in_channels, out_height, out_width]);
		}

		/**
		* Averages the last hidden state over the sequence axis, weighting every position
		* by its attention-mask value.
		* @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
		* @param {Tensor} attention_mask Tensor of shape [batchSize, seqLength]
		* @returns {Tensor} Returns a new Tensor of shape [batchSize, embedDim].
		*/
		export function mean_pooling(last_hidden_state, attention_mask) {
		  const [batchSize, seqLength, embedDim] = last_hidden_state.dims;
		  const hidden = last_hidden_state.data;
		  const mask = attention_mask.data;

		  const pooledShape = [batchSize, embedDim];
		  // Same array type as the input data.
		  // @ts-ignore
		  const pooled = new hidden.constructor(batchSize * embedDim);

		  for (let b = 0; b < batchSize; ++b) {
		    const hiddenBase = b * embedDim * seqLength;
		    const maskBase = b * seqLength;

		    for (let e = 0; e < embedDim; ++e) {
		      let weightedSum = 0;
		      let weightTotal = 0;

		      // Pool over all positions of the sequence.
		      for (let s = 0; s < seqLength; ++s) {
		        // `Number(...)` normalises the mask entry (it may be a BigInt).
		        const weight = Number(mask[maskBase + s]);

		        weightTotal += weight;
		        weightedSum += hidden[hiddenBase + e + s * embedDim] * weight;
		      }

		      pooled[b * embedDim + e] = weightedSum / weightTotal;
		    }
		  }

		  return new Tensor(last_hidden_state.type, pooled, pooledShape);
		}

		/**
		* Apply Layer Normalization over the last dimension of a 2D input.
		* @param {Tensor} input The input tensor; must have exactly two dimensions `[batchSize, featureDim]`.
		* @param {number[]} normalized_shape Must be the one-element array `[input.dims[1]]`.
		* @param {Object} options The options for the layer normalization
		* @param {number} [options.eps=1e-5] A value added to the denominator for numerical stability.
		* @returns {Tensor} The normalized tensor, with the same type and dims as `input`.
		* @throws {Error} If `input` is not 2D, or if `normalized_shape` is not `[input.dims[1]]`.
		*/
		export function layer_norm(input, normalized_shape, { eps = 1e-5 } = {}) {
		  if (input.dims.length !== 2) {
		    throw new Error("`layer_norm` currently only supports 2D input.");
		  }

		  const [batchSize, featureDim] = input.dims;

		  // Reject the shape when EITHER condition fails. With `&&`, a wrong length with a
		  // matching first entry, or a right length with a wrong entry, would be accepted.
		  if (normalized_shape.length !== 1 || normalized_shape[0] !== featureDim) {
		    throw new Error(
		      "`normalized_shape` must be a 1D array with shape `[input.dims[1]]`."
		    );
		  }

		  // Per-row statistics (correction = 0, keepdim = true).
		  const [std, mean] = std_mean(input, 1, 0, true);

		  // @ts-ignore
		  const returnedData = new input.data.constructor(input.data.length);

		  for (let i = 0; i < batchSize; ++i) {
		    const offset = i * featureDim;
		    for (let j = 0; j < featureDim; ++j) {
		      const offset2 = offset + j;
		      returnedData[offset2] =
		        (input.data[offset2] - mean.data[i]) / (std.data[i] + eps);
		    }
		  }
		  return new Tensor(input.type, returnedData, input.dims);
		}

		/**
		* Helper function to calculate new dimensions when performing a squeeze operation.
		*
		* Negative axes count from the end (`-1` is the last dimension). An axis whose size
		* is not 1, or that lies outside the shape, is left untouched.
		*
		* @param {number[]} dims The dimensions of the tensor. Not modified.
		* @param {number|number[]|null} dim The dimension(s) to squeeze, or `null` to squeeze every size-1 dimension.
		* @returns The new dimensions (always a fresh array).
		* @private
		*/
		function calc_squeeze_dims(dims, dim) {
		  const rank = dims.length;
		  // Translate a negative axis into its non-negative equivalent.
		  const normalize = (axis) => (axis < 0 ? axis + rank : axis);

		  if (dim === null) {
		    return dims.filter((d) => d !== 1);
		  }

		  if (typeof dim === "number") {
		    const result = dims.slice();
		    const axis = normalize(dim);
		    if (result[axis] === 1) {
		      result.splice(axis, 1);
		    }
		    return result;
		  }

		  if (Array.isArray(dim)) {
		    const axes = dim.map(normalize);
		    // Keep a dimension unless it is both listed and of size 1.
		    return dims.filter((x, i) => x !== 1 || !axes.includes(i));
		  }

		  // Any other `dim` value: return an unchanged copy.
		  return dims.slice();
		}

		/**
		* Helper that computes the shape produced by an unsqueeze operation.
		* @param {number[]} dims The dimensions of the tensor. Not modified.
		* @param {number} dim The position at which the size-1 dimension is inserted.
		* @returns The new dimensions.
		* @private
		*/
		function calc_unsqueeze_dims(dims, dim) {
		  const expanded = [...dims];
		  // Dimension out of range (e.g., "expected to be in range of [-4, 3], but got 4")
		  // + 1 since we allow inserting at the end (i.e. dim = -1)
		  const insertAt = safeIndex(dim, dims.length + 1);
		  // Insert 1 into specified dimension
		  expanded.splice(insertAt, 0, 1);
		  return expanded;
		}

		/**
		* Safely calculate the index for an array of a given size, allowing negative indexing.
		* @param {number} index The index that will be used. Valid range is `[-size, size)`.
		* @param {number} size The size of the array.
		* @param {number} [dimension=null] The dimension that the index is for (optional, only used in the error message).
		* @returns {number} The index, guaranteed to be non-negative and less than `size`.
		*
		* @throws {Error} If the index is out of range.
		* @private
		*/
		function safeIndex(index, size, dimension = null) {
		  if (index < -size || index >= size) {
		    throw new Error(
		      `IndexError: index ${index} is out of bounds for dimension${
		        dimension === null ? "" : " " + dimension
		      } with size ${size}`
		    );
		  }

		  // After the range check a negative index lies in [-size, 0),
		  // so adding `size` maps it into [0, size).
		  return index < 0 ? index + size : index;
		}

		/**
		* Joins several tensors along one dimension.
		*
		* The output type, array type and the extents of all other dimensions are taken
		* from the first tensor.
		* @param {Tensor[]} tensors The array of tensors to concatenate.
		* @param {number} dim The dimension to concatenate along (negative values count from the end).
		* @returns {Tensor} The concatenated tensor.
		*/
		export function cat(tensors, dim = 0) {
		  const first = tensors[0];
		  dim = safeIndex(dim, first.dims.length);

		  // TODO do validation of shapes

		  // Output shape: the first tensor's shape, with the extents along `dim` summed.
		  const outDims = first.dims.slice();
		  let joinedExtent = 0;
		  for (const t of tensors) {
		    joinedExtent += t.dims[dim];
		  }
		  outDims[dim] = joinedExtent;

		  let outLength = 1;
		  for (const extent of outDims) {
		    outLength *= extent;
		  }
		  // @ts-ignore
		  const out = new first.data.constructor(outLength);

		  if (dim === 0) {
		    // Special case for performance: the inputs are simply laid out back to back.
		    let writeAt = 0;
		    for (const { data } of tensors) {
		      out.set(data, writeAt);
		      writeAt += data.length;
		    }
		  } else {
		    // Offset along `dim` at which the current tensor starts in the output.
		    let shift = 0;

		    for (const { data, dims } of tensors) {
		      for (let flat = 0; flat < data.length; ++flat) {
		        // Decompose `flat` into per-axis coordinates (last axis first) and
		        // recompose them into an index into the output.
		        let target = 0;
		        let remainder = flat;
		        let multiplier = 1;

		        let axis = dims.length;
		        while (axis-- > 0) {
		          const extent = dims[axis];
		          let coordinate = remainder % extent;
		          if (axis === dim) {
		            coordinate += shift;
		          }
		          target += coordinate * multiplier;
		          multiplier *= outDims[axis];
		          remainder = Math.floor(remainder / extent);
		        }

		        out[target] = data[flat];
		      }

		      shift += dims[dim];
		    }
		  }

		  return new Tensor(first.type, out, outDims);
		}

		/**
		* Stacks tensors along a dimension: every tensor is unsqueezed at `dim`, then the
		* results are concatenated along `dim`.
		* @param {Tensor[]} tensors The array of tensors to stack.
		* @param {number} dim The dimension to stack along.
		* @returns {Tensor} The stacked tensor.
		*/
		export function stack(tensors, dim = 0) {
		  // TODO do validation of shapes
		  // NOTE: stack expects each tensor to be equal size
		  const expanded = [];
		  for (const t of tensors) {
		    expanded.push(t.unsqueeze(dim));
		  }
		  return cat(expanded, dim);
		}

		/**
		* Computes the standard deviation and the mean over the dimension given by `dim`.
		* `dim` is either a single dimension or `null`, which reduces over all dimensions.
		* @param {Tensor} input the input tensor
		* @param {number|null} dim the dimension to reduce. If `null`, all dimensions are reduced.
		* @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
		* @param {boolean} keepdim whether the output tensor has dim retained or not.
		* @returns {Tensor[]} A tuple of (std, mean) tensors.
		*/
		export function std_mean(input, dim = null, correction = 1, keepdim = false) {
		  const values = input.data;

		  if (dim === null) {
		    // Reduce over every element.
		    // @ts-ignore
		    const total = values.reduce((acc, v) => acc + v, 0);
		    const average = total / values.length;
		    // @ts-ignore
		    const squaredDeviations = values.reduce(
		      (acc, v) => acc + (v - average) ** 2,
		      0
		    );
		    const deviation = Math.sqrt(
		      squaredDeviations / (values.length - correction)
		    );

		    const meanTensor = new Tensor(input.type, [average], [/* scalar */]);
		    const stdTensor = new Tensor(input.type, [deviation], [/* scalar */]);

		    return [stdTensor, meanTensor];
		  }

		  // Negative indexing
		  dim = safeIndex(dim, input.dims.length);

		  const meanTensor = mean(input, dim, keepdim);

		  // Shape after the reduction, with the reduced axis kept as size 1 for now.
		  const reducedDims = input.dims.slice();
		  reducedDims[dim] = 1;
		  const reducedExtent = input.dims[dim];

		  // One accumulator slot per output element.
		  // @ts-ignore
		  const accumulated = new values.constructor(values.length / reducedExtent);

		  for (let flat = 0; flat < values.length; ++flat) {
		    // Index of the output element this input element contributes to:
		    // the same coordinates with the reduced axis dropped.
		    let target = 0;
		    let remainder = flat;
		    let multiplier = 1;

		    let axis = input.dims.length;
		    while (axis-- > 0) {
		      const extent = input.dims[axis];
		      if (axis !== dim) {
		        target += (remainder % extent) * multiplier;
		        multiplier *= reducedDims[axis];
		      }
		      remainder = Math.floor(remainder / extent);
		    }

		    // Accumulate the squared deviation from the matching mean.
		    accumulated[target] += (values[flat] - meanTensor.data[target]) ** 2;
		  }

		  for (let k = 0; k < accumulated.length; ++k) {
		    accumulated[k] = Math.sqrt(accumulated[k] / (reducedExtent - correction));
		  }

		  if (!keepdim) {
		    reducedDims.splice(dim, 1);
		  }

		  const stdTensor = new Tensor(input.type, accumulated, reducedDims);

		  return [stdTensor, meanTensor];
		}

		/**
		* Computes the mean of the input tensor along the dimension `dim`, or over all
		* elements when `dim` is `null`.
		* @param {Tensor} input the input tensor.
		* @param {number|null} dim the dimension to reduce.
		* @param {boolean} keepdim whether the output tensor has dim retained or not.
		* @returns A new tensor with means taken along the specified dimension.
		*/
		export function mean(input, dim = null, keepdim = false) {
		  const values = input.data;

		  if (dim === null) {
		    // Reduce over every element.
		    // @ts-ignore
		    const total = values.reduce((acc, v) => acc + v, 0);
		    return new Tensor(input.type, [total / values.length], [/* scalar */]);
		  }

		  // Negative indexing
		  dim = safeIndex(dim, input.dims.length);

		  // Shape after the reduction, with the reduced axis kept as size 1 for now.
		  const reducedDims = input.dims.slice();
		  reducedDims[dim] = 1;
		  const reducedExtent = input.dims[dim];

		  // One accumulator slot per output element.
		  // @ts-ignore
		  const accumulated = new values.constructor(values.length / reducedExtent);

		  for (let flat = 0; flat < values.length; ++flat) {
		    // Index of the output element this input element contributes to:
		    // the same coordinates with the reduced axis dropped.
		    let target = 0;
		    let remainder = flat;
		    let multiplier = 1;

		    let axis = input.dims.length;
		    while (axis-- > 0) {
		      const extent = input.dims[axis];
		      if (axis !== dim) {
		        target += (remainder % extent) * multiplier;
		        multiplier *= reducedDims[axis];
		      }
		      remainder = Math.floor(remainder / extent);
		    }

		    accumulated[target] += values[flat];
		  }

		  // Turn the sums into means (skipped when the reduced axis has size 1).
		  if (reducedExtent !== 1) {
		    for (let k = 0; k < accumulated.length; ++k) {
		      accumulated[k] = accumulated[k] / reducedExtent;
		    }
		  }

		  if (!keepdim) {
		    reducedDims.splice(dim, 1);
		  }

		  return new Tensor(input.type, accumulated, reducedDims);
		}

		/**
		*
		* Measures similarity between two temporal sequences (e.g., input audio and output tokens
		* to generate token-level timestamps).
		* @param {Tensor} matrix Cost matrix with dims `[output_length, input_length]`.
		* @returns {number[][]} `[text_indices, time_indices]`: the aligned path, in increasing order.
		* @throws {Error} If the backtrace encounters a trace value other than 0, 1 or 2.
		*/
		export function dynamicTimeWarping(matrix) {
		  const [output_length, input_length] = matrix.dims;

		  // Both tables carry one extra leading row and column for the boundary.
		  const outputShape = [output_length + 1, input_length + 1];

		  const cost = new Tensor(
		    "float32",
		    new Float32Array(outputShape[0] * outputShape[1]).fill(Infinity),
		    outputShape
		  );

		  const trace = new Tensor(
		    "float32",
		    new Float32Array(outputShape[0] * outputShape[1]).fill(-1),
		    outputShape
		  );

		  // NOTE(review): `cost[i]` / `trace[i]` rely on Tensor index access returning a row
		  // that shares storage with the parent; that behaviour is defined outside this block.
		  // same as `cost[0][0] = 0`;
		  cost[0].data[0] = 0;

		  for (let j = 1; j < input_length + 1; ++j) {
		    for (let i = 1; i < output_length + 1; ++i) {
		      const c0 = cost[i - 1][j - 1].item(); // diagonal
		      const c1 = cost[i - 1][j].item(); // previous row
		      const c2 = cost[i][j - 1].item(); // previous column

		      // Pick the strictly cheapest predecessor; ties fall through to the last branch.
		      let c, t;
		      if (c0 < c1 && c0 < c2) {
		        c = c0;
		        t = 0;
		      } else if (c1 < c0 && c1 < c2) {
		        c = c1;
		        t = 1;
		      } else {
		        c = c2;
		        t = 2;
		      }

		      cost[i].data[j] = matrix[i - 1][j - 1].item() + c;
		      trace[i].data[j] = t;
		    }
		  }

		  // backtrace
		  let i = output_length;
		  let j = input_length;

		  // Force the walk along the borders so it always ends at (0, 0).
		  // @ts-ignore
		  trace.data.fill(2, 0, outputShape[1]); // trace[0, :] = 2
		  for (let row = 0; row < outputShape[0]; ++row) {
		    // trace[:, 0] = 1
		    trace[row].data[0] = 1;
		  }

		  let text_indices = [];
		  let time_indices = [];

		  while (i > 0 || j > 0) {
		    text_indices.push(i - 1);
		    time_indices.push(j - 1);

		    const t = trace[i][j].item();
		    switch (t) {
		      case 0:
		        --i;
		        --j;
		        break;
		      case 1:
		        --i;
		        break;
		      case 2:
		        --j;
		        break;
		      default:
		        throw new Error(
		          `Internal error in dynamic time warping. Unexpected trace[${i}, ${j}]. Please file a bug report.`
		        );
		    }
		  }

		  // The path was collected from the end; put it in forward order.
		  text_indices.reverse();
		  time_indices.reverse();

		  return [text_indices, time_indices];
		}

		/**
		* Computes one stride per dimension: the stride of the last dimension is 1, and each
		* earlier stride is the product of all extents after it.
		* @param {number[]} dims The dimensions.
		* @returns {number[]} The strides, one per entry of `dims`.
		*/
		function dimsToStride(dims) {
		  const rank = dims.length;
		  const strides = new Array(rank);

		  // Walk from the last axis to the first, carrying the running product of extents.
		  let running = 1;
		  let axis = rank;
		  while (axis-- > 0) {
		    strides[axis] = running;
		    running *= dims[axis];
		  }
		  return strides;
		}

		/**
		* Creates an "int64" tensor of the given shape in which every element is 1.
		* @param {number[]} size A sequence of integers defining the shape of the output tensor.
		* @returns The ones tensor, backed by a `BigInt64Array`.
		*/
		export function ones(size) {
		  // Element count = product of all extents (1 for an empty shape).
		  let count = 1;
		  for (const extent of size) {
		    count *= extent;
		  }

		  const filled = new BigInt64Array(count);
		  filled.fill(1n);
		  return new Tensor("int64", filled, size);
		}

		/**
		* Creates a tensor of ones with the same dims as `tensor` (see `ones`).
		* @param {Tensor} tensor The size of input will determine size of the output tensor.
		* @returns The ones tensor.
		*/
		export function ones_like(tensor) {
		  const { dims } = tensor;
		  return ones(dims);
		}

		/**
		* Quantizes the embeddings tensor to binary or unsigned binary precision.
		*
		* Every group of 8 consecutive values is packed into one byte, most significant bit
		* first; a bit is set when the value is greater than 0.
		* @param {Tensor} tensor The tensor to quantize. Must be 2D with a last dimension that is a multiple of 8.
		* @param {'binary'|'ubinary'} precision The precision to use for quantization.
		* @returns {Tensor} The quantized tensor ("int8" for 'binary', "uint8" for 'ubinary'), with dims `[dims[0], dims[1] / 8]`.
		* @throws {Error} If the tensor is not 2D, its last dimension is not a multiple of 8, or `precision` is not supported.
		*/
		export function quantize_embeddings(tensor, precision) {
		  if (tensor.dims.length !== 2) {
		    throw new Error("The tensor must have 2 dimensions");
		  }
		  if (tensor.dims.at(-1) % 8 !== 0) {
		    throw new Error("The last dimension of the tensor must be a multiple of 8");
		  }
		  if (!["binary", "ubinary"].includes(precision)) {
		    throw new Error("The precision must be either 'binary' or 'ubinary'");
		  }

		  const signed = precision === "binary";
		  const dtype = signed ? "int8" : "uint8";

		  // Create a typed array to store the packed bits
		  const cls = signed ? Int8Array : Uint8Array;
		  const inputData = tensor.data;
		  const outputData = new cls(inputData.length / 8);

		  // Iterate over each number in the array
		  for (let i = 0; i < inputData.length; ++i) {
		    // Determine if the number is greater than 0
		    const bit = inputData[i] > 0 ? 1 : 0;

		    // Calculate the index in the typed array and the position within the byte
		    const arrayIndex = Math.floor(i / 8);
		    const bitPosition = i % 8;

		    // Pack the bit into the typed array
		    outputData[arrayIndex] |= bit << (7 - bitPosition);
		    // Signed output: shift the byte by -128 once, right after its top bit is written.
		    if (signed && bitPosition === 0) {
		      outputData[arrayIndex] -= 128;
		    }
		  }

		  return new Tensor(dtype, outputData, [tensor.dims[0], tensor.dims[1] / 8]);
		}

+181

src/types.ts

		import type {
		AutoTokenizer,
		PreTrainedTokenizer,
		} from "./tokenizers/tokenizers";

		/** Type of the static `AutoTokenizer.TOKENIZER_CLASS_MAPPING` lookup table. */
		export type TokenizerMapping = typeof AutoTokenizer.TOKENIZER_CLASS_MAPPING;
		/** Union of the keys of {@link TokenizerMapping}. */
		export type SupportedTokenizerClasses = keyof TokenizerMapping;
		/**
		* Resolves a class name to the instance type of the matching entry in
		* {@link TokenizerMapping}; any other string resolves to `PreTrainedTokenizer`.
		*/
		export type TokenizerClassNameMapping<T extends string> =
		T extends SupportedTokenizerClasses
		? InstanceType<TokenizerMapping[T]>
		: PreTrainedTokenizer;

		/**
		* Resolves a config object type through its `tokenizer_class` property: a supported
		* class name yields the matching entry of {@link TokenizerMapping}, anything else
		* yields `PreTrainedTokenizer`.
		*
		* NOTE(review): the first branch yields the mapping entry itself (not wrapped in
		* `InstanceType`, unlike `TokenizerClassNameMapping`), while the fallback is the
		* `PreTrainedTokenizer` instance type. Confirm whether this asymmetry is intended.
		*/
		export type TokenizerConfigMapping<
		Config extends {
		tokenizer_class: string;
		}
		> = Config["tokenizer_class"] extends SupportedTokenizerClasses
		? TokenizerMapping[Config["tokenizer_class"]]
		: PreTrainedTokenizer;

		/** Union of all property value types of `T`. */
		type ValueOf<T> = T[keyof T];

		/**
		* Signature of a function that takes optional, partial tokenizer JSON / config data
		* and returns a tokenizer instance of type `M`, where `M` is constrained to the
		* instance types of the entries in {@link TokenizerMapping}.
		*/
		export type FromPreTrainedFn<
		M extends InstanceType<ValueOf<TokenizerMapping>>
		> = (params?: {
		// TODO: types
		tokenizerJSON?: Partial<NSTokenizerJSON.Root>;
		tokenizerConfig?: Partial<NSTokenizerConfig.Root>;
		}) => M;

		/**
		* Types describing a tokenizer configuration object (the `tokenizerConfig` data that
		* is handed to a tokenizer together with the tokenizer JSON).
		*/
		export namespace NSTokenizerConfig {
		  // TODO full types
		  /**
		  * Top-level shape of the tokenizer configuration.
		  * Several fields are still typed `any` / `null` pending the TODO above.
		  */
		  export type Root = {
		    add_prefix_space?: any;
		    bos_token?: any;
		    clean_up_tokenization_spaces: boolean;
		    eos_token: any;
		    model_max_length: number;
		    tokenizer_class: string;
		    unk_token: any;
		    chat_template?: any;
		    add_bos_token?: boolean;
		    add_eos_token?: boolean;
		    added_tokens_decoder?: { [key: string]: AddedTokensDecoder };
		    legacy?: boolean | null;
		    merges_file?: null;
		    pad_token?: any;
		    sp_model_kwargs?: any;
		    spaces_between_special_tokens?: boolean;
		    use_default_system_prompt?: boolean;
		    vocab_file?: null;
		    auto_map?: any;
		    do_lower_case?: boolean;
		    padding_side?: string;
		    remove_space?: boolean;
		    additional_special_tokens?: string[];
		    errors?: string;
		    split_special_tokens?: boolean;
		  };

		  /** Value type of the `added_tokens_decoder` map in {@link Root}. */
		  export type AddedTokensDecoder = {
		    content: string;
		    lstrip: boolean;
		    normalized: boolean;
		    rstrip: boolean;
		    single_word: boolean;
		    special: boolean;
		  };

		  /** Declared but not referenced by {@link Root} (`auto_map` is typed `any`). */
		  export type AutoMap = {
		    AutoTokenizer: Array<null | string>;
		  };

		  /** Declared but not referenced by {@link Root} (`chat_template` is typed `any`). */
		  export type ChatTemplateElement = {
		    name: string;
		    template: string;
		  };

		  /** Declared but not referenced by {@link Root} (`sp_model_kwargs` is typed `any`). */
		  export type SPModelKwargs = {};
		}

		/**
		* Types describing the tokenizer JSON data (the `tokenizerJSON` object that is handed
		* to a tokenizer together with the tokenizer configuration).
		*/
		export namespace NSTokenizerJSON {
		// TODO full types
		/**
		* Top-level shape of the tokenizer JSON.
		* The structural sections (`normalizer`, `pre_tokenizer`, `post_processor`, `decoder`,
		* `model`) and the `added_tokens` entries are typed `any` here; the more specific
		* types declared below are not wired into `Root`.
		*/
		export type Root = {
		version: string;
		truncation: null;
		padding: null;
		added_tokens: any[];
		normalizer: any;
		pre_tokenizer: any;
		post_processor: any;
		decoder: any;
		model: any;
		};

		/** An added-token entry: its id, its text, and its matching flags. */
		export type AddedToken = {
		id: number;
		content: string;
		single_word: boolean;
		lstrip: boolean;
		rstrip: boolean;
		normalized: boolean;
		special: boolean;
		};

		/** Element type of `PreTokenizer.pretokenizers`. */
		export type PretokenizerElement = {
		type: string;
		decoders?: DecoderDecoder[];
		add_prefix_space?: boolean;
		trim_offsets?: boolean;
		use_regex?: boolean;
		individual_digits?: boolean;
		};

		/** Element type of `PretokenizerElement.decoders`. */
		export type DecoderDecoder = {
		type: string;
		pattern?: Pattern;
		content?: string;
		start?: number;
		stop?: number;
		};

		/** A pattern wrapper holding a single `String` property. */
		export type Pattern = {
		String: string;
		};

		/** Shape of a model section carrying a vocabulary and a merge list. */
		export type Model = {
		type: string;
		dropout: null;
		unk_token: any;
		continuing_subword_prefix: null;
		end_of_word_suffix: null;
		fuse_unk: boolean;
		byte_fallback: boolean;
		// token text -> numeric value
		vocab: { [key: string]: number };
		merges: string[];
		};

		/** A normalizer that may hold a list of nested normalizers. */
		export type TopLevelNormalizer = {
		type: string;
		normalizers?: NormalizerElement[];
		};

		/** Element type of `TopLevelNormalizer.normalizers`. */
		export type NormalizerElement = {
		type: string;
		prepend?: string;
		pattern?: Pattern;
		content?: string;
		};

		/** Shape of a post-processor section with `single` / `pair` templates and special tokens. */
		export type PostProcessor = {
		type: string;
		single: Pair[];
		pair: Pair[];
		special_tokens: { [key: string]: SpecialToken };
		add_prefix_space?: boolean;
		trim_offsets?: boolean;
		use_regex?: boolean;
		};

		/** Template element; both properties are optional and share the `Sequence` shape. */
		export type Pair = {
		SpecialToken?: Sequence;
		Sequence?: Sequence;
		};

		/** Payload of a `Pair` property: an id plus a `type_id`. */
		export type Sequence = {
		id: string;
		type_id: number;
		};

		/** Value type of the `PostProcessor.special_tokens` map. */
		export type SpecialToken = {
		id: string;
		ids: number[];
		tokens: string[];
		};

		/** A pre-tokenizer that may hold a list of nested pre-tokenizers. */
		export type PreTokenizer = {
		type: string;
		pretokenizers?: PretokenizerElement[];
		};
		}

+3

-2

package.json

		{
		"name": "@lenml/tokenizers",
		"version": "1.0.6",
		"version": "1.0.7",
		"description": "a lightweight no-dependency fork of transformers.js (only tokenizers)",
		@@ -21,3 +21,4 @@ "source": "src/main.ts",
		"dist/*.mjs",
		"dist/**.ts"
		"dist/**.ts",
		"src/*/"
		],
		@@ -24,0 +25,0 @@ "keywords": [

@lenml/tokenizers - npm Package Compare versions

New alerts

Improved metrics

Worsened metrics