gpt-tokenizer - npm package

Comparing version 2.2.3 to 2.3.0

cjs/encodingParams/Cl100KBase.d.ts

cjs/BytePairEncodingCore.d.ts

@@ -1,25 +1,31 @@

import { EncoderMap } from './EncoderMap.js';
export declare class BytePairEncodingCore {
encoder: EncoderMap;
decoder: Map<number, Uint8Array>;
export type RawBytePairRanks = readonly (string | readonly number[])[];
export interface BytePairEncodingConfig {
mergeableBytePairRanks: RawBytePairRanks;
specialTokenMapping?: Map<string, number>;
tokenSplitRegex: RegExp;
specialTokensEncoder: Map<string, number>;
specialTokensDecoder: Map<number, Uint8Array>;
specialTokenPatternRegex: RegExp;
textEncoder: TextEncoder;
constructor({ bytePairEncoder, specialTokenEncoder, tokenSplitRegex, }: {
bytePairEncoder: EncoderMap;
specialTokenEncoder?: Map<string, number>;
tokenSplitRegex: RegExp;
});
encodeNative(text: string, allowedSpecial: Set<string>): Generator<number[], number, undefined>;
findNextSpecialStartIndex(text: string, allowedSpecial: Set<string>, startIndex: number, specialRegex: RegExp): number | undefined;
decodeNative(tokens: Iterable<number>): Generator<Uint8Array>;
decodeNativeAsync(tokens: AsyncIterable<number>): AsyncGenerator<Uint8Array>;
tryDecodeToken(token: number): Uint8Array | undefined;
bytePairEncode(inputBytes: Uint8Array, bytePairRanks: EncoderMap): number[];
bytePairMerge(piece: Uint8Array, bytePairRanks: EncoderMap, transform: (pair: {
start: number;
end: number;
}) => number): number[];
}
export declare class BytePairEncodingCore {
readonly bytePairEncoderSize: number;
private bytePairEncoder;
private bytePairEncoderSortedLookup;
private bytePairRanksDecoder;
private tokenSplitRegex;
private specialTokensEncoder;
private specialTokensDecoder;
private specialTokenPatternRegex;
private stringDecoder;
private textEncoder;
constructor({ mergeableBytePairRanks: bytePairEncoder, specialTokenMapping: specialTokenEncoder, tokenSplitRegex, }: BytePairEncodingConfig);
getBpeRankFromString(key: string): number | undefined;
getBpeRankFromStringOrThrow(key: string): number;
getBpeRankFromBytes(key: Uint8Array): number | undefined;
getBpeRankFromBytesOrThrow(key: Uint8Array): number;
binarySearch(key: Uint8Array): number;
encodeNative(text: string, allowedSpecial?: Set<string>): Generator<number[], number, undefined>;
findNextSpecialStartIndex(text: string, allowedSpecial: Set<string> | undefined, startIndex: number, specialRegex: RegExp): number | undefined;
decodeNative(tokens: Iterable<number>): Generator<Uint8Array | string, void, void>;
decodeNativeAsync(tokens: AsyncIterable<number>): AsyncGenerator<Uint8Array | string>;
tryDecodeToken(tokenRank: number): Uint8Array | string | undefined;
bytePairEncode(input: string): number[];
bytePairMerge(piece: Uint8Array, getByteForRange: (start: number, end: number) => number): number[];
}
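The declarations above replace the old Map-based constructor with a `BytePairEncodingConfig` whose rank table (`RawBytePairRanks`) is a plain array indexed by rank. A minimal sketch of that shape in use; `BytePairEncodingCore` is an internal class, and the deep import path and toy three-token vocabulary below are assumptions for illustration, not documented usage:

```ts
// Minimal sketch, assuming the internal module is reachable at this path.
import { BytePairEncodingCore } from 'gpt-tokenizer/cjs/BytePairEncodingCore.js'

const core = new BytePairEncodingCore({
  // array index = token rank; entries are strings, or raw byte arrays for
  // tokens that are not valid UTF-8 on their own
  mergeableBytePairRanks: ['a', 'b', 'ab'],
  tokenSplitRegex: /[a-z]+/gu, // toy splitter, not the real BPE regex
})

console.log(core.getBpeRankFromString('ab')) // 2
console.log(core.getBpeRankFromBytes(new TextEncoder().encode('ab'))) // 2
```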
"use strict";
/* eslint-disable no-continue */
Object.defineProperty(exports, "__esModule", { value: true });
exports.BytePairEncodingCore = void 0;
const EncoderMap_js_1 = require("./EncoderMap.js");
const escapeRegExp_js_1 = require("./escapeRegExp.js");
const utfUtil_js_1 = require("./utfUtil.js");
const util_js_1 = require("./util.js");
class BytePairEncodingCore {
encoder;
decoder;
bytePairEncoderSize;
bytePairEncoder;
bytePairEncoderSortedLookup;
bytePairRanksDecoder = new Map();
tokenSplitRegex;

@@ -13,17 +16,27 @@ specialTokensEncoder;

specialTokenPatternRegex;
stringDecoder;
textEncoder = new TextEncoder();
constructor({ bytePairEncoder, specialTokenEncoder, tokenSplitRegex, }) {
this.encoder = bytePairEncoder ?? new EncoderMap_js_1.EncoderMap();
this.decoder = bytePairEncoder
? new Map([...bytePairEncoder].map(([key, value]) => [value, key]))
: new Map();
constructor({ mergeableBytePairRanks: bytePairEncoder, specialTokenMapping: specialTokenEncoder, tokenSplitRegex, }) {
this.bytePairEncoder = bytePairEncoder;
this.stringDecoder = new Map();
// size without array holes (which may be present in the encoder)
this.bytePairEncoderSize = Object.keys(bytePairEncoder).length;
const binaryLookup = [];
// forEach skips array holes:
bytePairEncoder.forEach((value, rank) => {
if (typeof value === 'string') {
this.stringDecoder.set(value, rank);
return;
}
const byteArray = new Uint8Array(value);
binaryLookup.push([byteArray, rank]);
this.bytePairRanksDecoder.set(rank, byteArray);
});
this.bytePairEncoderSortedLookup = binaryLookup.sort((a, b) => (0, utfUtil_js_1.compareUint8Arrays)(a[0], b[0]));
this.specialTokensEncoder = specialTokenEncoder ?? new Map();
this.specialTokensDecoder = specialTokenEncoder
? new Map([...specialTokenEncoder].map(([key, value]) => [
value,
this.textEncoder.encode(key),
]))
? new Map([...specialTokenEncoder].map(([key, value]) => [value, key]))
: new Map();
this.tokenSplitRegex = tokenSplitRegex;
const parts = [...this.specialTokensEncoder.keys()].map(escapeRegExp_js_1.escapeRegExp);
const parts = [...this.specialTokensEncoder.keys()].map(util_js_1.escapeRegExp);
const joinedParts = parts.join('|');

@@ -37,2 +50,60 @@ try {

}
getBpeRankFromString(key) {
return this.stringDecoder.get(key);
}
getBpeRankFromStringOrThrow(key) {
const value = this.getBpeRankFromString(key);
if (value === undefined) {
throw new Error(`The byte-pair encoding does not contain a value for: ${key}`);
}
return value;
}
getBpeRankFromBytes(key) {
const keyAsString = (0, utfUtil_js_1.tryConvertToString)(key);
if (keyAsString !== undefined) {
return this.getBpeRankFromString(keyAsString);
}
// Perform binary search on the binary keys
const index = this.binarySearch(key);
if (index !== -1) {
return this.bytePairEncoderSortedLookup[index][1];
}
return undefined;
}
getBpeRankFromBytesOrThrow(key) {
const value = this.getBpeRankFromBytes(key);
if (value === undefined) {
throw new Error(`The byte-pair encoding does not contain a value for: ${key.toString()}`);
}
return value;
}
// Binary search on the binary keys
binarySearch(key) {
let low = 0;
let high = this.bytePairEncoderSortedLookup.length - 1;
while (low <= high) {
// eslint-disable-next-line no-bitwise
const mid = (low + high) >>> 1;
const midKey = this.bytePairEncoderSortedLookup[mid][0];
let cmp = 0;
for (let i = 0; i < Math.min(midKey.length, key.length); i++) {
cmp = midKey[i] - key[i];
if (cmp !== 0)
break;
}
if (cmp === 0) {
cmp = midKey.length - key.length;
}
if (cmp === 0) {
return mid;
}
if (cmp < 0) {
low = mid + 1;
}
else {
high = mid - 1;
}
}
return -1;
}
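The search above relies on the byte-array keys being sorted with the same ordering used in the constructor (`compareUint8Arrays`): lexicographic by byte, with the shorter array first when one key is a prefix of the other. A standalone restatement of that comparator for reference (the helper name `compareBytes` is mine):

```ts
// Mirrors the inline comparison loop in binarySearch above.
function compareBytes(a: Uint8Array, b: Uint8Array): number {
  const n = Math.min(a.length, b.length)
  for (let i = 0; i < n; i++) {
    const diff = a[i] - b[i]
    if (diff !== 0) return diff
  }
  // On a shared prefix, the shorter key sorts first.
  return a.length - b.length
}

// [0x61] < [0x61, 0x62] < [0x62], i.e. 'a' < 'ab' < 'b' as byte sequences
console.log(compareBytes(new Uint8Array([0x61]), new Uint8Array([0x61, 0x62]))) // -1
```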
*encodeNative(text, allowedSpecial) {

@@ -48,11 +119,9 @@ let startIndex = 0;

for (const [match] of textSegment.matchAll(this.tokenSplitRegex)) {
const encodedPiece = this.textEncoder.encode(match);
const token = this.encoder.get(encodedPiece);
const token = this.getBpeRankFromString(match);
if (token !== undefined) {
lastTokenLength = 1;
yield [token];
// eslint-disable-next-line no-continue
continue;
}
const tokens = this.bytePairEncode(encodedPiece, this.encoder);
const tokens = this.bytePairEncode(match);
lastTokenLength = tokens.length;

@@ -86,3 +155,3 @@ yield tokens;

const [specialToken] = nextSpecialMatch;
if (allowedSpecial.has(specialToken)) {
if (allowedSpecial?.has(specialToken)) {
return nextSpecialMatch.index + searchIndex;

@@ -103,41 +172,69 @@ }

for await (const token of tokens) {
const tokenBytes = this.tryDecodeToken(token);
if (tokenBytes) {
yield tokenBytes;
const tokenBytesOrString = this.tryDecodeToken(token);
if (tokenBytesOrString) {
yield tokenBytesOrString;
}
}
}
tryDecodeToken(token) {
return this.decoder.get(token) ?? this.specialTokensDecoder.get(token);
tryDecodeToken(tokenRank) {
const value = this.bytePairEncoder[tokenRank];
if (typeof value === 'string') {
return value;
}
if (typeof value === 'object') {
const fromBinary = this.bytePairRanksDecoder.get(tokenRank);
if (fromBinary) {
return fromBinary;
}
}
return this.specialTokensDecoder.get(tokenRank);
}
bytePairEncode(inputBytes, bytePairRanks) {
if (inputBytes.length === 1) {
return [bytePairRanks.getOrThrow(inputBytes)];
bytePairEncode(input) {
if (input.length === 1 && (0, utfUtil_js_1.isAscii)(input.codePointAt(0))) {
return [this.getBpeRankFromStringOrThrow(input)];
}
return this.bytePairMerge(inputBytes, bytePairRanks, (pair) => {
const key = inputBytes.slice(pair.start, pair.end);
return bytePairRanks.getOrThrow(key);
const inputBytes = this.textEncoder.encode(input);
return this.bytePairMerge(inputBytes, (start, end) => {
const key = inputBytes.subarray(start, end);
return this.getBpeRankFromBytesOrThrow(key);
});
}
bytePairMerge(piece, bytePairRanks, transform) {
bytePairMerge(
// Input array of bytes to process
piece,
// Function to apply to each final segment after merging
getByteForRange) {
// Create an array of partition objects. Each partition tracks the start index in 'piece'
// and a rank value for adjacent pairs (initially set to positive infinity).
const partitions = Array.from({ length: piece.length + 1 }, (_, i) => ({
start: i,
rank: Number.POSITIVE_INFINITY,
rank: Number.POSITIVE_INFINITY, // Rank starts at infinity (unmerged)
}));
// Helper function to get the rank of a byte pair starting at 'startIndex'.
// 'skip' determines how far we look ahead (usually 0, for consecutive pairs).
const getRank = (startIndex, skip) => {
if (startIndex + skip + 2 >= partitions.length) {
// Avoid out-of-bounds errors, return undefined when no valid pair exists
return undefined;
}
const key = piece.slice(partitions[startIndex].start, partitions[startIndex + skip + 2].start);
return bytePairRanks.get(key);
// Get the byte pair by extracting a subarray starting at 'startIndex' and ending at
// the start of the partition after 'skip + 2'.
const key = piece.subarray(partitions[startIndex].start, partitions[startIndex + skip + 2].start);
// Retrieve the rank of this byte pair from the BPE rank function
return this.getBpeRankFromBytes(key);
};
// Initialize the ranks for all adjacent pairs in the array
for (let i = 0; i < partitions.length - 2; i++) {
// Get the rank for the pair starting at index 'i'
const rank = getRank(i, 0);
if (rank !== undefined) {
// Assign the rank to the partition at index 'i'
partitions[i].rank = rank;
}
}
// Iteratively merge byte pairs until no more useful merges can be done
while (partitions.length > 1) {
let minRank = Number.POSITIVE_INFINITY;
let minRankIdx = 0;
// Find the partition with the minimum rank, i.e., the most important pair to merge next
let i = 0;

@@ -151,7 +248,10 @@ for (const partition of partitions) {

}
// If no valid pair is left to merge, exit the loop
if (minRank === Number.POSITIVE_INFINITY) {
break;
}
// Update the rank of the partition after the merged one
partitions[minRankIdx].rank =
getRank(minRankIdx, 1) ?? Number.POSITIVE_INFINITY;
// Update the rank of the partition before the merged one (if exists)
if (minRankIdx > 0) {

@@ -161,10 +261,13 @@ partitions[minRankIdx - 1].rank =

}
// Merge by removing the partition after the one we just merged
partitions.splice(minRankIdx + 1, 1);
}
// Create the final output by applying the transform function to each partitioned range
const output = [];
for (let i = 0; i < partitions.length - 1; i++) {
output.push(transform({
start: partitions[i].start,
end: partitions[i + 1].start,
}));
output.push(getByteForRange(
// start index
partitions[i].start,
// end index
partitions[i + 1].start));
}
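To make the partition walk concrete, here is a self-contained restatement of the same merge loop over a toy rank table; the class plumbing is stripped out and the names (`toyBytePairMerge`, the two-entry rank map) are mine, so treat it as an illustrative sketch rather than the library's implementation:

```ts
// Returns the final [start, end) byte ranges after merging.
function toyBytePairMerge(
  piece: Uint8Array,
  getRankOf: (bytes: Uint8Array) => number | undefined,
): Array<[number, number]> {
  // One partition boundary per byte, plus a terminator.
  const parts = Array.from({ length: piece.length + 1 }, (_, i) => ({
    start: i,
    rank: Number.POSITIVE_INFINITY,
  }))
  const getRank = (i: number, skip: number) =>
    i + skip + 2 < parts.length
      ? getRankOf(piece.subarray(parts[i].start, parts[i + skip + 2].start))
      : undefined
  // Seed the rank of every adjacent pair.
  for (let i = 0; i < parts.length - 2; i++) {
    parts[i].rank = getRank(i, 0) ?? Number.POSITIVE_INFINITY
  }
  // Repeatedly merge the lowest-ranked pair until none remains.
  while (parts.length > 1) {
    let minRank = Number.POSITIVE_INFINITY
    let minIdx = 0
    for (let i = 0; i < parts.length - 1; i++) {
      if (parts[i].rank < minRank) {
        minRank = parts[i].rank
        minIdx = i
      }
    }
    if (minRank === Number.POSITIVE_INFINITY) break
    // Recompute the ranks around the merge point, then drop the boundary.
    parts[minIdx].rank = getRank(minIdx, 1) ?? Number.POSITIVE_INFINITY
    if (minIdx > 0) {
      parts[minIdx - 1].rank = getRank(minIdx - 1, 1) ?? Number.POSITIVE_INFINITY
    }
    parts.splice(minIdx + 1, 1)
  }
  const out: Array<[number, number]> = []
  for (let i = 0; i < parts.length - 1; i++) {
    out.push([parts[i].start, parts[i + 1].start])
  }
  return out
}

// 'abc' with toy ranks { ab: 0, bc: 1 }: 'ab' merges first, 'abc' has no
// rank, so the result is the ranges [0, 2) ('ab') and [2, 3) ('c').
const ranks = new Map([['ab', 0], ['bc', 1]])
const piece = new TextEncoder().encode('abc')
console.log(
  toyBytePairMerge(piece, (b) => ranks.get(new TextDecoder().decode(b))),
) // [[0, 2], [2, 3]]
```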

@@ -171,0 +274,0 @@ return output;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,6 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('cl100k_base', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('cl100k_base', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +28,0 @@ exports.decode = decode;
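Caller-facing code for this entry point is unchanged by the rewrite. A typical usage sketch; the subpath import matches how the package exposes its per-encoding builds, though treat the exact path as an assumption here:

```ts
import {
  decode,
  encode,
  isWithinTokenLimit,
} from 'gpt-tokenizer/encoding/cl100k_base'

const tokens = encode('hello world') // number[]
console.log(decode(tokens)) // 'hello world'

// false when over the limit, otherwise the token count
console.log(isWithinTokenLimit('hello world', 100))
```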

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,6 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const o200k_base_js_1 = __importDefault(require("../encodings/o200k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('o200k_base', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(o200k_base_js_1.default));
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('o200k_base', () => o200k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +28,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,6 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const p50k_base_js_1 = __importDefault(require("../encodings/p50k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('p50k_base', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(p50k_base_js_1.default));
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('p50k_base', () => p50k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, } = api;

@@ -29,0 +28,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,6 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const p50k_base_js_1 = __importDefault(require("../encodings/p50k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('p50k_edit', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(p50k_base_js_1.default));
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('p50k_edit', () => p50k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, } = api;

@@ -29,0 +28,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,6 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const r50k_base_js_1 = __importDefault(require("../encodings/r50k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('r50k_base', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(r50k_base_js_1.default));
const api = GptEncoding_js_1.GptEncoding.getEncodingApi('r50k_base', () => r50k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, } = api;

@@ -29,0 +28,0 @@ exports.decode = decode;

export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];
export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];
export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];
export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];
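These four encoding files switch from `[string, number][]` tuples (previously converted to an `EncoderMap` at load time) to a plain array indexed by rank, matching the new `RawBytePairRanks` type. A toy illustration of the two shapes (not real vocabulary data):

```ts
// 2.2.3: [token, rank] tuples, built into a Map on startup
const oldFormat: [string, number][] = [['a', 0], ['b', 1], ['ab', 2]]

// 2.3.0: the array index itself is the rank; tokens that are not valid
// UTF-8 strings on their own are stored as raw byte arrays
const newFormat: (string | number[])[] = ['a', 'b', 'ab', [0xff]]
```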

@@ -22,6 +22,8 @@ import { type EncodingName, type ModelName } from './mapping.js';

static FimSuffix: string;
decoder: TextDecoder;
modelName?: ModelName;
private decoder;
private bytePairEncodingCoreProcessor;
private specialTokenMapping;
private specialTokensSet;
private allSpecialTokenRegex;
private constructor();

@@ -32,3 +34,3 @@ static getEncodingApi(encodingName: EncodingName, getMergeableRanks: GetMergeableRanksFn): GptEncoding;

static getEncodingApiForModelAsync(modelName: ModelName, getMergeableRanks: GetMergeableRanksAsyncFn): Promise<GptEncoding>;
encodeGenerator(lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: EncodeOptions): Generator<number[], number, undefined>;
encodeGenerator(lineToEncode: string, { allowedSpecial, disallowedSpecial }?: EncodeOptions): Generator<number[], number, undefined>;
encode(lineToEncode: string, encodeOptions?: EncodeOptions): number[];

@@ -42,3 +44,3 @@ /**

*/
encodeChatGenerator(chat: Iterable<ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined): Generator<number[], void, undefined>;
encodeChatGenerator(chat: Iterable<ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined): Generator<number[], void, undefined>;
/**

@@ -51,3 +53,3 @@ * Encodes a chat into a single array of tokens.

*/
encodeChat(chat: readonly ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined): number[];
encodeChat(chat: readonly ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined): number[];
/**

@@ -54,0 +56,0 @@ * @returns {false | number} false if token limit is exceeded, otherwise the number of tokens

@@ -18,11 +18,21 @@ "use strict";

static FimSuffix = specialTokens_js_1.FimSuffix;
modelName;
decoder = new TextDecoder('utf8');
modelName;
bytePairEncodingCoreProcessor;
specialTokenMapping;
constructor({ tokenSplitRegex, mergeableBytePairRanks, specialTokenMapping, expectedVocabularySize, modelName, }) {
const maxTokenValue = Math.max((0, util_js_1.getMaxValueFromMap)(mergeableBytePairRanks), (0, util_js_1.getMaxValueFromMap)(specialTokenMapping));
specialTokensSet;
allSpecialTokenRegex;
constructor({ mergeableBytePairRanks, specialTokenMapping, expectedVocabularySize, modelName, ...rest }) {
this.specialTokenMapping = specialTokenMapping;
this.specialTokensSet = new Set(this.specialTokenMapping.keys());
this.allSpecialTokenRegex = (0, util_js_1.getSpecialTokenRegex)(this.specialTokensSet);
this.bytePairEncodingCoreProcessor = new BytePairEncodingCore_js_1.BytePairEncodingCore({
mergeableBytePairRanks,
specialTokenMapping,
...rest,
});
const maxTokenValue = Math.max(mergeableBytePairRanks.length - 1, (0, util_js_1.getMaxValueFromMap)(specialTokenMapping));
if (expectedVocabularySize !== undefined) {
if (mergeableBytePairRanks.size + specialTokenMapping.size !==
if (this.bytePairEncodingCoreProcessor.bytePairEncoderSize +
specialTokenMapping.size !==
expectedVocabularySize) {

@@ -32,10 +42,5 @@ throw new Error('The number of mergeable tokens and special tokens must be equal to explicit_n_vocab.');

if (maxTokenValue !== expectedVocabularySize - 1) {
throw new Error('The maximum token value must be equal to explicit_n_vocab - 1.');
throw new Error(`The model encodings are invalid. The maximum token value must be equal to expectedVocabularySize - 1. Currently ${maxTokenValue}, expected ${expectedVocabularySize - 1}`);
}
}
this.bytePairEncodingCoreProcessor = new BytePairEncodingCore_js_1.BytePairEncodingCore({
bytePairEncoder: mergeableBytePairRanks,
specialTokenEncoder: specialTokenMapping,
tokenSplitRegex,
});
this.encode = this.encode.bind(this);

@@ -69,14 +74,20 @@ this.decode = this.decode.bind(this);

}
encodeGenerator(lineToEncode, { allowedSpecial = new Set(), disallowedSpecial = new Set([exports.ALL_SPECIAL_TOKENS]), } = {}) {
const specialTokensSet = new Set(this.specialTokenMapping.keys());
if (disallowedSpecial.has(exports.ALL_SPECIAL_TOKENS)) {
disallowedSpecial = new Set(specialTokensSet);
allowedSpecial.forEach((val) => disallowedSpecial.delete(val));
disallowedSpecial.forEach((val) => allowedSpecial.delete(val));
encodeGenerator(lineToEncode, { allowedSpecial, disallowedSpecial } = {}) {
let regexPattern;
if (allowedSpecial?.has(exports.ALL_SPECIAL_TOKENS)) {
allowedSpecial = new Set(this.specialTokensSet);
}
if (allowedSpecial.has(exports.ALL_SPECIAL_TOKENS)) {
allowedSpecial = specialTokensSet;
if (!disallowedSpecial || disallowedSpecial.has(exports.ALL_SPECIAL_TOKENS)) {
// by default, all special tokens are disallowed
disallowedSpecial = new Set(this.specialTokensSet);
if (allowedSpecial?.size) {
allowedSpecial.forEach((val) => disallowedSpecial.delete(val));
disallowedSpecial.forEach((val) => allowedSpecial.delete(val));
regexPattern = (0, util_js_1.getSpecialTokenRegex)(disallowedSpecial);
}
else {
regexPattern = this.allSpecialTokenRegex;
}
}
if (disallowedSpecial.size > 0) {
const regexPattern = (0, util_js_1.getSpecialTokenRegex)(disallowedSpecial);
if (regexPattern) {
const match = lineToEncode.match(regexPattern);
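From the caller's side the reworked option handling keeps the same contract: as the comment above notes, all special tokens are disallowed by default, and explicitly allowing one lets it encode to its reserved rank. A hedged usage sketch, assuming `EndOfText` is among the re-exported special-token constants:

```ts
import { EndOfText, encode } from 'gpt-tokenizer'

// Allowed: '<|endoftext|>' in the input encodes to its reserved token rank.
const tokens = encode(`hello ${EndOfText}`, {
  allowedSpecial: new Set([EndOfText]),
})

// With the defaults, the same input would throw because the special token
// is disallowed.
```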

@@ -168,3 +179,6 @@ if (match !== null) {

for (const decodedPart of decodedByteGenerator) {
buffer += this.decoder.decode(decodedPart, { stream: true });
buffer +=
typeof decodedPart === 'string'
? decodedPart
: this.decoder.decode(decodedPart, { stream: true });
if (buffer.length === 0 || (0, utfUtil_js_1.endsWithIncompleteUtfPairSurrogate)(buffer)) {

@@ -190,3 +204,6 @@ // Keep the high surrogate in the buffer and continue with the next token

for await (const decodedPart of decodedByteGenerator) {
buffer += this.decoder.decode(decodedPart, { stream: true });
buffer +=
typeof decodedPart === 'string'
? decodedPart
: this.decoder.decode(decodedPart, { stream: true });
if (buffer.length === 0 || (0, utfUtil_js_1.endsWithIncompleteUtfPairSurrogate)(buffer)) {

@@ -193,0 +210,0 @@ // Keep the high surrogate in the buffer and continue with the next token
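The change in both decode generators is that decoded parts may now arrive as ready-made strings (the fast path for string-backed ranks) or as raw bytes; only the bytes go through the streaming `TextDecoder`, which also keeps multi-byte characters split across tokens intact. A small standalone sketch with toy data:

```ts
const decoder = new TextDecoder('utf8')
let buffer = ''
// Mixed parts: a string fast-path piece, then a euro sign (0xE2 0x82 0xAC)
// split across two byte chunks.
const parts: (string | Uint8Array)[] = [
  'price: ',
  new Uint8Array([0xe2, 0x82]),
  new Uint8Array([0xac]),
]
for (const part of parts) {
  buffer += typeof part === 'string' ? part : decoder.decode(part, { stream: true })
}
console.log(buffer) // 'price: €'
```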

@@ -187,3 +187,3 @@ "use strict";

? 127
: modelName === 'gpt-4o'
: modelName.startsWith('gpt-4o')
? 120

@@ -190,0 +190,0 @@ : 121;

@@ -7,12 +7,31 @@ export declare const cl100k_base = "cl100k_base";

export declare const encodingNames: readonly ["cl100k_base", "p50k_base", "r50k_base", "p50k_edit", "o200k_base"];
export declare const modelToEncodingMap: {
declare const chatEnabledModelsMap: {
readonly 'gpt-4': "cl100k_base";
readonly 'gpt-4-0314': "cl100k_base";
readonly 'gpt-4-0613': "cl100k_base";
readonly 'gpt-4-32k': "cl100k_base";
readonly 'gpt-4-0314': "cl100k_base";
readonly 'gpt-4-32k-0314': "cl100k_base";
readonly 'gpt-4-32k-0613': "cl100k_base";
readonly 'gpt-4-turbo': "cl100k_base";
readonly 'gpt-4-turbo-2024-04-09': "cl100k_base";
readonly 'gpt-4-turbo-preview': "cl100k_base";
readonly 'gpt-4-1106-preview': "cl100k_base";
readonly 'gpt-4-0125-preview': "cl100k_base";
readonly 'gpt-4-vision-preview': "cl100k_base";
readonly 'gpt-4o': "o200k_base";
readonly 'gpt-4o-2024-05-13': "o200k_base";
readonly 'gpt-4o-2024-08-06': "o200k_base";
readonly 'gpt-4o-mini-2024-07-18': "o200k_base";
readonly 'gpt-4o-mini': "o200k_base";
readonly 'gpt-3.5-turbo': "cl100k_base";
readonly 'gpt-3.5-turbo-0301': "cl100k_base";
readonly 'gpt-3.5-turbo-0613': "cl100k_base";
readonly 'gpt-3.5-turbo-1106': "cl100k_base";
readonly 'gpt-3.5-turbo-0125': "cl100k_base";
readonly 'gpt-3.5-turbo-16k': "cl100k_base";
readonly 'gpt-3.5-turbo-16k-0613': "cl100k_base";
readonly 'gpt-4o': "o200k_base";
readonly 'gpt-3.5-turbo-instruct': "cl100k_base";
readonly 'gpt-3.5-turbo-instruct-0914': "cl100k_base";
};
export declare const modelToEncodingMap: {
readonly 'text-davinci-003': "p50k_base";

@@ -37,2 +56,4 @@ readonly 'text-davinci-002': "p50k_base";

readonly 'text-embedding-ada-002': "cl100k_base";
readonly 'text-embedding-3-small': "cl100k_base";
readonly 'text-embedding-3-large': "cl100k_base";
readonly 'text-similarity-davinci-001': "r50k_base";

@@ -48,2 +69,28 @@ readonly 'text-similarity-curie-001': "r50k_base";

readonly 'code-search-ada-code-001': "r50k_base";
readonly 'gpt-4': "cl100k_base";
readonly 'gpt-4-0314': "cl100k_base";
readonly 'gpt-4-0613': "cl100k_base";
readonly 'gpt-4-32k': "cl100k_base";
readonly 'gpt-4-32k-0314': "cl100k_base";
readonly 'gpt-4-32k-0613': "cl100k_base";
readonly 'gpt-4-turbo': "cl100k_base";
readonly 'gpt-4-turbo-2024-04-09': "cl100k_base";
readonly 'gpt-4-turbo-preview': "cl100k_base";
readonly 'gpt-4-1106-preview': "cl100k_base";
readonly 'gpt-4-0125-preview': "cl100k_base";
readonly 'gpt-4-vision-preview': "cl100k_base";
readonly 'gpt-4o': "o200k_base";
readonly 'gpt-4o-2024-05-13': "o200k_base";
readonly 'gpt-4o-2024-08-06': "o200k_base";
readonly 'gpt-4o-mini-2024-07-18': "o200k_base";
readonly 'gpt-4o-mini': "o200k_base";
readonly 'gpt-3.5-turbo': "cl100k_base";
readonly 'gpt-3.5-turbo-0301': "cl100k_base";
readonly 'gpt-3.5-turbo-0613': "cl100k_base";
readonly 'gpt-3.5-turbo-1106': "cl100k_base";
readonly 'gpt-3.5-turbo-0125': "cl100k_base";
readonly 'gpt-3.5-turbo-16k': "cl100k_base";
readonly 'gpt-3.5-turbo-16k-0613': "cl100k_base";
readonly 'gpt-3.5-turbo-instruct': "cl100k_base";
readonly 'gpt-3.5-turbo-instruct-0914': "cl100k_base";
};
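With the chat models folded into the flattened map, resolving a model name to its encoding is a plain property lookup. A sketch; the import subpath is assumed from the compiled layout above:

```ts
// modelToEncodingMap is declared in mapping.d.ts above; path assumed.
import { modelToEncodingMap } from 'gpt-tokenizer/mapping'

console.log(modelToEncodingMap['gpt-4o-mini']) // 'o200k_base'
console.log(modelToEncodingMap['gpt-4-turbo']) // 'cl100k_base'
```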

@@ -54,44 +101,7 @@ export interface ChatParameters {

}
declare const internalChatModelParams: {
'gpt-3.5-turbo': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-3.5-turbo-0301': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-3.5-turbo-0613': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-3.5-turbo-16k-0613': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4-0314': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4-32k': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4-32k-0314': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4o': {
messageSeparator: string;
roleSeparator: string;
};
};
export declare const chatModelParams: Partial<Record<ModelName, ChatParameters>>;
export type ModelName = keyof typeof modelToEncodingMap;
export type ChatModelName = keyof typeof internalChatModelParams;
export type ChatModelName = keyof typeof chatEnabledModelsMap;
export type EncodingName = (typeof modelToEncodingMap)[ModelName];
export declare const chatModelParams: Record<ChatModelName, ChatParameters>;
export declare const chatEnabledModels: ChatModelName[];
export {};
"use strict";
/* eslint-disable camelcase */
Object.defineProperty(exports, "__esModule", { value: true });
exports.chatModelParams = exports.modelToEncodingMap = exports.encodingNames = exports.o200k_base = exports.r50k_base = exports.p50k_edit = exports.p50k_base = exports.cl100k_base = void 0;
exports.chatEnabledModels = exports.chatModelParams = exports.modelToEncodingMap = exports.encodingNames = exports.o200k_base = exports.r50k_base = exports.p50k_edit = exports.p50k_base = exports.cl100k_base = void 0;
const specialTokens_js_1 = require("./specialTokens.js");

@@ -18,13 +18,33 @@ exports.cl100k_base = 'cl100k_base';

];
exports.modelToEncodingMap = {
// chat
const chatEnabledModelsMap = {
'gpt-4': exports.cl100k_base,
'gpt-4-0314': exports.cl100k_base,
'gpt-4-0613': exports.cl100k_base,
'gpt-4-32k': exports.cl100k_base,
'gpt-4-0314': exports.cl100k_base,
'gpt-4-32k-0314': exports.cl100k_base,
'gpt-4-32k-0613': exports.cl100k_base,
'gpt-4-turbo': exports.cl100k_base,
'gpt-4-turbo-2024-04-09': exports.cl100k_base,
'gpt-4-turbo-preview': exports.cl100k_base,
'gpt-4-1106-preview': exports.cl100k_base,
'gpt-4-0125-preview': exports.cl100k_base,
'gpt-4-vision-preview': exports.cl100k_base,
'gpt-4o': exports.o200k_base,
'gpt-4o-2024-05-13': exports.o200k_base,
'gpt-4o-2024-08-06': exports.o200k_base,
'gpt-4o-mini-2024-07-18': exports.o200k_base,
'gpt-4o-mini': exports.o200k_base,
'gpt-3.5-turbo': exports.cl100k_base,
'gpt-3.5-turbo-0301': exports.cl100k_base,
'gpt-3.5-turbo-0613': exports.cl100k_base,
'gpt-3.5-turbo-1106': exports.cl100k_base,
'gpt-3.5-turbo-0125': exports.cl100k_base,
'gpt-3.5-turbo-16k': exports.cl100k_base,
'gpt-3.5-turbo-16k-0613': exports.cl100k_base,
'gpt-4o': exports.o200k_base,
'gpt-3.5-turbo-instruct': exports.cl100k_base,
'gpt-3.5-turbo-instruct-0914': exports.cl100k_base,
};
exports.modelToEncodingMap = {
// chat
...chatEnabledModelsMap,
// text

@@ -53,2 +73,4 @@ 'text-davinci-003': exports.p50k_base,

'text-embedding-ada-002': exports.cl100k_base,
'text-embedding-3-small': exports.cl100k_base,
'text-embedding-3-large': exports.cl100k_base,
// old embeddings

@@ -66,41 +88,16 @@ 'text-similarity-davinci-001': exports.r50k_base,

};
const internalChatModelParams = {
'gpt-3.5-turbo': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-0301': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-0613': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-16k-0613': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-4': {
messageSeparator: '',
roleSeparator: specialTokens_js_1.ImSep,
},
'gpt-4-0314': {
messageSeparator: '',
roleSeparator: specialTokens_js_1.ImSep,
},
'gpt-4-32k': {
messageSeparator: '',
roleSeparator: specialTokens_js_1.ImSep,
},
'gpt-4-32k-0314': {
messageSeparator: '',
roleSeparator: specialTokens_js_1.ImSep,
},
'gpt-4o': {
messageSeparator: '',
roleSeparator: specialTokens_js_1.ImSep,
},
const gpt3params = {
messageSeparator: '\n',
roleSeparator: '\n',
};
exports.chatModelParams = internalChatModelParams;
const gpt4params = {
messageSeparator: '',
roleSeparator: specialTokens_js_1.ImSep,
};
exports.chatModelParams = Object.fromEntries(Object.keys(chatEnabledModelsMap).flatMap((modelName) => modelName.startsWith('gpt-4')
? [[modelName, gpt4params]]
: modelName.startsWith('gpt-3.5-turbo')
? [[modelName, gpt3params]]
: []));
exports.chatEnabledModels = Object.keys(chatEnabledModelsMap);
//# sourceMappingURL=mapping.js.map
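The hand-written per-model table is now derived from the model-name prefix, as the `flatMap` above shows. A toy reproduction of that derivation; the separator values are taken from the code above, with `'<|im_sep|>'` standing in for the `ImSep` constant (an assumption):

```ts
const chatModels = ['gpt-4o-mini', 'gpt-4-turbo', 'gpt-3.5-turbo-0125']
const gpt4params = { messageSeparator: '', roleSeparator: '<|im_sep|>' } // ImSep assumed
const gpt3params = { messageSeparator: '\n', roleSeparator: '\n' }

const chatModelParams = Object.fromEntries(
  chatModels.flatMap((modelName): [string, typeof gpt4params][] =>
    modelName.startsWith('gpt-4')
      ? [[modelName, gpt4params]]
      : modelName.startsWith('gpt-3.5-turbo')
        ? [[modelName, gpt3params]]
        : [],
  ),
)

console.log(chatModelParams['gpt-4o-mini'].roleSeparator) // '<|im_sep|>'
```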
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0301', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0301', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;
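
The same one-line change repeats for every per-model entry point in this release: 2.2.3 converted the encoding from tuples on load, while 2.3.0 hands the raw cl100k_base (or, for gpt-4o, o200k_base) rank table straight to GptEncoding and skips the conversion pass. The public per-model API is unchanged; a minimal usage sketch, assuming the package's documented gpt-tokenizer/model/* entry points:

import { encode, decode, isWithinTokenLimit } from 'gpt-tokenizer/model/gpt-3.5-turbo-0301'

const tokens = encode('hello world')  // number[]
decode(tokens)                        // back to 'hello world'
isWithinTokenLimit('hello world', 5)  // token count when within the limit, otherwise false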

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0613', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0613', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-16k-0613', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-16k-0613', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-3.5-turbo', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4-0314', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4-0314', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4-32k-0314', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4-32k-0314', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4-32k', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4-32k', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("../encodings/cl100k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4', () => cl100k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
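
A usage sketch of this per-model entrypoint, using the subpath import documented by the package (output values are illustrative):

import { decode, encode, encodeChat, isWithinTokenLimit } from 'gpt-tokenizer/model/gpt-4o'

const tokens = encode('hello world') // number[]
decode(tokens) // 'hello world'
isWithinTokenLimit('hello world', 10) // token count, or false once the limit is exceeded
encodeChat([{ role: 'user', content: 'hello' }], 'gpt-4o') // number[] including chat framing tokens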

@@ -22,7 +22,7 @@ "use strict";

/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("../convertTokenBytePairEncodingFromTuples.js");
const o200k_base_js_1 = __importDefault(require("../encodings/o200k_base.js"));
const GptEncoding_js_1 = require("../GptEncoding.js");
__exportStar(require("../specialTokens.js"), exports);
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4o', () => (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(o200k_base_js_1.default));
// prettier-ignore
const api = GptEncoding_js_1.GptEncoding.getEncodingApiForModel('gpt-4o', () => o200k_base_js_1.default);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -29,0 +29,0 @@ exports.decode = decode;

@@ -1,4 +0,4 @@

import { EncoderMap } from './EncoderMap.js';
import type { BytePairEncodingConfig, RawBytePairRanks } from './BytePairEncodingCore.js';
import type { EncodingName, ModelName } from './mapping.js';
export interface EncodingParams {
export interface EncodingParams extends BytePairEncodingConfig {
/**

@@ -16,9 +16,11 @@ * The expected total number of tokens in the vocabulary, including both regular and special tokens.

tokenSplitRegex: RegExp;
mergeableBytePairRanks: EncoderMap;
specialTokenMapping: Map<string, number>;
modelName?: ModelName;
/** increases memory consumption, but speeds up subsequent decoding */
enableCache?: boolean;
}
export type GetMergeableRanksFn = (encodingName: EncodingName) => EncoderMap;
export type GetMergeableRanksAsyncFn = (encodingName: EncodingName) => Promise<EncoderMap>;
export declare const tokenSplitRegex: RegExp;
export type GetMergeableRanksFn = (encodingName: EncodingName) => RawBytePairRanks;
export type GetMergeableRanksAsyncFn = (encodingName: EncodingName) => Promise<RawBytePairRanks>;
export declare function getEncodingParams(encodingName: EncodingName, getMergeableRanks: GetMergeableRanksFn): EncodingParams;
export declare function getModelParamsAsync(encodingName: EncodingName, getMergeableRanks: GetMergeableRanksAsyncFn): Promise<EncodingParams>;
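
RawBytePairRanks replaces the old EncoderMap: the array index is the BPE rank, and each entry is either the token's decoded string or its raw bytes. A minimal sketch of a custom GetMergeableRanksFn under that contract (toy data, not a real vocabulary):

import type { RawBytePairRanks } from './BytePairEncodingCore.js'
import type { EncodingName } from './mapping.js'

// index = rank; number[] entries hold bytes that are not valid UTF-8 on their own
const toyRanks: RawBytePairRanks = ['a', 'b', 'ab', [0xf0, 0x9f]]
const getToyMergeableRanks = (_name: EncodingName): RawBytePairRanks => toyRanks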
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.tokenSplitRegex = void 0;
exports.getEncodingParams = getEncodingParams;
exports.getModelParamsAsync = getModelParamsAsync;
const specialTokens_js_1 = require("./specialTokens.js");
const tokenSplitRegex = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
function R50KBase(mergeableBytePairRanks) {
return {
expectedVocabularySize: 50_257,
tokenSplitRegex,
mergeableBytePairRanks,
specialTokenMapping: new Map([[specialTokens_js_1.EndOfText, 50_256]]),
};
}
function P50KBase(mergeableBytePairRanks) {
return {
expectedVocabularySize: 50_281,
tokenSplitRegex,
mergeableBytePairRanks,
specialTokenMapping: new Map([[specialTokens_js_1.EndOfText, 50_256]]),
};
}
function P50KEdit(mergeableBytePairRanks) {
const specialTokenMapping = new Map([
[specialTokens_js_1.EndOfText, 50_256],
[specialTokens_js_1.FimPrefix, 50_281],
[specialTokens_js_1.FimMiddle, 50_282],
[specialTokens_js_1.FimSuffix, 50_283],
]);
return {
tokenSplitRegex,
mergeableBytePairRanks,
specialTokenMapping,
};
}
function Cl100KBase(mergeableBytePairRanks) {
const specialTokenMapping = new Map([
[specialTokens_js_1.EndOfText, 100_257],
[specialTokens_js_1.FimPrefix, 100_258],
[specialTokens_js_1.FimMiddle, 100_259],
[specialTokens_js_1.FimSuffix, 100_260],
[specialTokens_js_1.ImStart, 100_264],
[specialTokens_js_1.ImEnd, 100_265],
[specialTokens_js_1.ImSep, 100_266],
[specialTokens_js_1.EndOfPrompt, 100_276],
]);
return {
tokenSplitRegex: /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu,
mergeableBytePairRanks,
specialTokenMapping,
};
}
function O200KBase(mergeableBytePairRanks) {
const specialTokenMapping = new Map([
[specialTokens_js_1.EndOfText, 199_999],
[specialTokens_js_1.FimPrefix, 200_000],
[specialTokens_js_1.FimMiddle, 200_001],
[specialTokens_js_1.FimSuffix, 200_002],
[specialTokens_js_1.ImStart, 200_003],
[specialTokens_js_1.ImEnd, 200_004],
[specialTokens_js_1.ImSep, 200_005],
[specialTokens_js_1.EndOfPrompt, 200_006],
]);
return {
tokenSplitRegex: /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu,
mergeableBytePairRanks,
specialTokenMapping,
};
}
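
cl100k_base and o200k_base share this pre-tokenization regex, which chunks text into word-like pieces before any BPE merging takes place. An illustrative split:

const splitRegex = /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu
const pieces = [...'Hello, world 2024!'.matchAll(splitRegex)].map(([piece]) => piece)
// → ['Hello', ',', ' world', ' ', '202', '4', '!'] (note that runs of digits are capped at 3)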
const Cl100KBase_js_1 = require("./encodingParams/Cl100KBase.js");
const O200KBase_js_1 = require("./encodingParams/O200KBase.js");
const P50KBase_js_1 = require("./encodingParams/P50KBase.js");
const P50KEdit_js_1 = require("./encodingParams/P50KEdit.js");
const R50KBase_js_1 = require("./encodingParams/R50KBase.js");
exports.tokenSplitRegex = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
function getEncodingParams(encodingName, getMergeableRanks) {

@@ -74,11 +16,11 @@ const mergeableBytePairRanks = getMergeableRanks(encodingName);

case 'r50k_base':
return R50KBase(mergeableBytePairRanks);
return (0, R50KBase_js_1.R50KBase)(mergeableBytePairRanks);
case 'p50k_base':
return P50KBase(mergeableBytePairRanks);
return (0, P50KBase_js_1.P50KBase)(mergeableBytePairRanks);
case 'p50k_edit':
return P50KEdit(mergeableBytePairRanks);
return (0, P50KEdit_js_1.P50KEdit)(mergeableBytePairRanks);
case 'cl100k_base':
return Cl100KBase(mergeableBytePairRanks);
return (0, Cl100KBase_js_1.Cl100KBase)(mergeableBytePairRanks);
case 'o200k_base':
return O200KBase(mergeableBytePairRanks);
return (0, O200KBase_js_1.O200KBase)(mergeableBytePairRanks);
default:

@@ -85,0 +27,0 @@ throw new Error(`Unknown encoding name: ${encodingName}`);

@@ -1,3 +0,3 @@

import type { EncoderMap } from './EncoderMap.js';
import type { RawBytePairRanks } from './BytePairEncodingCore.js';
import type { EncodingName } from './mapping.js';
export declare const resolveEncoding: (encoding: EncodingName) => EncoderMap;
export declare const resolveEncoding: (encoding: EncodingName) => RawBytePairRanks;

@@ -7,4 +7,2 @@ "use strict";

exports.resolveEncoding = void 0;
/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("./convertTokenBytePairEncodingFromTuples.js");
const cl100k_base_js_1 = __importDefault(require("./encodings/cl100k_base.js"));

@@ -17,10 +15,10 @@ const o200k_base_js_1 = __importDefault(require("./encodings/o200k_base.js"));

case 'r50k_base':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(r50k_base_js_1.default);
return r50k_base_js_1.default;
case 'p50k_base':
case 'p50k_edit':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(p50k_base_js_1.default);
return p50k_base_js_1.default;
case 'cl100k_base':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(cl100k_base_js_1.default);
return cl100k_base_js_1.default;
case 'o200k_base':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(o200k_base_js_1.default);
return o200k_base_js_1.default;
default: {

@@ -27,0 +25,0 @@ throw new Error(`Unknown encoding name: ${encoding}`);
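
With the tuple-conversion pass removed, resolveEncoding now hands back the bundled ranks array directly:

import { resolveEncoding } from './resolveEncoding.js'

const ranks = resolveEncoding('o200k_base') // RawBytePairRanks, no conversion step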

@@ -1,3 +0,3 @@

import type { EncoderMap } from './EncoderMap.js';
import type { RawBytePairRanks } from './BytePairEncodingCore.js';
import type { EncodingName } from './mapping.js';
export declare const resolveEncodingAsync: (encoding: EncodingName) => Promise<EncoderMap>;
export declare const resolveEncodingAsync: (encoding: EncodingName) => Promise<RawBytePairRanks>;

@@ -27,15 +27,13 @@ "use strict";

exports.resolveEncodingAsync = void 0;
/* eslint-disable import/extensions */
const convertTokenBytePairEncodingFromTuples_js_1 = require("./convertTokenBytePairEncodingFromTuples.js");
const resolveEncodingAsync = async (encoding) => {
switch (encoding) {
case 'r50k_base':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(await Promise.resolve().then(() => __importStar(require('./encodings/r50k_base.js'))).then(({ default: encodingTuples }) => encodingTuples));
return Promise.resolve().then(() => __importStar(require('./encodings/r50k_base.js'))).then(({ default: rawBytePairRanks }) => rawBytePairRanks);
case 'p50k_base':
case 'p50k_edit':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(await Promise.resolve().then(() => __importStar(require('./encodings/p50k_base.js'))).then(({ default: encodingTuples }) => encodingTuples));
return Promise.resolve().then(() => __importStar(require('./encodings/p50k_base.js'))).then(({ default: rawBytePairRanks }) => rawBytePairRanks);
case 'cl100k_base':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(await Promise.resolve().then(() => __importStar(require('./encodings/cl100k_base.js'))).then(({ default: encodingTuples }) => encodingTuples));
return Promise.resolve().then(() => __importStar(require('./encodings/cl100k_base.js'))).then(({ default: rawBytePairRanks }) => rawBytePairRanks);
case 'o200k_base':
return (0, convertTokenBytePairEncodingFromTuples_js_1.convertTokenBytePairEncodingFromTuples)(await Promise.resolve().then(() => __importStar(require('./encodings/o200k_base.js'))).then(({ default: encodingTuples }) => encodingTuples));
return Promise.resolve().then(() => __importStar(require('./encodings/o200k_base.js'))).then(({ default: rawBytePairRanks }) => rawBytePairRanks);
default: {

@@ -42,0 +40,0 @@ throw new Error(`Unknown encoding name: ${encoding}`);

@@ -0,1 +1,4 @@

export declare const isAscii: (codePoint: number) => boolean;
export declare function endsWithIncompleteUtfPairSurrogate(string: string): boolean;
export declare function tryConvertToString(arr: Uint8Array): string | undefined;
export declare function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number;
"use strict";
/* eslint-disable no-bitwise */
/* eslint-disable no-magic-numbers */
Object.defineProperty(exports, "__esModule", { value: true });
exports.isAscii = void 0;
exports.endsWithIncompleteUtfPairSurrogate = endsWithIncompleteUtfPairSurrogate;
exports.tryConvertToString = tryConvertToString;
exports.compareUint8Arrays = compareUint8Arrays;
const isAscii = (codePoint) => codePoint <= 0x7f;
exports.isAscii = isAscii;
const HIGH_SURROGATE_START = 55_296;

@@ -14,2 +21,82 @@ const HIGH_SURROGATE_END = 56_319;

}
function isValidUTF8(bytes) {
let i = 0;
while (i < bytes.length) {
const byte1 = bytes[i];
let numBytes = 0;
let codePoint = 0;
// Determine the number of bytes in the current UTF-8 character
if (byte1 <= 0x7f) {
// 1-byte character (ASCII)
numBytes = 1;
codePoint = byte1;
}
else if ((byte1 & 0xe0) === 0xc0) {
// 2-byte character
numBytes = 2;
codePoint = byte1 & 0x1f;
if (byte1 <= 0xc1)
return false; // Overlong encoding not allowed
}
else if ((byte1 & 0xf0) === 0xe0) {
// 3-byte character
numBytes = 3;
codePoint = byte1 & 0x0f;
}
else if ((byte1 & 0xf8) === 0xf0) {
// 4-byte character
numBytes = 4;
codePoint = byte1 & 0x07;
if (byte1 > 0xf4)
return false; // Code points above U+10FFFF not allowed
}
else {
// Invalid first byte of UTF-8 character
return false;
}
// Ensure there are enough continuation bytes
if (i + numBytes > bytes.length)
return false;
// Process the continuation bytes
for (let j = 1; j < numBytes; j++) {
const byte = bytes[i + j];
if (byte === undefined || (byte & 0xc0) !== 0x80)
return false; // Continuation bytes must start with '10'
codePoint = (codePoint << 6) | (byte & 0x3f);
}
// Check for overlong encodings
if (numBytes === 2 && codePoint < 0x80)
return false; // Overlong 2-byte sequence
if (numBytes === 3 && codePoint < 2_048)
return false; // Overlong 3-byte sequence
if (numBytes === 4 && codePoint < 65_536)
return false; // Overlong 4-byte sequence
// Check for surrogate halves (U+D800 to U+DFFF)
if (codePoint >= 55_296 && codePoint <= 57_343)
return false;
// Check for code points above U+10FFFF
if (codePoint > 1_114_111)
return false;
// Move to the next character
i += numBytes;
}
return true;
}
const textDecoder = new TextDecoder('utf8', { fatal: false });
function tryConvertToString(arr) {
if (!isValidUTF8(arr)) {
return undefined;
}
return textDecoder.decode(arr);
}
// Helper function to compare two Uint8Arrays lexicographically
function compareUint8Arrays(a, b) {
const len = Math.min(a.length, b.length);
for (let i = 0; i < len; i++) {
if (a[i] !== b[i]) {
return a[i] - b[i];
}
}
return a.length - b.length;
}
//# sourceMappingURL=utfUtil.js.map
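A few illustrative calls against the helpers above:

tryConvertToString(new TextEncoder().encode('héllo')) // 'héllo': valid UTF-8 round-trips
tryConvertToString(new Uint8Array([0xc0, 0x80])) // undefined: overlong encoding rejected
tryConvertToString(new Uint8Array([0xed, 0xa0, 0x80])) // undefined: surrogate half U+D800
compareUint8Arrays(Uint8Array.of(97, 98), Uint8Array.of(97, 99)) // < 0, 'ab' sorts before 'ac'
compareUint8Arrays(Uint8Array.of(97), Uint8Array.of(97, 98)) // < 0, a shorter prefix sorts first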
export declare function getMaxValueFromMap(map: Map<unknown, number>): number;
export declare function escapeRegExp(string: string): string;
export declare function getSpecialTokenRegex(tokens: Set<string>): RegExp;
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getMaxValueFromMap = getMaxValueFromMap;
exports.escapeRegExp = escapeRegExp;
exports.getSpecialTokenRegex = getSpecialTokenRegex;
const escapeRegExp_js_1 = require("./escapeRegExp.js");
function getMaxValueFromMap(map) {

@@ -13,4 +13,7 @@ let max = 0;

}
function escapeRegExp(string) {
return string.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&'); // $& means the whole matched string
}
function getSpecialTokenRegex(tokens) {
const escapedTokens = [...tokens].map(escapeRegExp_js_1.escapeRegExp);
const escapedTokens = [...tokens].map(escapeRegExp);
const inner = escapedTokens.join('|');

@@ -17,0 +20,0 @@ return new RegExp(`(${inner})`);
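
For reference, the produced pattern is a single alternation of the escaped tokens:

const specialRegex = getSpecialTokenRegex(new Set(['<|endoftext|>', '<|im_start|>']))
// specialRegex → /(<\|endoftext\|>|<\|im_start\|>)/
'hi <|endoftext|>'.match(specialRegex)?.[0] // '<|endoftext|>'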

@@ -1,25 +0,31 @@

import { EncoderMap } from './EncoderMap.js';
export declare class BytePairEncodingCore {
encoder: EncoderMap;
decoder: Map<number, Uint8Array>;
export type RawBytePairRanks = readonly (string | readonly number[])[];
export interface BytePairEncodingConfig {
mergeableBytePairRanks: RawBytePairRanks;
specialTokenMapping?: Map<string, number>;
tokenSplitRegex: RegExp;
specialTokensEncoder: Map<string, number>;
specialTokensDecoder: Map<number, Uint8Array>;
specialTokenPatternRegex: RegExp;
textEncoder: TextEncoder;
constructor({ bytePairEncoder, specialTokenEncoder, tokenSplitRegex, }: {
bytePairEncoder: EncoderMap;
specialTokenEncoder?: Map<string, number>;
tokenSplitRegex: RegExp;
});
encodeNative(text: string, allowedSpecial: Set<string>): Generator<number[], number, undefined>;
findNextSpecialStartIndex(text: string, allowedSpecial: Set<string>, startIndex: number, specialRegex: RegExp): number | undefined;
decodeNative(tokens: Iterable<number>): Generator<Uint8Array>;
decodeNativeAsync(tokens: AsyncIterable<number>): AsyncGenerator<Uint8Array>;
tryDecodeToken(token: number): Uint8Array | undefined;
bytePairEncode(inputBytes: Uint8Array, bytePairRanks: EncoderMap): number[];
bytePairMerge(piece: Uint8Array, bytePairRanks: EncoderMap, transform: (pair: {
start: number;
end: number;
}) => number): number[];
}
export declare class BytePairEncodingCore {
readonly bytePairEncoderSize: number;
private bytePairEncoder;
private bytePairEncoderSortedLookup;
private bytePairRanksDecoder;
private tokenSplitRegex;
private specialTokensEncoder;
private specialTokensDecoder;
private specialTokenPatternRegex;
private stringDecoder;
private textEncoder;
constructor({ mergeableBytePairRanks: bytePairEncoder, specialTokenMapping: specialTokenEncoder, tokenSplitRegex, }: BytePairEncodingConfig);
getBpeRankFromString(key: string): number | undefined;
getBpeRankFromStringOrThrow(key: string): number;
getBpeRankFromBytes(key: Uint8Array): number | undefined;
getBpeRankFromBytesOrThrow(key: Uint8Array): number;
binarySearch(key: Uint8Array): number;
encodeNative(text: string, allowedSpecial?: Set<string>): Generator<number[], number, undefined>;
findNextSpecialStartIndex(text: string, allowedSpecial: Set<string> | undefined, startIndex: number, specialRegex: RegExp): number | undefined;
decodeNative(tokens: Iterable<number>): Generator<Uint8Array | string, void, void>;
decodeNativeAsync(tokens: AsyncIterable<number>): AsyncGenerator<Uint8Array | string>;
tryDecodeToken(tokenRank: number): Uint8Array | string | undefined;
bytePairEncode(input: string): number[];
bytePairMerge(piece: Uint8Array, getByteForRange: (start: number, end: number) => number): number[];
}

@@ -1,6 +0,9 @@

import { EncoderMap } from './EncoderMap.js';
import { escapeRegExp } from './escapeRegExp.js';
/* eslint-disable no-continue */
import { compareUint8Arrays, isAscii, tryConvertToString } from './utfUtil.js';
import { escapeRegExp } from './util.js';
export class BytePairEncodingCore {
encoder;
decoder;
bytePairEncoderSize;
bytePairEncoder;
bytePairEncoderSortedLookup;
bytePairRanksDecoder = new Map();
tokenSplitRegex;

@@ -10,14 +13,24 @@ specialTokensEncoder;

specialTokenPatternRegex;
stringDecoder;
textEncoder = new TextEncoder();
constructor({ bytePairEncoder, specialTokenEncoder, tokenSplitRegex, }) {
this.encoder = bytePairEncoder ?? new EncoderMap();
this.decoder = bytePairEncoder
? new Map([...bytePairEncoder].map(([key, value]) => [value, key]))
: new Map();
constructor({ mergeableBytePairRanks: bytePairEncoder, specialTokenMapping: specialTokenEncoder, tokenSplitRegex, }) {
this.bytePairEncoder = bytePairEncoder;
this.stringDecoder = new Map();
// size without array holes (which may be present in the encoder)
this.bytePairEncoderSize = Object.keys(bytePairEncoder).length;
const binaryLookup = [];
// forEach skips array holes:
bytePairEncoder.forEach((value, rank) => {
if (typeof value === 'string') {
this.stringDecoder.set(value, rank);
return;
}
const byteArray = new Uint8Array(value);
binaryLookup.push([byteArray, rank]);
this.bytePairRanksDecoder.set(rank, byteArray);
});
this.bytePairEncoderSortedLookup = binaryLookup.sort((a, b) => compareUint8Arrays(a[0], b[0]));
this.specialTokensEncoder = specialTokenEncoder ?? new Map();
this.specialTokensDecoder = specialTokenEncoder
? new Map([...specialTokenEncoder].map(([key, value]) => [
value,
this.textEncoder.encode(key),
]))
? new Map([...specialTokenEncoder].map(([key, value]) => [value, key]))
: new Map();

@@ -34,2 +47,60 @@ this.tokenSplitRegex = tokenSplitRegex;

}
getBpeRankFromString(key) {
return this.stringDecoder.get(key);
}
getBpeRankFromStringOrThrow(key) {
const value = this.getBpeRankFromString(key);
if (value === undefined) {
throw new Error(`The byte-pair encoding does not contain a value for: ${key}`);
}
return value;
}
getBpeRankFromBytes(key) {
const keyAsString = tryConvertToString(key);
if (keyAsString !== undefined) {
return this.getBpeRankFromString(keyAsString);
}
// Perform binary search on the binary keys
const index = this.binarySearch(key);
if (index !== -1) {
return this.bytePairEncoderSortedLookup[index][1];
}
return undefined;
}
getBpeRankFromBytesOrThrow(key) {
const value = this.getBpeRankFromBytes(key);
if (value === undefined) {
throw new Error(`The byte-pair encoding does not contain a value for: ${key.toString()}`);
}
return value;
}
// Binary search on the binary keys
binarySearch(key) {
let low = 0;
let high = this.bytePairEncoderSortedLookup.length - 1;
while (low <= high) {
// eslint-disable-next-line no-bitwise
const mid = (low + high) >>> 1;
const midKey = this.bytePairEncoderSortedLookup[mid][0];
let cmp = 0;
for (let i = 0; i < Math.min(midKey.length, key.length); i++) {
cmp = midKey[i] - key[i];
if (cmp !== 0)
break;
}
if (cmp === 0) {
cmp = midKey.length - key.length;
}
if (cmp === 0) {
return mid;
}
if (cmp < 0) {
low = mid + 1;
}
else {
high = mid - 1;
}
}
return -1;
}
*encodeNative(text, allowedSpecial) {

@@ -45,11 +116,9 @@ let startIndex = 0;

for (const [match] of textSegment.matchAll(this.tokenSplitRegex)) {
const encodedPiece = this.textEncoder.encode(match);
const token = this.encoder.get(encodedPiece);
const token = this.getBpeRankFromString(match);
if (token !== undefined) {
lastTokenLength = 1;
yield [token];
// eslint-disable-next-line no-continue
continue;
}
const tokens = this.bytePairEncode(encodedPiece, this.encoder);
const tokens = this.bytePairEncode(match);
lastTokenLength = tokens.length;

@@ -83,3 +152,3 @@ yield tokens;

const [specialToken] = nextSpecialMatch;
if (allowedSpecial.has(specialToken)) {
if (allowedSpecial?.has(specialToken)) {
return nextSpecialMatch.index + searchIndex;

@@ -100,41 +169,69 @@ }

for await (const token of tokens) {
const tokenBytes = this.tryDecodeToken(token);
if (tokenBytes) {
yield tokenBytes;
const tokenBytesOrString = this.tryDecodeToken(token);
if (tokenBytesOrString) {
yield tokenBytesOrString;
}
}
}
tryDecodeToken(token) {
return this.decoder.get(token) ?? this.specialTokensDecoder.get(token);
tryDecodeToken(tokenRank) {
const value = this.bytePairEncoder[tokenRank];
if (typeof value === 'string') {
return value;
}
if (typeof value === 'object') {
const fromBinary = this.bytePairRanksDecoder.get(tokenRank);
if (fromBinary) {
return fromBinary;
}
}
return this.specialTokensDecoder.get(tokenRank);
}
bytePairEncode(inputBytes, bytePairRanks) {
if (inputBytes.length === 1) {
return [bytePairRanks.getOrThrow(inputBytes)];
bytePairEncode(input) {
if (input.length === 1 && isAscii(input.codePointAt(0))) {
return [this.getBpeRankFromStringOrThrow(input)];
}
return this.bytePairMerge(inputBytes, bytePairRanks, (pair) => {
const key = inputBytes.slice(pair.start, pair.end);
return bytePairRanks.getOrThrow(key);
const inputBytes = this.textEncoder.encode(input);
return this.bytePairMerge(inputBytes, (start, end) => {
const key = inputBytes.subarray(start, end);
return this.getBpeRankFromBytesOrThrow(key);
});
}
bytePairMerge(piece, bytePairRanks, transform) {
bytePairMerge(
// Input array of bytes to process
piece,
// Callback that maps each final merged byte range to its token rank
getByteForRange) {
// Create an array of partition objects. Each partition tracks the start index in 'piece'
// and a rank value for adjacent pairs (initially set to positive infinity).
const partitions = Array.from({ length: piece.length + 1 }, (_, i) => ({
start: i,
rank: Number.POSITIVE_INFINITY,
rank: Number.POSITIVE_INFINITY, // Rank starts at infinity (unmerged)
}));
// Helper function to get the rank of a byte pair starting at 'startIndex'.
// 'skip' determines how far we look ahead (usually 0, for consecutive pairs).
const getRank = (startIndex, skip) => {
if (startIndex + skip + 2 >= partitions.length) {
// Avoid out-of-bounds errors, return undefined when no valid pair exists
return undefined;
}
const key = piece.slice(partitions[startIndex].start, partitions[startIndex + skip + 2].start);
return bytePairRanks.get(key);
// Take the byte span from the partition at 'startIndex' to the start of the
// partition 'skip + 2' positions ahead.
const key = piece.subarray(partitions[startIndex].start, partitions[startIndex + skip + 2].start);
// Retrieve the rank of this byte pair from the BPE rank function
return this.getBpeRankFromBytes(key);
};
// Initialize the ranks for all adjacent pairs in the array
for (let i = 0; i < partitions.length - 2; i++) {
// Get the rank for the pair starting at index 'i'
const rank = getRank(i, 0);
if (rank !== undefined) {
// Assign the rank to the partition at index 'i'
partitions[i].rank = rank;
}
}
// Iteratively merge byte pairs until no more useful merges can be done
while (partitions.length > 1) {
let minRank = Number.POSITIVE_INFINITY;
let minRankIdx = 0;
// Find the partition with the minimum rank, i.e. the lowest-rank (highest-priority) pair to merge next
let i = 0;

@@ -148,7 +245,10 @@ for (const partition of partitions) {

}
// If no valid pair is left to merge, exit the loop
if (minRank === Number.POSITIVE_INFINITY) {
break;
}
// Update the rank of the partition after the merged one
partitions[minRankIdx].rank =
getRank(minRankIdx, 1) ?? Number.POSITIVE_INFINITY;
// Update the rank of the partition before the merged one (if exists)
if (minRankIdx > 0) {

@@ -158,10 +258,13 @@ partitions[minRankIdx - 1].rank =

}
// Merge by removing the partition after the one we just merged
partitions.splice(minRankIdx + 1, 1);
}
// Build the final output by mapping each remaining partitioned range to its token rank
const output = [];
for (let i = 0; i < partitions.length - 1; i++) {
output.push(transform({
start: partitions[i].start,
end: partitions[i + 1].start,
}));
output.push(getByteForRange(
// start index
partitions[i].start,
// end index
partitions[i + 1].start));
}

@@ -168,0 +271,0 @@ return output;
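
To make the merge loop concrete, here is a toy run through the public API above, using a hypothetical four-entry rank table (array index = rank):

import { BytePairEncodingCore } from './BytePairEncodingCore.js'

const core = new BytePairEncodingCore({
  mergeableBytePairRanks: ['a', 'b', 'ab', 'ba'], // toy table: 'a'→0, 'b'→1, 'ab'→2, 'ba'→3
  tokenSplitRegex: /.+/gu, // hypothetical: treat each chunk as a single piece
})
// partitions: [a|b|a|b] → lowest-rank pair 'ab' (2) merges first: [ab|a|b] → [ab|ab] → done
core.bytePairEncode('abab') // → [2, 2]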

@@ -15,2 +15,3 @@ import * as fs from 'fs/promises';

.replace(`getEncodingApi('cl100k_base'`, `getEncodingApiForModel('${modelName}'`)
.replace('\nconst api =', '// prettier-ignore\nconst api =')
.replaceAll(`cl100k_base.js`, `${encoding}.js`)

@@ -17,0 +18,0 @@ : `// eslint-disable-next-line no-restricted-exports, import/no-default-export\nexport { default } from '../encoding/${encoding}.js'\nexport * from '../encoding/${encoding}.js'\n`;

@@ -0,4 +1,6 @@

/* eslint-disable no-console */
import * as fs from 'fs/promises';
import * as path from 'path';
import { fileURLToPath } from 'url';
const DEBUG = process.env.DEBUG === 'true';
const processFilesInDirectory = async (directoryPath, fn) => {

@@ -17,3 +19,2 @@ try {

catch (error) {
// eslint-disable-next-line no-console
console.error('An error occurred:', error);

@@ -24,2 +25,18 @@ }

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const textDecoder = new TextDecoder('utf8', { fatal: true });
const textEncoder = new TextEncoder();
function safeDecodeUtf8(bytes) {
try {
const v = textDecoder.decode(bytes);
const encoded = textEncoder.encode(v);
if (encoded.byteLength !== bytes.byteLength) {
console.log('Mismatch:', new Uint8Array(bytes), encoded);
return undefined;
}
return v;
}
catch {
return undefined;
}
}
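// Example: safeDecodeUtf8(new Uint8Array([0xef, 0xbb, 0xbf])) returns undefined:
// TextDecoder strips the BOM, so the re-encoded byte length no longer matches,
// and the token is then emitted as a raw byte array rather than a string.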
await processFilesInDirectory(path.join(__dirname, '../../data'), async (filePath) => {

@@ -29,2 +46,3 @@ if (!filePath.endsWith('.tiktoken'))

const modelName = path.basename(filePath, '.tiktoken');
console.log(`Processing ${modelName}`);
const bpeFile = await fs.readFile(filePath, 'utf8');

@@ -34,9 +52,22 @@ const lines = bpeFile.split('\n');

const [token, rank] = x.split(' ');
return [token, Number.parseInt(rank, 10)];
if (!token || token.length === 0 || !rank || rank.length === 0) {
throw new Error(`Invalid token encoding: ${x}`);
}
const tokenArray = Buffer.from(token, 'base64');
return [tokenArray, Number.parseInt(rank, 10)];
});
const jsCodeBpeArray = encoder.reduce((acc, [token, rank]) => {
const decoded = safeDecodeUtf8(token) ?? token;
return {
string: `${acc.string}${','.repeat(rank - acc.lastRank)}${DEBUG ? `\n/** ${rank} = */` : ''}${typeof decoded === 'string'
? JSON.stringify(decoded)
: `[${token.join(',')}]`}`,
lastRank: rank,
};
}, { string: '', lastRank: 0 }).string;
const firstTokenRank = encoder[0]?.[1] ?? 0;
await fs.mkdir(path.join(__dirname, '../encodings'), { recursive: true });
await fs.writeFile(path.join(__dirname, `../encodings/${modelName}.js`), `/* eslint-disable */\n// @ts-nocheck\n// prettier-ignore\n/** @type {[string, number][]} */\nconst encoder = ${JSON.stringify(encoder)};\nexport default encoder;`);
// eslint-disable-next-line no-console
await fs.writeFile(path.join(__dirname, `../encodings/${modelName}.js`), `/* eslint-disable */\n// @ts-nocheck\n// prettier-ignore\n/** @type {(string | number[])[]} */\nconst encoder = [${','.repeat(firstTokenRank)}${jsCodeBpeArray}];\nexport default encoder;`);
console.log(`Wrote ${modelName}.js`);
});
//# sourceMappingURL=generateJsEncodings.js.map
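The generated module is a sparse array literal whose index encodes the rank; an illustrative (not real) excerpt of the output shape:

/* eslint-disable */
// @ts-nocheck
// prettier-ignore
/** @type {(string | number[])[]} */
const encoder = ['!', '"', '#', [188], ' the'] // index = rank; byte arrays for non-UTF-8 tokens
export default encoder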
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApi('cl100k_base', () => convertTokenBytePairEncodingFromTuples(encoder));
const api = GptEncoding.getEncodingApi('cl100k_base', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +7,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/o200k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApi('o200k_base', () => convertTokenBytePairEncodingFromTuples(encoder));
const api = GptEncoding.getEncodingApi('o200k_base', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +7,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/p50k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApi('p50k_base', () => convertTokenBytePairEncodingFromTuples(encoder));
const api = GptEncoding.getEncodingApi('p50k_base', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, } = api;

@@ -8,0 +7,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/p50k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApi('p50k_edit', () => convertTokenBytePairEncodingFromTuples(encoder));
const api = GptEncoding.getEncodingApi('p50k_edit', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, } = api;

@@ -8,0 +7,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/r50k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApi('r50k_base', () => convertTokenBytePairEncodingFromTuples(encoder));
const api = GptEncoding.getEncodingApi('r50k_base', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, } = api;

@@ -8,0 +7,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, };

export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];
export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];
export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];
export default encoder;
/** @type {[string, number][]} */
declare const encoder: [string, number][];
/** @type {(string | number[])[]} */
declare const encoder: (string | number[])[];

@@ -22,6 +22,8 @@ import { type EncodingName, type ModelName } from './mapping.js';

static FimSuffix: string;
decoder: TextDecoder;
modelName?: ModelName;
private decoder;
private bytePairEncodingCoreProcessor;
private specialTokenMapping;
private specialTokensSet;
private allSpecialTokenRegex;
private constructor();

@@ -32,3 +34,3 @@ static getEncodingApi(encodingName: EncodingName, getMergeableRanks: GetMergeableRanksFn): GptEncoding;

static getEncodingApiForModelAsync(modelName: ModelName, getMergeableRanks: GetMergeableRanksAsyncFn): Promise<GptEncoding>;
encodeGenerator(lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: EncodeOptions): Generator<number[], number, undefined>;
encodeGenerator(lineToEncode: string, { allowedSpecial, disallowedSpecial }?: EncodeOptions): Generator<number[], number, undefined>;
encode(lineToEncode: string, encodeOptions?: EncodeOptions): number[];

@@ -42,3 +44,3 @@ /**

*/
encodeChatGenerator(chat: Iterable<ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined): Generator<number[], void, undefined>;
encodeChatGenerator(chat: Iterable<ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined): Generator<number[], void, undefined>;
/**

@@ -51,3 +53,3 @@ * Encodes a chat into a single array of tokens.

*/
encodeChat(chat: readonly ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined): number[];
encodeChat(chat: readonly ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined): number[];
/**

@@ -54,0 +56,0 @@ * @returns {false | number} false if token limit is exceeded, otherwise the number of tokens

@@ -15,11 +15,21 @@ /* eslint-disable no-param-reassign */

static FimSuffix = FimSuffix;
modelName;
decoder = new TextDecoder('utf8');
modelName;
bytePairEncodingCoreProcessor;
specialTokenMapping;
constructor({ tokenSplitRegex, mergeableBytePairRanks, specialTokenMapping, expectedVocabularySize, modelName, }) {
const maxTokenValue = Math.max(getMaxValueFromMap(mergeableBytePairRanks), getMaxValueFromMap(specialTokenMapping));
specialTokensSet;
allSpecialTokenRegex;
constructor({ mergeableBytePairRanks, specialTokenMapping, expectedVocabularySize, modelName, ...rest }) {
this.specialTokenMapping = specialTokenMapping;
this.specialTokensSet = new Set(this.specialTokenMapping.keys());
this.allSpecialTokenRegex = getSpecialTokenRegex(this.specialTokensSet);
this.bytePairEncodingCoreProcessor = new BytePairEncodingCore({
mergeableBytePairRanks,
specialTokenMapping,
...rest,
});
const maxTokenValue = Math.max(mergeableBytePairRanks.length - 1, getMaxValueFromMap(specialTokenMapping));
if (expectedVocabularySize !== undefined) {
if (mergeableBytePairRanks.size + specialTokenMapping.size !==
if (this.bytePairEncodingCoreProcessor.bytePairEncoderSize +
specialTokenMapping.size !==
expectedVocabularySize) {

@@ -29,10 +39,5 @@ throw new Error('The number of mergeable tokens and special tokens must be equal to explicit_n_vocab.');

if (maxTokenValue !== expectedVocabularySize - 1) {
throw new Error('The maximum token value must be equal to explicit_n_vocab - 1.');
throw new Error(`The model encodings are invalid. The maximum token value must be equal to expectedVocabularySize - 1. Currently ${maxTokenValue}, expected ${expectedVocabularySize - 1}`);
}
}
this.bytePairEncodingCoreProcessor = new BytePairEncodingCore({
bytePairEncoder: mergeableBytePairRanks,
specialTokenEncoder: specialTokenMapping,
tokenSplitRegex,
});
this.encode = this.encode.bind(this);
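A worked instance of the two invariants checked above, using the r50k_base parameters shown earlier in this diff:

// r50k_base: 50_256 mergeable ranks (0..50_255) + 1 special token (<|endoftext|> → 50_256)
// bytePairEncoderSize + specialTokenMapping.size === 50_257 === expectedVocabularySize
// maxTokenValue === max(50_255, 50_256) === 50_256 === expectedVocabularySize - 1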

@@ -66,14 +71,20 @@ this.decode = this.decode.bind(this);

}
encodeGenerator(lineToEncode, { allowedSpecial = new Set(), disallowedSpecial = new Set([ALL_SPECIAL_TOKENS]), } = {}) {
const specialTokensSet = new Set(this.specialTokenMapping.keys());
if (disallowedSpecial.has(ALL_SPECIAL_TOKENS)) {
disallowedSpecial = new Set(specialTokensSet);
allowedSpecial.forEach((val) => disallowedSpecial.delete(val));
disallowedSpecial.forEach((val) => allowedSpecial.delete(val));
encodeGenerator(lineToEncode, { allowedSpecial, disallowedSpecial } = {}) {
let regexPattern;
if (allowedSpecial?.has(ALL_SPECIAL_TOKENS)) {
allowedSpecial = new Set(this.specialTokensSet);
}
if (allowedSpecial.has(ALL_SPECIAL_TOKENS)) {
allowedSpecial = specialTokensSet;
if (!disallowedSpecial || disallowedSpecial.has(ALL_SPECIAL_TOKENS)) {
// by default, all special tokens are disallowed
disallowedSpecial = new Set(this.specialTokensSet);
if (allowedSpecial?.size) {
allowedSpecial.forEach((val) => disallowedSpecial.delete(val));
disallowedSpecial.forEach((val) => allowedSpecial.delete(val));
regexPattern = getSpecialTokenRegex(disallowedSpecial);
}
else {
regexPattern = this.allSpecialTokenRegex;
}
}
if (disallowedSpecial.size > 0) {
const regexPattern = getSpecialTokenRegex(disallowedSpecial);
if (regexPattern) {
const match = lineToEncode.match(regexPattern);
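Concretely, the branching above means special tokens are disallowed unless opted in. From the caller's side (EndOfText is the end-of-text marker re-exported from the package's specialTokens module):

import { encode, EndOfText } from 'gpt-tokenizer'

// Encoding text that contains a disallowed special token throws.
// Allowing it explicitly maps the marker to its reserved token id instead.
const tokens = encode(`hello ${EndOfText}`, {
  allowedSpecial: new Set([EndOfText]),
})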

@@ -165,3 +176,6 @@ if (match !== null) {

for (const decodedPart of decodedByteGenerator) {
buffer += this.decoder.decode(decodedPart, { stream: true });
buffer +=
typeof decodedPart === 'string'
? decodedPart
: this.decoder.decode(decodedPart, { stream: true });
if (buffer.length === 0 || endsWithIncompleteUtfPairSurrogate(buffer)) {

@@ -187,3 +201,6 @@ // Keep the high surrogate in the buffer and continue with the next token

for await (const decodedPart of decodedByteGenerator) {
buffer += this.decoder.decode(decodedPart, { stream: true });
buffer +=
typeof decodedPart === 'string'
? decodedPart
: this.decoder.decode(decodedPart, { stream: true });
if (buffer.length === 0 || endsWithIncompleteUtfPairSurrogate(buffer)) {

@@ -190,0 +207,0 @@ // Keep the high surrogate in the buffer and continue with the next token
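Both decode loops now accept either raw bytes or an already-decoded string from tryDecodeToken, and keep buffering so a chunk never ends on a lone UTF-16 high surrogate. A small consumer sketch (the token ids are assumed cl100k_base values):

import { decodeGenerator } from 'gpt-tokenizer'

const tokens = [9906, 1917] // assumed to decode to 'Hello world' under cl100k_base
for (const chunk of decodeGenerator(tokens)) {
  // Each emitted chunk is complete, valid UTF-16 text.
  process.stdout.write(chunk)
}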

@@ -182,3 +182,3 @@ import fs from 'fs';

? 127
: modelName === 'gpt-4o'
: modelName.startsWith('gpt-4o')
? 120

@@ -185,0 +185,0 @@ : 121;

@@ -7,12 +7,31 @@ export declare const cl100k_base = "cl100k_base";

export declare const encodingNames: readonly ["cl100k_base", "p50k_base", "r50k_base", "p50k_edit", "o200k_base"];
export declare const modelToEncodingMap: {
declare const chatEnabledModelsMap: {
readonly 'gpt-4': "cl100k_base";
readonly 'gpt-4-0314': "cl100k_base";
readonly 'gpt-4-0613': "cl100k_base";
readonly 'gpt-4-32k': "cl100k_base";
readonly 'gpt-4-32k-0314': "cl100k_base";
readonly 'gpt-4-32k-0613': "cl100k_base";
readonly 'gpt-4-turbo': "cl100k_base";
readonly 'gpt-4-turbo-2024-04-09': "cl100k_base";
readonly 'gpt-4-turbo-preview': "cl100k_base";
readonly 'gpt-4-1106-preview': "cl100k_base";
readonly 'gpt-4-0125-preview': "cl100k_base";
readonly 'gpt-4-vision-preview': "cl100k_base";
readonly 'gpt-4o': "o200k_base";
readonly 'gpt-4o-2024-05-13': "o200k_base";
readonly 'gpt-4o-2024-08-06': "o200k_base";
readonly 'gpt-4o-mini-2024-07-18': "o200k_base";
readonly 'gpt-4o-mini': "o200k_base";
readonly 'gpt-3.5-turbo': "cl100k_base";
readonly 'gpt-3.5-turbo-0301': "cl100k_base";
readonly 'gpt-3.5-turbo-0613': "cl100k_base";
readonly 'gpt-3.5-turbo-1106': "cl100k_base";
readonly 'gpt-3.5-turbo-0125': "cl100k_base";
readonly 'gpt-3.5-turbo-16k': "cl100k_base";
readonly 'gpt-3.5-turbo-16k-0613': "cl100k_base";
readonly 'gpt-3.5-turbo-instruct': "cl100k_base";
readonly 'gpt-3.5-turbo-instruct-0914': "cl100k_base";
};
export declare const modelToEncodingMap: {
readonly 'text-davinci-003': "p50k_base";

@@ -37,2 +56,4 @@ readonly 'text-davinci-002': "p50k_base";

readonly 'text-embedding-ada-002': "cl100k_base";
readonly 'text-embedding-3-small': "cl100k_base";
readonly 'text-embedding-3-large': "cl100k_base";
readonly 'text-similarity-davinci-001': "r50k_base";

@@ -48,2 +69,28 @@ readonly 'text-similarity-curie-001': "r50k_base";

readonly 'code-search-ada-code-001': "r50k_base";
readonly 'gpt-4': "cl100k_base";
readonly 'gpt-4-0314': "cl100k_base";
readonly 'gpt-4-0613': "cl100k_base";
readonly 'gpt-4-32k': "cl100k_base";
readonly 'gpt-4-32k-0314': "cl100k_base";
readonly 'gpt-4-32k-0613': "cl100k_base";
readonly 'gpt-4-turbo': "cl100k_base";
readonly 'gpt-4-turbo-2024-04-09': "cl100k_base";
readonly 'gpt-4-turbo-preview': "cl100k_base";
readonly 'gpt-4-1106-preview': "cl100k_base";
readonly 'gpt-4-0125-preview': "cl100k_base";
readonly 'gpt-4-vision-preview': "cl100k_base";
readonly 'gpt-4o': "o200k_base";
readonly 'gpt-4o-2024-05-13': "o200k_base";
readonly 'gpt-4o-2024-08-06': "o200k_base";
readonly 'gpt-4o-mini-2024-07-18': "o200k_base";
readonly 'gpt-4o-mini': "o200k_base";
readonly 'gpt-3.5-turbo': "cl100k_base";
readonly 'gpt-3.5-turbo-0301': "cl100k_base";
readonly 'gpt-3.5-turbo-0613': "cl100k_base";
readonly 'gpt-3.5-turbo-1106': "cl100k_base";
readonly 'gpt-3.5-turbo-0125': "cl100k_base";
readonly 'gpt-3.5-turbo-16k': "cl100k_base";
readonly 'gpt-3.5-turbo-16k-0613': "cl100k_base";
readonly 'gpt-3.5-turbo-instruct': "cl100k_base";
readonly 'gpt-3.5-turbo-instruct-0914': "cl100k_base";
};
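These declarations let the compiler resolve a model's encoding statically. A usage sketch (treat the subpath import as an assumption; the names match the declarations above):

import { modelToEncodingMap, type EncodingName } from 'gpt-tokenizer/mapping'

// Resolves to 'o200k_base' for the gpt-4o family, 'cl100k_base' for most chat models.
const encoding: EncodingName = modelToEncodingMap['gpt-4o']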

@@ -54,44 +101,7 @@ export interface ChatParameters {

}
declare const internalChatModelParams: {
'gpt-3.5-turbo': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-3.5-turbo-0301': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-3.5-turbo-0613': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-3.5-turbo-16k-0613': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4-0314': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4-32k': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4-32k-0314': {
messageSeparator: string;
roleSeparator: string;
};
'gpt-4o': {
messageSeparator: string;
roleSeparator: string;
};
};
export declare const chatModelParams: Partial<Record<ModelName, ChatParameters>>;
export type ModelName = keyof typeof modelToEncodingMap;
export type ChatModelName = keyof typeof internalChatModelParams;
export type ChatModelName = keyof typeof chatEnabledModelsMap;
export type EncodingName = (typeof modelToEncodingMap)[ModelName];
export declare const chatModelParams: Record<ChatModelName, ChatParameters>;
export declare const chatEnabledModels: ChatModelName[];
export {};

@@ -15,13 +15,33 @@ /* eslint-disable camelcase */

];
export const modelToEncodingMap = {
// chat
const chatEnabledModelsMap = {
'gpt-4': cl100k_base,
'gpt-4-0314': cl100k_base,
'gpt-4-0613': cl100k_base,
'gpt-4-32k': cl100k_base,
'gpt-4-32k-0314': cl100k_base,
'gpt-4-32k-0613': cl100k_base,
'gpt-4-turbo': cl100k_base,
'gpt-4-turbo-2024-04-09': cl100k_base,
'gpt-4-turbo-preview': cl100k_base,
'gpt-4-1106-preview': cl100k_base,
'gpt-4-0125-preview': cl100k_base,
'gpt-4-vision-preview': cl100k_base,
'gpt-4o': o200k_base,
'gpt-4o-2024-05-13': o200k_base,
'gpt-4o-2024-08-06': o200k_base,
'gpt-4o-mini-2024-07-18': o200k_base,
'gpt-4o-mini': o200k_base,
'gpt-3.5-turbo': cl100k_base,
'gpt-3.5-turbo-0301': cl100k_base,
'gpt-3.5-turbo-0613': cl100k_base,
'gpt-3.5-turbo-1106': cl100k_base,
'gpt-3.5-turbo-0125': cl100k_base,
'gpt-3.5-turbo-16k': cl100k_base,
'gpt-3.5-turbo-16k-0613': cl100k_base,
'gpt-3.5-turbo-instruct': cl100k_base,
'gpt-3.5-turbo-instruct-0914': cl100k_base,
};
export const modelToEncodingMap = {
// chat
...chatEnabledModelsMap,
// text

@@ -50,2 +70,4 @@ 'text-davinci-003': p50k_base,

'text-embedding-ada-002': cl100k_base,
'text-embedding-3-small': cl100k_base,
'text-embedding-3-large': cl100k_base,
// old embeddings

@@ -63,41 +85,16 @@ 'text-similarity-davinci-001': r50k_base,

};
const internalChatModelParams = {
'gpt-3.5-turbo': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-0301': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-0613': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-16k-0613': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-4': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4-0314': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4-32k': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4-32k-0314': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4o': {
messageSeparator: '',
roleSeparator: ImSep,
},
const gpt3params = {
messageSeparator: '\n',
roleSeparator: '\n',
};
export const chatModelParams = internalChatModelParams;
const gpt4params = {
messageSeparator: '',
roleSeparator: ImSep,
};
export const chatModelParams = Object.fromEntries(Object.keys(chatEnabledModelsMap).flatMap((modelName) => modelName.startsWith('gpt-4')
? [[modelName, gpt4params]]
: modelName.startsWith('gpt-3.5-turbo')
? [[modelName, gpt3params]]
: []));
export const chatEnabledModels = Object.keys(chatEnabledModelsMap);
//# sourceMappingURL=mapping.js.map
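The flatMap construction above assigns parameters purely by model-name prefix. Written out by hand, the generated chatModelParams is equivalent to this sketch:

// ImSep mirrors the library's special-token constant.
const ImSep = '<|im_sep|>'

// One entry per key of chatEnabledModelsMap, chosen by prefix:
const sketch = {
  'gpt-4': { messageSeparator: '', roleSeparator: ImSep },
  'gpt-4o': { messageSeparator: '', roleSeparator: ImSep },
  'gpt-3.5-turbo': { messageSeparator: '\n', roleSeparator: '\n' },
  // ...and so on for the remaining chat-enabled models
}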
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0301', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0301', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
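Each of these generated files backs a model-specific subpath, so bundlers pull in only the encoding that model needs; dropping convertTokenBytePairEncodingFromTuples means the shipped rank data is now consumed as-is. Typical usage of such an entry point:

// Per-model entry point; loads only the cl100k_base ranks.
import { decode, encode, isWithinTokenLimit } from 'gpt-tokenizer/model/gpt-3.5-turbo'

const tokens = encode('Hello world')
console.log(decode(tokens)) // 'Hello world'
console.log(isWithinTokenLimit('Hello world', 10)) // token count, or false if over the limit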

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0613', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0613', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-16k-0613', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-16k-0613', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-4-0314', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4-0314', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k-0314', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k-0314', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/cl100k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-4', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
declare const api: GptEncoding;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial, }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "gpt-4" | "gpt-4-32k" | "gpt-4-0314" | "gpt-4-32k-0314" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-16k-0613" | "gpt-4o" | "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | undefined) => Generator<number[], void, undefined>;
declare const decode: (inputTokensToDecode: Iterable<number>) => string, decodeAsyncGenerator: (inputTokensToDecode: AsyncIterable<number>) => AsyncGenerator<string, void>, decodeGenerator: (inputTokensToDecode: Iterable<number>) => Generator<string, void>, encode: (lineToEncode: string, encodeOptions?: import("../GptEncoding.js").EncodeOptions) => number[], encodeGenerator: (lineToEncode: string, { allowedSpecial, disallowedSpecial }?: import("../GptEncoding.js").EncodeOptions) => Generator<number[], number, undefined>, isWithinTokenLimit: (input: string | Iterable<import("../GptEncoding.js").ChatMessage>, tokenLimit: number) => false | number, encodeChat: (chat: readonly import("../GptEncoding.js").ChatMessage[], model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => number[], encodeChatGenerator: (chat: Iterable<import("../GptEncoding.js").ChatMessage>, model?: "text-davinci-003" | "text-davinci-002" | "text-davinci-001" | "text-curie-001" | "text-babbage-001" | "text-ada-001" | "davinci" | "curie" | "babbage" | "ada" | "code-davinci-002" | "code-davinci-001" | "code-cushman-002" | "code-cushman-001" | "davinci-codex" | "cushman-codex" | "text-davinci-edit-001" | "code-davinci-edit-001" | "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large" | "text-similarity-davinci-001" | "text-similarity-curie-001" | "text-similarity-babbage-001" | "text-similarity-ada-001" | "text-search-davinci-doc-001" | "text-search-curie-doc-001" | "text-search-babbage-doc-001" | "text-search-ada-doc-001" | "code-search-babbage-code-001" | "code-search-ada-code-001" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-0125-preview" | "gpt-4-vision-preview" | "gpt-4o" | "gpt-4o-2024-05-13" | "gpt-4o-2024-08-06" | "gpt-4o-mini-2024-07-18" | "gpt-4o-mini" | "gpt-3.5-turbo" | "gpt-3.5-turbo-0301" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-16k-0613" | "gpt-3.5-turbo-instruct" | "gpt-3.5-turbo-instruct-0914" | undefined) => Generator<number[], void, undefined>;
export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };
export default api;
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js';
import encoder from '../encodings/o200k_base.js';
import { GptEncoding } from '../GptEncoding.js';
export * from '../specialTokens.js';
const api = GptEncoding.getEncodingApiForModel('gpt-4o', () => convertTokenBytePairEncodingFromTuples(encoder));
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4o', () => encoder);
const { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeGenerator, isWithinTokenLimit, encodeChat, encodeChatGenerator, } = api;

@@ -8,0 +8,0 @@ export { decode, decodeAsyncGenerator, decodeGenerator, encode, encodeChat, encodeChatGenerator, encodeGenerator, isWithinTokenLimit, };

@@ -1,4 +0,4 @@

import { EncoderMap } from './EncoderMap.js';
import type { BytePairEncodingConfig, RawBytePairRanks } from './BytePairEncodingCore.js';
import type { EncodingName, ModelName } from './mapping.js';
export interface EncodingParams {
export interface EncodingParams extends BytePairEncodingConfig {
/**

@@ -16,9 +16,11 @@ * The expected total number of tokens in the vocabulary, including both regular and special tokens.

tokenSplitRegex: RegExp;
mergeableBytePairRanks: EncoderMap;
specialTokenMapping: Map<string, number>;
modelName?: ModelName;
/** increases memory consumption, but speeds up subsequent decoding */
enableCache?: boolean;
}
export type GetMergeableRanksFn = (encodingName: EncodingName) => EncoderMap;
export type GetMergeableRanksAsyncFn = (encodingName: EncodingName) => Promise<EncoderMap>;
export declare const tokenSplitRegex: RegExp;
export type GetMergeableRanksFn = (encodingName: EncodingName) => RawBytePairRanks;
export type GetMergeableRanksAsyncFn = (encodingName: EncodingName) => Promise<RawBytePairRanks>;
export declare function getEncodingParams(encodingName: EncodingName, getMergeableRanks: GetMergeableRanksFn): EncodingParams;
export declare function getModelParamsAsync(encodingName: EncodingName, getMergeableRanks: GetMergeableRanksAsyncFn): Promise<EncodingParams>;
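For orientation, a minimal sketch of how these pieces fit together in 2.3.0 (the relative import paths are assumed from the source tree):

```ts
// Sketch only: resolveEncoding now returns RawBytePairRanks directly,
// and EncodingParams extends BytePairEncodingConfig, so the params object
// can be handed straight to the core encoder without a conversion step.
import { BytePairEncodingCore } from './BytePairEncodingCore.js'
import { getEncodingParams } from './modelParams.js' // assumed filename
import { resolveEncoding } from './resolveEncoding.js'

const params = getEncodingParams('cl100k_base', resolveEncoding)
const core = new BytePairEncodingCore(params)
```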

@@ -1,68 +0,7 @@

/* eslint-disable no-magic-numbers */
import { EncoderMap } from './EncoderMap.js';
import { EndOfPrompt, EndOfText, FimMiddle, FimPrefix, FimSuffix, ImEnd, ImSep, ImStart, } from './specialTokens.js';
const tokenSplitRegex = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
function R50KBase(mergeableBytePairRanks) {
return {
expectedVocabularySize: 50_257,
tokenSplitRegex,
mergeableBytePairRanks,
specialTokenMapping: new Map([[EndOfText, 50_256]]),
};
}
function P50KBase(mergeableBytePairRanks) {
return {
expectedVocabularySize: 50_281,
tokenSplitRegex,
mergeableBytePairRanks,
specialTokenMapping: new Map([[EndOfText, 50_256]]),
};
}
function P50KEdit(mergeableBytePairRanks) {
const specialTokenMapping = new Map([
[EndOfText, 50_256],
[FimPrefix, 50_281],
[FimMiddle, 50_282],
[FimSuffix, 50_283],
]);
return {
tokenSplitRegex,
mergeableBytePairRanks,
specialTokenMapping,
};
}
function Cl100KBase(mergeableBytePairRanks) {
const specialTokenMapping = new Map([
[EndOfText, 100_257],
[FimPrefix, 100_258],
[FimMiddle, 100_259],
[FimSuffix, 100_260],
[ImStart, 100_264],
[ImEnd, 100_265],
[ImSep, 100_266],
[EndOfPrompt, 100_276],
]);
return {
tokenSplitRegex: /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu,
mergeableBytePairRanks,
specialTokenMapping,
};
}
function O200KBase(mergeableBytePairRanks) {
const specialTokenMapping = new Map([
[EndOfText, 199_999],
[FimPrefix, 200_000],
[FimMiddle, 200_001],
[FimSuffix, 200_002],
[ImStart, 200_003],
[ImEnd, 200_004],
[ImSep, 200_005],
[EndOfPrompt, 200_006],
]);
return {
tokenSplitRegex: /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu,
mergeableBytePairRanks,
specialTokenMapping,
};
}
import { Cl100KBase } from './encodingParams/Cl100KBase.js';
import { O200KBase } from './encodingParams/O200KBase.js';
import { P50KBase } from './encodingParams/P50KBase.js';
import { P50KEdit } from './encodingParams/P50KEdit.js';
import { R50KBase } from './encodingParams/R50KBase.js';
export const tokenSplitRegex = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
export function getEncodingParams(encodingName, getMergeableRanks) {

@@ -69,0 +8,0 @@ const mergeableBytePairRanks = getMergeableRanks(encodingName);
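Each encoding factory bundles its split regex and special-token table; a hedged sketch of assembling params by hand (paths as in this release's source tree):

```ts
import { Cl100KBase } from './encodingParams/Cl100KBase.js'
import { resolveEncoding } from './resolveEncoding.js'
import { EndOfText } from './specialTokens.js'

// resolveEncoding('cl100k_base') yields the RawBytePairRanks array;
// Cl100KBase wraps it with the cl100k split regex and special tokens.
const params = Cl100KBase(resolveEncoding('cl100k_base'))
params.specialTokenMapping.get(EndOfText) // 100257, per the mapping above
```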

@@ -1,3 +0,3 @@

import type { EncoderMap } from './EncoderMap.js';
import type { RawBytePairRanks } from './BytePairEncodingCore.js';
import type { EncodingName } from './mapping.js';
export declare const resolveEncoding: (encoding: EncodingName) => EncoderMap;
export declare const resolveEncoding: (encoding: EncodingName) => RawBytePairRanks;

@@ -1,3 +0,1 @@

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from './convertTokenBytePairEncodingFromTuples.js';
import cl100k from './encodings/cl100k_base.js';

@@ -10,10 +8,10 @@ import o200k from './encodings/o200k_base.js';

case 'r50k_base':
return convertTokenBytePairEncodingFromTuples(r50k);
return r50k;
case 'p50k_base':
case 'p50k_edit':
return convertTokenBytePairEncodingFromTuples(p50k);
return p50k;
case 'cl100k_base':
return convertTokenBytePairEncodingFromTuples(cl100k);
return cl100k;
case 'o200k_base':
return convertTokenBytePairEncodingFromTuples(o200k);
return o200k;
default: {

@@ -20,0 +18,0 @@ throw new Error(`Unknown encoding name: ${encoding}`);

@@ -1,3 +0,3 @@

import type { EncoderMap } from './EncoderMap.js';
import type { RawBytePairRanks } from './BytePairEncodingCore.js';
import type { EncodingName } from './mapping.js';
export declare const resolveEncodingAsync: (encoding: EncodingName) => Promise<EncoderMap>;
export declare const resolveEncodingAsync: (encoding: EncodingName) => Promise<RawBytePairRanks>;

@@ -1,14 +0,12 @@

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from './convertTokenBytePairEncodingFromTuples.js';
export const resolveEncodingAsync = async (encoding) => {
switch (encoding) {
case 'r50k_base':
return convertTokenBytePairEncodingFromTuples(await import('./encodings/r50k_base.js').then(({ default: encodingTuples }) => encodingTuples));
return import('./encodings/r50k_base.js').then(({ default: rawBytePairRanks }) => rawBytePairRanks);
case 'p50k_base':
case 'p50k_edit':
return convertTokenBytePairEncodingFromTuples(await import('./encodings/p50k_base.js').then(({ default: encodingTuples }) => encodingTuples));
return import('./encodings/p50k_base.js').then(({ default: rawBytePairRanks }) => rawBytePairRanks);
case 'cl100k_base':
return convertTokenBytePairEncodingFromTuples(await import('./encodings/cl100k_base.js').then(({ default: encodingTuples }) => encodingTuples));
return import('./encodings/cl100k_base.js').then(({ default: rawBytePairRanks }) => rawBytePairRanks);
case 'o200k_base':
return convertTokenBytePairEncodingFromTuples(await import('./encodings/o200k_base.js').then(({ default: encodingTuples }) => encodingTuples));
return import('./encodings/o200k_base.js').then(({ default: rawBytePairRanks }) => rawBytePairRanks);
default: {

@@ -15,0 +13,0 @@ throw new Error(`Unknown encoding name: ${encoding}`);
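The async resolver mirrors the sync one, but defers the large encoding payloads to dynamic imports. A hedged usage sketch (paths assumed):

```ts
import { BytePairEncodingCore } from './BytePairEncodingCore.js'
import { O200KBase } from './encodingParams/O200KBase.js'
import { resolveEncodingAsync } from './resolveEncodingAsync.js'

// Nothing heavy loads until this call resolves:
const rawRanks = await resolveEncodingAsync('o200k_base')
const core = new BytePairEncodingCore(O200KBase(rawRanks))
```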

@@ -0,1 +1,4 @@

export declare const isAscii: (codePoint: number) => boolean;
export declare function endsWithIncompleteUtfPairSurrogate(string: string): boolean;
export declare function tryConvertToString(arr: Uint8Array): string | undefined;
export declare function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number;

@@ -0,1 +1,4 @@

/* eslint-disable no-bitwise */
/* eslint-disable no-magic-numbers */
export const isAscii = (codePoint) => codePoint <= 0x7f;
const HIGH_SURROGATE_START = 55_296;

@@ -11,2 +14,82 @@ const HIGH_SURROGATE_END = 56_319;

}
function isValidUTF8(bytes) {
let i = 0;
while (i < bytes.length) {
const byte1 = bytes[i];
let numBytes = 0;
let codePoint = 0;
// Determine the number of bytes in the current UTF-8 character
if (byte1 <= 0x7f) {
// 1-byte character (ASCII)
numBytes = 1;
codePoint = byte1;
}
else if ((byte1 & 0xe0) === 0xc0) {
// 2-byte character
numBytes = 2;
codePoint = byte1 & 0x1f;
if (byte1 <= 0xc1)
return false; // Overlong encoding not allowed
}
else if ((byte1 & 0xf0) === 0xe0) {
// 3-byte character
numBytes = 3;
codePoint = byte1 & 0x0f;
}
else if ((byte1 & 0xf8) === 0xf0) {
// 4-byte character
numBytes = 4;
codePoint = byte1 & 0x07;
if (byte1 > 0xf4)
return false; // Code points above U+10FFFF not allowed
}
else {
// Invalid first byte of UTF-8 character
return false;
}
// Ensure there are enough continuation bytes
if (i + numBytes > bytes.length)
return false;
// Process the continuation bytes
for (let j = 1; j < numBytes; j++) {
const byte = bytes[i + j];
if (byte === undefined || (byte & 0xc0) !== 0x80)
return false; // Continuation bytes must start with '10'
codePoint = (codePoint << 6) | (byte & 0x3f);
}
// Check for overlong encodings
if (numBytes === 2 && codePoint < 0x80)
return false; // Overlong 2-byte sequence
if (numBytes === 3 && codePoint < 2_048)
return false; // Overlong 3-byte sequence
if (numBytes === 4 && codePoint < 65_536)
return false; // Overlong 4-byte sequence
// Check for surrogate halves (U+D800 to U+DFFF)
if (codePoint >= 55_296 && codePoint <= 57_343)
return false;
// Check for code points above U+10FFFF
if (codePoint > 1_114_111)
return false;
// Move to the next character
i += numBytes;
}
return true;
}
const textDecoder = new TextDecoder('utf8', { fatal: false });
export function tryConvertToString(arr) {
if (!isValidUTF8(arr)) {
return undefined;
}
return textDecoder.decode(arr);
}
// Helper function to compare two Uint8Arrays lexicographically
export function compareUint8Arrays(a, b) {
const len = Math.min(a.length, b.length);
for (let i = 0; i < len; i++) {
if (a[i] !== b[i]) {
return a[i] - b[i];
}
}
return a.length - b.length;
}
//# sourceMappingURL=utfUtil.js.map
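A quick sketch of how these helpers behave (values verifiable by hand from the UTF-8 rules above):

```ts
import { compareUint8Arrays, tryConvertToString } from './utfUtil.js'

tryConvertToString(new Uint8Array([0x68, 0x69])) // 'hi' — valid UTF-8
tryConvertToString(new Uint8Array([0xff])) // undefined — invalid first byte

compareUint8Arrays(new Uint8Array([1, 2]), new Uint8Array([1, 3])) // negative
compareUint8Arrays(new Uint8Array([1, 2]), new Uint8Array([1, 2])) // 0
// On a shared prefix, the longer array sorts last:
compareUint8Arrays(new Uint8Array([1, 2, 0]), new Uint8Array([1, 2])) // positive
```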
export declare function getMaxValueFromMap(map: Map<unknown, number>): number;
export declare function escapeRegExp(string: string): string;
export declare function getSpecialTokenRegex(tokens: Set<string>): RegExp;

@@ -1,2 +0,1 @@

import { escapeRegExp } from './escapeRegExp.js';
export function getMaxValueFromMap(map) {

@@ -9,2 +8,5 @@ let max = 0;

}
export function escapeRegExp(string) {
return string.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&'); // $& means the whole matched string
}
export function getSpecialTokenRegex(tokens) {

@@ -11,0 +13,0 @@ const escapedTokens = [...tokens].map(escapeRegExp);
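A sketch of the escape-then-join idea behind these helpers (the exact flags of the returned regex are an assumption):

```ts
import { escapeRegExp, getSpecialTokenRegex } from './util.js'

escapeRegExp('<|endoftext|>') // '<\\|endoftext\\|>' — '|' is in the escaped set
const specialRegex = getSpecialTokenRegex(
  new Set(['<|endoftext|>', '<|im_start|>']),
)
// matches any of the given tokens literally:
'an <|endoftext|> marker'.match(specialRegex) // ['<|endoftext|>']
```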

{
"name": "gpt-tokenizer",
"version": "2.2.3",
"version": "2.3.0",
"description": "A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models",

@@ -80,3 +80,3 @@ "keywords": [

"build:cjs": "yarn rrun tsc --outDir cjs --module commonjs --target es2022 --project tsconfig-cjs.json",
"build:esm": "yarn rrun tsc --outDir esm --module esnext --target es2022 && echo '{\"name\": \"gpt-tokenizer\", \"type\": \"module\"}' > ./esm/package.json",
"build:esm": "mkdir -p esm && echo '{\"name\": \"gpt-tokenizer\", \"type\": \"module\"}' > ./esm/package.json && yarn rrun tsc --outDir esm --module esnext --target es2022",
"build:umd": "yarn build:umd:cl100k_base && yarn build:umd:p50k_base && yarn build:umd:p50k_edit && yarn build:umd:r50k_base && yarn build:umd:o200k_base",

@@ -130,6 +130,3 @@ "build:umd:cl100k_base": "beemo webpack --entry='./src/main.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_cl100k_base' --env 'filename=cl100k_base.js'",

"access": "public"
},
"dependencies": {
"rfc4648": "^1.5.3"
}
}

@@ -15,2 +15,3 @@ # gpt-tokenizer

- Support for all current OpenAI models (available encodings: `r50k_base`, `p50k_base`, `p50k_edit`, `cl100k_base` and `o200k_base`)
- Can be loaded and used synchronously! (i.e. in non-async/await contexts)
- Generator function versions of both the decoder and encoder functions

@@ -49,7 +50,7 @@ - Provides the ability to decode an asynchronous stream of data (using `decodeAsyncGenerator` and `decodeGenerator` with any iterable input)

- https://unpkg.com/gpt-tokenizer/dist/cl100k_base.js
- https://unpkg.com/gpt-tokenizer/dist/o200k_base.js (for `gpt-4o`)
- https://unpkg.com/gpt-tokenizer/dist/cl100k_base.js (for `gpt-4-*` and `gpt-3.5-turbo`)
- https://unpkg.com/gpt-tokenizer/dist/p50k_base.js
- https://unpkg.com/gpt-tokenizer/dist/p50k_edit.js
- https://unpkg.com/gpt-tokenizer/dist/r50k_base.js
- https://unpkg.com/gpt-tokenizer/dist/o200k_base.js

@@ -136,3 +137,4 @@ The global name is a concatenation: `GPTTokenizer_${encoding}`.

isWithinTokenLimit,
} from 'gpt-tokenizer/model/text-davinci-003'
// etc...
} from 'gpt-tokenizer/model/gpt-3.5-turbo'
```

@@ -147,62 +149,44 @@

isWithinTokenLimit,
} from 'gpt-tokenizer/cjs/model/text-davinci-003'
// etc...
} from 'gpt-tokenizer/cjs/model/gpt-3.5-turbo'
```
### Supported models and their encodings

chat:

- `gpt-4-32k` (`cl100k_base`)
- `gpt-4-0314` (`cl100k_base`)
- `gpt-4-32k-0314` (`cl100k_base`)
- `gpt-3.5-turbo` (`cl100k_base`)
- `gpt-3.5-turbo-0301` (`cl100k_base`)
- `gpt-4o` (`o200k_base`)

text-only:

- `text-davinci-003` (`p50k_base`)
- `text-davinci-002` (`p50k_base`)
- `text-davinci-001` (`r50k_base`)
- `text-curie-001` (`r50k_base`)
- `text-babbage-001` (`r50k_base`)
- `text-ada-001` (`r50k_base`)
- `davinci` (`r50k_base`)
- `curie` (`r50k_base`)
- `babbage` (`r50k_base`)
- `ada` (`r50k_base`)

code:

- `code-davinci-002` (`p50k_base`)
- `code-davinci-001` (`p50k_base`)
- `code-cushman-002` (`p50k_base`)
- `code-cushman-001` (`p50k_base`)
- `davinci-codex` (`p50k_base`)
- `cushman-codex` (`p50k_base`)

edit:

- `text-davinci-edit-001` (`p50k_edit`)
- `code-davinci-edit-001` (`p50k_edit`)

embeddings:

- `text-embedding-ada-002` (`cl100k_base`)

old embeddings:

- `text-similarity-davinci-001` (`r50k_base`)
- `text-similarity-curie-001` (`r50k_base`)
- `text-similarity-babbage-001` (`r50k_base`)
- `text-similarity-ada-001` (`r50k_base`)
- `text-search-davinci-doc-001` (`r50k_base`)
- `text-search-curie-doc-001` (`r50k_base`)
- `text-search-babbage-doc-001` (`r50k_base`)
- `text-search-ada-doc-001` (`r50k_base`)
- `code-search-babbage-code-001` (`r50k_base`)
- `code-search-ada-code-001` (`r50k_base`)

#### Lazy loading

If you don't mind loading the tokenizer asynchronously, you can use a dynamic import inside your function, like so:

```ts
const {
  encode,
  decode,
  isWithinTokenLimit,
  // etc...
} = await import('gpt-tokenizer/model/gpt-3.5-turbo')
```

#### Loading an encoding

If your model isn't supported by the package, but you know which BPE encoding it uses, you can load the encoding directly, e.g.:

```ts
import {
  encode,
  decode,
  isWithinTokenLimit,
  // etc...
} from 'gpt-tokenizer/encoding/cl100k_base'
```

### Supported models and their encodings

- `gpt-4o` (`o200k_base`)
- `gpt-4-*` (`cl100k_base`)
- `gpt-3.5-turbo` (`cl100k_base`)
- `text-davinci-003` (`p50k_base`)
- `text-davinci-002` (`p50k_base`)
- `text-davinci-001` (`r50k_base`)
- `text-curie-001` (`r50k_base`)
- `text-babbage-001` (`r50k_base`)
- `text-ada-001` (`r50k_base`)
- `davinci` (`r50k_base`)
- `curie` (`r50k_base`)
- `babbage` (`r50k_base`)
- `ada` (`r50k_base`)
- ...and many other models, see [mapping](./src/mapping.ts) for an up-to-date list of supported models and their encodings.

Note: if you're using `gpt-3.5-*` or `gpt-4-*` and don't see the model you're looking for, use the `cl100k_base` encoding directly.
## API

@@ -268,2 +252,4 @@

Note that if you encode an empty chat, it will still contain the minimum number of special tokens.
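For example (a sketch using the model-scoped entry point, as in the import examples above):

```ts
import { encodeChat } from 'gpt-tokenizer/model/gpt-3.5-turbo'

// An empty chat still produces the chat scaffolding
// (im_start/im_sep/im_end priming tokens), so the count is non-zero:
const tokens = encodeChat([])
console.log(tokens.length > 0) // true
```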
### `encodeGenerator(text: string): Generator<number[], number, undefined>`

@@ -361,6 +347,6 @@

```ts
import { encode } from 'gpt-tokenizer'
import { encode, EndOfText } from 'gpt-tokenizer'
const inputText = `Some Text`
const disallowedSpecial = new Set(['Some'])
const inputText = `Some Text ${EndOfText}`
const disallowedSpecial = new Set([EndOfText])
// throws an error:

@@ -367,0 +353,0 @@ const encoded = encode(inputText, undefined, disallowedSpecial)

@@ -1,36 +0,54 @@

import { EncoderMap } from './EncoderMap.js'
import { escapeRegExp } from './escapeRegExp.js'
/* eslint-disable no-continue */
export class BytePairEncodingCore {
encoder: EncoderMap
decoder: Map<number, Uint8Array>
import { compareUint8Arrays, isAscii, tryConvertToString } from './utfUtil.js'
import { escapeRegExp } from './util.js'
export type RawBytePairRanks = readonly (string | readonly number[])[]
export interface BytePairEncodingConfig {
mergeableBytePairRanks: RawBytePairRanks
specialTokenMapping?: Map<string, number>
tokenSplitRegex: RegExp
specialTokensEncoder: Map<string, number>
specialTokensDecoder: Map<number, Uint8Array>
specialTokenPatternRegex: RegExp
}
textEncoder = new TextEncoder()
export class BytePairEncodingCore {
readonly bytePairEncoderSize: number
private bytePairEncoder: RawBytePairRanks
private bytePairEncoderSortedLookup: readonly [Uint8Array, number][]
private bytePairRanksDecoder = new Map<number, Uint8Array>()
private tokenSplitRegex: RegExp
private specialTokensEncoder: Map<string, number>
private specialTokensDecoder: Map<number, string>
private specialTokenPatternRegex: RegExp
private stringDecoder: Map<string, number>
private textEncoder = new TextEncoder()
constructor({
bytePairEncoder,
specialTokenEncoder,
mergeableBytePairRanks: bytePairEncoder,
specialTokenMapping: specialTokenEncoder,
tokenSplitRegex,
}: {
bytePairEncoder: EncoderMap
specialTokenEncoder?: Map<string, number>
tokenSplitRegex: RegExp
}) {
this.encoder = bytePairEncoder ?? new EncoderMap()
this.decoder = bytePairEncoder
? new Map([...bytePairEncoder].map(([key, value]) => [value, key]))
: new Map<number, Uint8Array>()
}: BytePairEncodingConfig) {
this.bytePairEncoder = bytePairEncoder
this.stringDecoder = new Map<string, number>()
// size without array holes (which may be present in the encoder)
this.bytePairEncoderSize = Object.keys(bytePairEncoder).length
const binaryLookup: [Uint8Array, number][] = []
// forEach skips array holes:
bytePairEncoder.forEach((value, rank) => {
if (typeof value === 'string') {
this.stringDecoder.set(value, rank)
return
}
const byteArray = new Uint8Array(value)
binaryLookup.push([byteArray, rank])
this.bytePairRanksDecoder.set(rank, byteArray)
})
this.bytePairEncoderSortedLookup = binaryLookup.sort((a, b) =>
compareUint8Arrays(a[0], b[0]),
)
this.specialTokensEncoder = specialTokenEncoder ?? new Map<string, number>()
this.specialTokensDecoder = specialTokenEncoder
? new Map(
[...specialTokenEncoder].map(([key, value]) => [
value,
this.textEncoder.encode(key),
]),
)
: new Map<number, Uint8Array>()
? new Map([...specialTokenEncoder].map(([key, value]) => [value, key]))
: new Map<number, string>()
this.tokenSplitRegex = tokenSplitRegex

@@ -47,5 +65,74 @@

getBpeRankFromString(key: string): number | undefined {
return this.stringDecoder.get(key)
}
getBpeRankFromStringOrThrow(key: string): number {
const value = this.getBpeRankFromString(key)
if (value === undefined) {
throw new Error(
`The byte-pair encoding does not contain a value for: ${key}`,
)
}
return value
}
getBpeRankFromBytes(key: Uint8Array): number | undefined {
const keyAsString = tryConvertToString(key)
if (keyAsString !== undefined) {
return this.getBpeRankFromString(keyAsString)
}
// Perform binary search on the binary keys
const index = this.binarySearch(key)
if (index !== -1) {
return this.bytePairEncoderSortedLookup[index]![1]
}
return undefined
}
getBpeRankFromBytesOrThrow(key: Uint8Array): number {
const value = this.getBpeRankFromBytes(key)
if (value === undefined) {
throw new Error(
`The byte-pair encoding does not contain a value for: ${key.toString()}`,
)
}
return value
}
// Binary search on the binary keys
binarySearch(key: Uint8Array): number {
let low = 0
let high = this.bytePairEncoderSortedLookup.length - 1
while (low <= high) {
// eslint-disable-next-line no-bitwise
const mid = (low + high) >>> 1
const midKey = this.bytePairEncoderSortedLookup[mid]![0]
let cmp = 0
for (let i = 0; i < Math.min(midKey.length, key.length); i++) {
cmp = midKey[i]! - key[i]!
if (cmp !== 0) break
}
if (cmp === 0) {
cmp = midKey.length - key.length
}
if (cmp === 0) {
return mid
}
if (cmp < 0) {
low = mid + 1
} else {
high = mid - 1
}
}
return -1
}
*encodeNative(
text: string,
allowedSpecial: Set<string>,
allowedSpecial?: Set<string>,
): Generator<number[], number, undefined> {

@@ -71,12 +158,11 @@ let startIndex = 0

for (const [match] of textSegment.matchAll(this.tokenSplitRegex)) {
const encodedPiece = this.textEncoder.encode(match)
const token = this.encoder.get(encodedPiece)
const token = this.getBpeRankFromString(match)
if (token !== undefined) {
lastTokenLength = 1
yield [token]
// eslint-disable-next-line no-continue
continue
}
const tokens = this.bytePairEncode(encodedPiece, this.encoder)
const tokens = this.bytePairEncode(match)
lastTokenLength = tokens.length

@@ -107,3 +193,3 @@ yield tokens

text: string,
allowedSpecial: Set<string>,
allowedSpecial: Set<string> | undefined,
startIndex: number,

@@ -126,3 +212,3 @@ specialRegex: RegExp,

if (allowedSpecial.has(specialToken)) {
if (allowedSpecial?.has(specialToken)) {
return nextSpecialMatch.index + searchIndex

@@ -135,3 +221,5 @@ }

*decodeNative(tokens: Iterable<number>): Generator<Uint8Array> {
*decodeNative(
tokens: Iterable<number>,
): Generator<Uint8Array | string, void, void> {
for (const token of tokens) {

@@ -147,7 +235,7 @@ const tokenBytes = this.tryDecodeToken(token)

tokens: AsyncIterable<number>,
): AsyncGenerator<Uint8Array> {
): AsyncGenerator<Uint8Array | string> {
for await (const token of tokens) {
const tokenBytes = this.tryDecodeToken(token)
if (tokenBytes) {
yield tokenBytes
const tokenBytesOrString = this.tryDecodeToken(token)
if (tokenBytesOrString) {
yield tokenBytesOrString
}

@@ -157,14 +245,26 @@ }

tryDecodeToken(token: number): Uint8Array | undefined {
return this.decoder.get(token) ?? this.specialTokensDecoder.get(token)
tryDecodeToken(tokenRank: number): Uint8Array | string | undefined {
const value = this.bytePairEncoder[tokenRank]
if (typeof value === 'string') {
return value
}
if (typeof value === 'object') {
const fromBinary = this.bytePairRanksDecoder.get(tokenRank)
if (fromBinary) {
return fromBinary
}
}
return this.specialTokensDecoder.get(tokenRank)
}
bytePairEncode(inputBytes: Uint8Array, bytePairRanks: EncoderMap): number[] {
if (inputBytes.length === 1) {
return [bytePairRanks.getOrThrow(inputBytes)]
bytePairEncode(input: string): number[] {
if (input.length === 1 && isAscii(input.codePointAt(0)!)) {
return [this.getBpeRankFromStringOrThrow(input)]
}
return this.bytePairMerge(inputBytes, bytePairRanks, (pair) => {
const key = inputBytes.slice(pair.start, pair.end)
return bytePairRanks.getOrThrow(key)
const inputBytes = this.textEncoder.encode(input)
return this.bytePairMerge(inputBytes, (start, end) => {
const key = inputBytes.subarray(start, end)
return this.getBpeRankFromBytesOrThrow(key)
})

@@ -174,26 +274,39 @@ }

bytePairMerge(
// Input array of bytes to process
piece: Uint8Array,
bytePairRanks: EncoderMap,
transform: (pair: { start: number; end: number }) => number,
// Function to apply to each final segment after merging
getByteForRange: (start: number, end: number) => number,
): number[] {
// Create an array of partition objects. Each partition tracks the start index in 'piece'
// and a rank value for adjacent pairs (initially set to positive infinity).
const partitions = Array.from({ length: piece.length + 1 }, (_, i) => ({
start: i,
rank: Number.POSITIVE_INFINITY,
rank: Number.POSITIVE_INFINITY, // Rank starts at infinity (unmerged)
}))
// Helper function to get the rank of a byte pair starting at 'startIndex'.
// 'skip' determines how far we look ahead (usually 0, for consecutive pairs).
const getRank = (startIndex: number, skip: number): number | undefined => {
if (startIndex + skip + 2 >= partitions.length) {
// Avoid out-of-bounds errors, return undefined when no valid pair exists
return undefined
}
const key = piece.slice(
// Get the byte pair by extracting a subarray starting at 'startIndex' and ending at
// the start of the partition after 'skip + 2'.
const key = piece.subarray(
partitions[startIndex]!.start,
partitions[startIndex + skip + 2]!.start,
)
return bytePairRanks.get(key)
// Retrieve the rank of this byte pair from the BPE rank function
return this.getBpeRankFromBytes(key)
}
// Initialize the ranks for all adjacent pairs in the array
for (let i = 0; i < partitions.length - 2; i++) {
// Get the rank for the pair starting at index 'i'
const rank = getRank(i, 0)
if (rank !== undefined) {
// Assign the rank to the partition at index 'i'
partitions[i]!.rank = rank

@@ -203,2 +316,3 @@ }

// Iteratively merge byte pairs until no more useful merges can be done
while (partitions.length > 1) {

@@ -208,2 +322,3 @@ let minRank = Number.POSITIVE_INFINITY

// Find the partition with the minimum rank, i.e., the most important pair to merge next
let i = 0

@@ -218,2 +333,3 @@ for (const partition of partitions) {

// If no valid pair is left to merge, exit the loop
if (minRank === Number.POSITIVE_INFINITY) {

@@ -223,5 +339,7 @@ break

// Update the rank of the partition after the merged one
partitions[minRankIdx]!.rank =
getRank(minRankIdx, 1) ?? Number.POSITIVE_INFINITY
// Update the rank of the partition before the merged one (if exists)
if (minRankIdx > 0) {

@@ -232,12 +350,16 @@ partitions[minRankIdx - 1]!.rank =

// Merge by removing the partition after the one we just merged
partitions.splice(minRankIdx + 1, 1)
}
// Create the final output by applying the transform function to each partitioned range
const output: number[] = []
for (let i = 0; i < partitions.length - 1; i++) {
output.push(
transform({
start: partitions[i]!.start,
end: partitions[i + 1]!.start,
}),
getByteForRange(
// start index
partitions[i]!.start,
// end index
partitions[i + 1]!.start,
),
)

@@ -244,0 +366,0 @@ }
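To make the merge loop concrete, here is a self-contained sketch of the same partition strategy on plain strings, with an invented three-entry rank table (real ranks come from the encoding files):

```ts
// Toy ranks, invented for illustration.
const toyRanks = new Map<string, number>([
  ['ab', 0],
  ['cd', 1],
  ['abcd', 2],
])

function toyBytePairMerge(piece: string): string[] {
  // One partition boundary per character, plus a terminator.
  const parts = Array.from({ length: piece.length + 1 }, (_, i) => ({
    start: i,
    rank: Number.POSITIVE_INFINITY,
  }))
  const getRank = (i: number, skip: number): number | undefined =>
    i + skip + 2 < parts.length
      ? toyRanks.get(piece.slice(parts[i]!.start, parts[i + skip + 2]!.start))
      : undefined
  // Seed ranks for all adjacent pairs.
  for (let i = 0; i < parts.length - 2; i++) {
    parts[i]!.rank = getRank(i, 0) ?? Number.POSITIVE_INFINITY
  }
  while (parts.length > 1) {
    // Find the lowest-ranked (most mergeable) pair.
    let minRank = Number.POSITIVE_INFINITY
    let minIdx = 0
    for (let i = 0; i < parts.length - 1; i++) {
      if (parts[i]!.rank < minRank) {
        minRank = parts[i]!.rank
        minIdx = i
      }
    }
    if (minRank === Number.POSITIVE_INFINITY) break
    // Re-rank the merged partition and its left neighbour, then drop
    // the boundary that the merge consumed.
    parts[minIdx]!.rank = getRank(minIdx, 1) ?? Number.POSITIVE_INFINITY
    if (minIdx > 0) {
      parts[minIdx - 1]!.rank = getRank(minIdx - 1, 1) ?? Number.POSITIVE_INFINITY
    }
    parts.splice(minIdx + 1, 1)
  }
  const output: string[] = []
  for (let i = 0; i < parts.length - 1; i++) {
    output.push(piece.slice(parts[i]!.start, parts[i + 1]!.start))
  }
  return output
}

// 'ab' merges first (rank 0), then 'cd' (rank 1), and the two merged
// segments then combine as 'abcd' (rank 2):
toyBytePairMerge('abcd') // ['abcd']
```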

@@ -27,2 +27,3 @@ import * as fs from 'fs/promises'

)
.replace('\nconst api =', '// prettier-ignore\nconst api =')
.replaceAll(`cl100k_base.js`, `${encoding}.js`)

@@ -29,0 +30,0 @@ : `// eslint-disable-next-line no-restricted-exports, import/no-default-export\nexport { default } from '../encoding/${encoding}.js'\nexport * from '../encoding/${encoding}.js'\n`

@@ -0,1 +1,2 @@

/* eslint-disable no-console */
import * as fs from 'fs/promises'

@@ -6,2 +7,3 @@ import * as path from 'path'

type CallbackFunction = (filename: string) => Promise<void> | void
const DEBUG = process.env.DEBUG === 'true'

@@ -23,3 +25,2 @@ const processFilesInDirectory = async (

} catch (error) {
// eslint-disable-next-line no-console
console.error('An error occurred:', error)

@@ -31,3 +32,20 @@ }

const __dirname = path.dirname(fileURLToPath(import.meta.url))
const textDecoder = new TextDecoder('utf8', { fatal: true })
const textEncoder = new TextEncoder()
function safeDecodeUtf8(bytes: Buffer): string | undefined {
try {
const v = textDecoder.decode(bytes)
const encoded = textEncoder.encode(v)
if (encoded.byteLength !== bytes.byteLength) {
console.log('Mismatch:', new Uint8Array(bytes), encoded)
return undefined
}
return v
} catch {
return undefined
}
}
await processFilesInDirectory(

@@ -39,2 +57,3 @@ path.join(__dirname, '../../data'),

const modelName = path.basename(filePath, '.tiktoken')
console.log(`Processing ${modelName}`)
const bpeFile = await fs.readFile(filePath, 'utf8')

@@ -44,16 +63,38 @@ const lines = bpeFile.split('\n')

const [token, rank] = x.split(' ')
return [token, Number.parseInt(rank!, 10)]
if (!token || token.length === 0 || !rank || rank.length === 0) {
throw new Error(`Invalid token encoding: ${x}`)
}
const tokenArray = Buffer.from(token, 'base64')
return [tokenArray, Number.parseInt(rank, 10)] as const
})
const jsCodeBpeArray = encoder.reduce(
(acc, [token, rank]) => {
const decoded = safeDecodeUtf8(token) ?? token
return {
string: `${acc.string}${','.repeat(rank - acc.lastRank)}${
DEBUG ? `\n/** ${rank} = */` : ''
}${
typeof decoded === 'string'
? JSON.stringify(decoded)
: `[${token.join(',')}]`
}`,
lastRank: rank,
}
},
{ string: '', lastRank: 0 },
).string
const firstTokenRank = encoder[0]?.[1] ?? 0
await fs.mkdir(path.join(__dirname, '../encodings'), { recursive: true })
await fs.writeFile(
path.join(__dirname, `../encodings/${modelName}.js`),
`/* eslint-disable */\n// @ts-nocheck\n// prettier-ignore\n/** @type {[string, number][]} */\nconst encoder = ${JSON.stringify(
encoder,
)};\nexport default encoder;`,
`/* eslint-disable */\n// @ts-nocheck\n// prettier-ignore\n/** @type {(string | number[])[]} */\nconst encoder = [${','.repeat(
firstTokenRank,
)}${jsCodeBpeArray}];\nexport default encoder;`,
)
// eslint-disable-next-line no-console
console.log(`Wrote ${modelName}.js`)
},
)
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -8,5 +7,3 @@ import { GptEncoding } from '../GptEncoding.js'

const api = GptEncoding.getEncodingApi('cl100k_base', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
const api = GptEncoding.getEncodingApi('cl100k_base', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/o200k_base.js'

@@ -8,5 +7,3 @@ import { GptEncoding } from '../GptEncoding.js'

const api = GptEncoding.getEncodingApi('o200k_base', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
const api = GptEncoding.getEncodingApi('o200k_base', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/p50k_base.js'

@@ -8,5 +7,3 @@ import { GptEncoding } from '../GptEncoding.js'

const api = GptEncoding.getEncodingApi('p50k_base', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
const api = GptEncoding.getEncodingApi('p50k_base', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/p50k_base.js'

@@ -8,5 +7,3 @@ import { GptEncoding } from '../GptEncoding.js'

const api = GptEncoding.getEncodingApi('p50k_edit', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
const api = GptEncoding.getEncodingApi('p50k_edit', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/r50k_base.js'

@@ -8,5 +7,3 @@ import { GptEncoding } from '../GptEncoding.js'

const api = GptEncoding.getEncodingApi('r50k_base', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
const api = GptEncoding.getEncodingApi('r50k_base', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

@@ -221,3 +221,3 @@ import fs from 'fs'

? 127
: modelName === 'gpt-4o'
: modelName.startsWith('gpt-4o')
? 120

@@ -224,0 +224,0 @@ : 121

/* eslint-disable no-param-reassign */
import { BytePairEncodingCore } from './BytePairEncodingCore.js'
import {
type ChatModelName,
type ChatParameters,
type EncodingName,

@@ -53,9 +55,10 @@ type ModelName,

decoder = new TextDecoder('utf8')
modelName?: ModelName
private decoder = new TextDecoder('utf8')
private bytePairEncodingCoreProcessor: BytePairEncodingCore
private specialTokenMapping: Map<string, number>
private specialTokensSet: Set<string>
private allSpecialTokenRegex: RegExp
private constructor({
tokenSplitRegex,
mergeableBytePairRanks,

@@ -65,12 +68,22 @@ specialTokenMapping,

modelName,
...rest
}: EncodingParams) {
this.specialTokenMapping = specialTokenMapping
this.specialTokensSet = new Set<string>(this.specialTokenMapping.keys())
this.allSpecialTokenRegex = getSpecialTokenRegex(this.specialTokensSet)
this.bytePairEncodingCoreProcessor = new BytePairEncodingCore({
mergeableBytePairRanks,
specialTokenMapping,
...rest,
})
const maxTokenValue = Math.max(
getMaxValueFromMap(mergeableBytePairRanks),
mergeableBytePairRanks.length - 1,
getMaxValueFromMap(specialTokenMapping),
)
this.specialTokenMapping = specialTokenMapping
if (expectedVocabularySize !== undefined) {
if (
mergeableBytePairRanks.size + specialTokenMapping.size !==
this.bytePairEncodingCoreProcessor.bytePairEncoderSize +
specialTokenMapping.size !==
expectedVocabularySize

@@ -85,3 +98,5 @@ ) {

throw new Error(
'The maximum token value must be equal to explicit_n_vocab - 1.',
`The model encodings are invalid. The maximum token value must be equal to expectedVocabularySize - 1. Currently ${maxTokenValue}, expected ${
expectedVocabularySize - 1
}`,
)

@@ -91,8 +106,2 @@ }

this.bytePairEncodingCoreProcessor = new BytePairEncodingCore({
bytePairEncoder: mergeableBytePairRanks,
specialTokenEncoder: specialTokenMapping,
tokenSplitRegex,
})
this.encode = this.encode.bind(this)

@@ -151,21 +160,23 @@ this.decode = this.decode.bind(this)

lineToEncode: string,
{
allowedSpecial = new Set<string>(),
disallowedSpecial = new Set<string>([ALL_SPECIAL_TOKENS]),
}: EncodeOptions = {},
{ allowedSpecial, disallowedSpecial }: EncodeOptions = {},
): Generator<number[], number, undefined> {
const specialTokensSet = new Set<string>(this.specialTokenMapping.keys())
let regexPattern: RegExp | undefined
if (disallowedSpecial.has(ALL_SPECIAL_TOKENS)) {
disallowedSpecial = new Set<string>(specialTokensSet)
allowedSpecial.forEach((val) => disallowedSpecial.delete(val))
disallowedSpecial.forEach((val) => allowedSpecial.delete(val))
if (allowedSpecial?.has(ALL_SPECIAL_TOKENS)) {
allowedSpecial = new Set(this.specialTokensSet)
}
if (allowedSpecial.has(ALL_SPECIAL_TOKENS)) {
allowedSpecial = specialTokensSet
if (!disallowedSpecial || disallowedSpecial.has(ALL_SPECIAL_TOKENS)) {
// by default, all special tokens are disallowed
disallowedSpecial = new Set(this.specialTokensSet)
if (allowedSpecial?.size) {
allowedSpecial.forEach((val) => disallowedSpecial!.delete(val))
disallowedSpecial.forEach((val) => allowedSpecial.delete(val))
regexPattern = getSpecialTokenRegex(disallowedSpecial)
} else {
regexPattern = this.allSpecialTokenRegex
}
}
if (disallowedSpecial.size > 0) {
const regexPattern = getSpecialTokenRegex(disallowedSpecial)
if (regexPattern) {
const match = lineToEncode.match(regexPattern)

@@ -203,3 +214,5 @@ if (match !== null) {

}
const params = chatModelParams[model]
const params: ChatParameters | undefined =
chatModelParams[model as ChatModelName]
const chatStartToken = this.specialTokenMapping.get(ImStart)

@@ -287,3 +300,6 @@ const chatEndToken = this.specialTokenMapping.get(ImEnd)

for (const decodedPart of decodedByteGenerator) {
buffer += this.decoder.decode(decodedPart, { stream: true })
buffer +=
typeof decodedPart === 'string'
? decodedPart
: this.decoder.decode(decodedPart, { stream: true })

@@ -316,3 +332,6 @@ if (buffer.length === 0 || endsWithIncompleteUtfPairSurrogate(buffer)) {

for await (const decodedPart of decodedByteGenerator) {
buffer += this.decoder.decode(decodedPart, { stream: true })
buffer +=
typeof decodedPart === 'string'
? decodedPart
: this.decoder.decode(decodedPart, { stream: true })

@@ -319,0 +338,0 @@ if (buffer.length === 0 || endsWithIncompleteUtfPairSurrogate(buffer)) {
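Downstream, this lets the streaming decoders pass string-backed tokens through as-is and run the `TextDecoder` (with `stream: true`, so split UTF-8 sequences survive chunk boundaries) only on binary ones. A hedged usage sketch; the token ids are illustrative cl100k values:

```ts
import { decodeAsyncGenerator } from 'gpt-tokenizer'

async function* tokenStream(): AsyncGenerator<number> {
  yield 9906 // 'Hello' (illustrative)
  yield 1917 // ' world' (illustrative)
}

for await (const chunk of decodeAsyncGenerator(tokenStream())) {
  process.stdout.write(chunk)
}
```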

@@ -19,13 +19,34 @@ /* eslint-disable camelcase */

export const modelToEncodingMap = {
// chat
const chatEnabledModelsMap = {
'gpt-4': cl100k_base,
'gpt-4-0314': cl100k_base,
'gpt-4-0613': cl100k_base,
'gpt-4-32k': cl100k_base,
'gpt-4-0314': cl100k_base,
'gpt-4-32k-0314': cl100k_base,
'gpt-4-32k-0613': cl100k_base,
'gpt-4-turbo': cl100k_base,
'gpt-4-turbo-2024-04-09': cl100k_base,
'gpt-4-turbo-preview': cl100k_base,
'gpt-4-1106-preview': cl100k_base,
'gpt-4-0125-preview': cl100k_base,
'gpt-4-vision-preview': cl100k_base,
'gpt-4o': o200k_base,
'gpt-4o-2024-05-13': o200k_base,
'gpt-4o-2024-08-06': o200k_base,
'gpt-4o-mini-2024-07-18': o200k_base,
'gpt-4o-mini': o200k_base,
'gpt-3.5-turbo': cl100k_base,
'gpt-3.5-turbo-0301': cl100k_base,
'gpt-3.5-turbo-0613': cl100k_base,
'gpt-3.5-turbo-1106': cl100k_base,
'gpt-3.5-turbo-0125': cl100k_base,
'gpt-3.5-turbo-16k': cl100k_base,
'gpt-3.5-turbo-16k-0613': cl100k_base,
'gpt-4o': o200k_base,
'gpt-3.5-turbo-instruct': cl100k_base,
'gpt-3.5-turbo-instruct-0914': cl100k_base,
} as const
export const modelToEncodingMap = {
// chat
...chatEnabledModelsMap,
// text

@@ -54,2 +75,4 @@ 'text-davinci-003': p50k_base,

'text-embedding-ada-002': cl100k_base,
'text-embedding-3-small': cl100k_base,
'text-embedding-3-large': cl100k_base,
// old embeddings

@@ -73,45 +96,28 @@ 'text-similarity-davinci-001': r50k_base,

const internalChatModelParams = {
'gpt-3.5-turbo': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-0301': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-0613': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-3.5-turbo-16k-0613': {
messageSeparator: '\n',
roleSeparator: '\n',
},
'gpt-4': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4-0314': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4-32k': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4-32k-0314': {
messageSeparator: '',
roleSeparator: ImSep,
},
'gpt-4o': {
messageSeparator: '',
roleSeparator: ImSep,
},
const gpt3params = {
messageSeparator: '\n',
roleSeparator: '\n',
}
export const chatModelParams: Partial<Record<ModelName, ChatParameters>> =
internalChatModelParams
const gpt4params = {
messageSeparator: '',
roleSeparator: ImSep,
}
export type ModelName = keyof typeof modelToEncodingMap
export type ChatModelName = keyof typeof internalChatModelParams
export type ChatModelName = keyof typeof chatEnabledModelsMap
export type EncodingName = (typeof modelToEncodingMap)[ModelName]
export const chatModelParams = Object.fromEntries(
Object.keys(chatEnabledModelsMap).flatMap((modelName) =>
modelName.startsWith('gpt-4')
? ([[modelName, gpt4params] as const] as const)
: modelName.startsWith('gpt-3.5-turbo')
? ([[modelName, gpt3params] as const] as const)
: [],
),
) as Record<ChatModelName, ChatParameters>
export const chatEnabledModels = Object.keys(
chatEnabledModelsMap,
) as ChatModelName[]
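A quick sketch of what the derived table yields (import path assumed):

```ts
import { chatEnabledModels, chatModelParams } from './mapping.js'

chatModelParams['gpt-4o'] // { messageSeparator: '', roleSeparator: ImSep }
chatModelParams['gpt-3.5-turbo'] // { messageSeparator: '\n', roleSeparator: '\n' }
chatEnabledModels.includes('gpt-4o-mini') // true
```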
/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'

export * from '../specialTokens.js'
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0301', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0301', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'

export * from '../specialTokens.js'
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0613', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-0613', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'

export * from '../specialTokens.js'
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-16k-0613', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo-16k-0613', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'

export * from '../specialTokens.js'
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-3.5-turbo', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'

export * from '../specialTokens.js'
const api = GptEncoding.getEncodingApiForModel('gpt-4-0314', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4-0314', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'

export * from '../specialTokens.js'
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k-0314', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k-0314', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

/* eslint-disable import/extensions */
import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
import encoder from '../encodings/cl100k_base.js'

@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'

export * from '../specialTokens.js'
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k', () =>
convertTokenBytePairEncodingFromTuples(encoder),
)
// prettier-ignore
const api = GptEncoding.getEncodingApiForModel('gpt-4-32k', () => encoder)
const {

@@ -13,0 +10,0 @@ decode,

 /* eslint-disable import/extensions */
-import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
 import encoder from '../encodings/cl100k_base.js'
@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'
 export * from '../specialTokens.js'
-const api = GptEncoding.getEncodingApiForModel('gpt-4', () =>
-  convertTokenBytePairEncodingFromTuples(encoder),
-)
+// prettier-ignore
+const api = GptEncoding.getEncodingApiForModel('gpt-4', () => encoder)
 const {
@@ -13,0 +10,0 @@ decode,

 /* eslint-disable import/extensions */
-import { convertTokenBytePairEncodingFromTuples } from '../convertTokenBytePairEncodingFromTuples.js'
 import encoder from '../encodings/o200k_base.js'
@@ -7,6 +6,4 @@ import { GptEncoding } from '../GptEncoding.js'
 export * from '../specialTokens.js'
-const api = GptEncoding.getEncodingApiForModel('gpt-4o', () =>
-  convertTokenBytePairEncodingFromTuples(encoder),
-)
+// prettier-ignore
+const api = GptEncoding.getEncodingApiForModel('gpt-4o', () => encoder)
 const {
@@ -13,0 +10,0 @@ decode,
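
Taken together, every per-model entry point now hands the bundled rank data to GptEncoding.getEncodingApiForModel as-is, instead of eagerly converting it through convertTokenBytePairEncodingFromTuples. A minimal consumer-side sketch, assuming the per-model subpaths keep exporting encode/decode as in earlier releases (the subpath below is inferred from the file layout in this diff, not confirmed by it):

// consumer.ts
import { decode, encode } from 'gpt-tokenizer/model/gpt-4o'

const tokens = encode('hello world') // tokenized with the o200k_base ranks
console.log(tokens.length, decode(tokens) === 'hello world')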

@@ -1,16 +0,13 @@
-/* eslint-disable no-magic-numbers */
-import { EncoderMap } from './EncoderMap.js'
+import type {
+  BytePairEncodingConfig,
+  RawBytePairRanks,
+} from './BytePairEncodingCore.js'
+import { Cl100KBase } from './encodingParams/Cl100KBase.js'
+import { O200KBase } from './encodingParams/O200KBase.js'
+import { P50KBase } from './encodingParams/P50KBase.js'
+import { P50KEdit } from './encodingParams/P50KEdit.js'
+import { R50KBase } from './encodingParams/R50KBase.js'
 import type { EncodingName, ModelName } from './mapping.js'
-import {
-  EndOfPrompt,
-  EndOfText,
-  FimMiddle,
-  FimPrefix,
-  FimSuffix,
-  ImEnd,
-  ImSep,
-  ImStart,
-} from './specialTokens.js'
-export interface EncodingParams {
+export interface EncodingParams extends BytePairEncodingConfig {
 /**
@@ -28,87 +25,18 @@ * The expected total number of tokens in the vocabulary, including both regular and special tokens.
-  tokenSplitRegex: RegExp
-  mergeableBytePairRanks: EncoderMap
-  specialTokenMapping: Map<string, number>
   modelName?: ModelName
+  /** increases memory consumption, but speeds up subsequent decoding */
+  enableCache?: boolean
 }
-const tokenSplitRegex =
+export const tokenSplitRegex =
   /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu
-function R50KBase(mergeableBytePairRanks: EncoderMap): EncodingParams {
-  return {
-    expectedVocabularySize: 50_257,
-    tokenSplitRegex,
-    mergeableBytePairRanks,
-    specialTokenMapping: new Map<string, number>([[EndOfText, 50_256]]),
-  }
-}
-function P50KBase(mergeableBytePairRanks: EncoderMap): EncodingParams {
-  return {
-    expectedVocabularySize: 50_281,
-    tokenSplitRegex,
-    mergeableBytePairRanks,
-    specialTokenMapping: new Map<string, number>([[EndOfText, 50_256]]),
-  }
-}
-function P50KEdit(mergeableBytePairRanks: EncoderMap): EncodingParams {
-  const specialTokenMapping = new Map<string, number>([
-    [EndOfText, 50_256],
-    [FimPrefix, 50_281],
-    [FimMiddle, 50_282],
-    [FimSuffix, 50_283],
-  ])
-  return {
-    tokenSplitRegex,
-    mergeableBytePairRanks,
-    specialTokenMapping,
-  }
-}
-function Cl100KBase(mergeableBytePairRanks: EncoderMap): EncodingParams {
-  const specialTokenMapping = new Map<string, number>([
-    [EndOfText, 100_257],
-    [FimPrefix, 100_258],
-    [FimMiddle, 100_259],
-    [FimSuffix, 100_260],
-    [ImStart, 100_264],
-    [ImEnd, 100_265],
-    [ImSep, 100_266],
-    [EndOfPrompt, 100_276],
-  ])
-  return {
-    tokenSplitRegex:
-      /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu,
-    mergeableBytePairRanks,
-    specialTokenMapping,
-  }
-}
-function O200KBase(mergeableBytePairRanks: EncoderMap): EncodingParams {
-  const specialTokenMapping = new Map<string, number>([
-    [EndOfText, 199_999],
-    [FimPrefix, 200_000],
-    [FimMiddle, 200_001],
-    [FimSuffix, 200_002],
-    [ImStart, 200_003],
-    [ImEnd, 200_004],
-    [ImSep, 200_005],
-    [EndOfPrompt, 200_006],
-  ])
-  return {
-    tokenSplitRegex:
-      /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu,
-    mergeableBytePairRanks,
-    specialTokenMapping,
-  }
-}
-export type GetMergeableRanksFn = (encodingName: EncodingName) => EncoderMap
+export type GetMergeableRanksFn = (
+  encodingName: EncodingName,
+) => RawBytePairRanks
 export type GetMergeableRanksAsyncFn = (
   encodingName: EncodingName,
-) => Promise<EncoderMap>
+) => Promise<RawBytePairRanks>
@@ -115,0 +43,0 @@ export function getEncodingParams(
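
EncodingParams now inherits tokenSplitRegex, mergeableBytePairRanks, and specialTokenMapping from BytePairEncodingConfig, and the per-encoding constructors move into encodingParams/. A hedged sketch of the relocated Cl100KBase, reconstructed from the removed inline version above; the import paths are assumptions based on the new file layout, and only the RawBytePairRanks parameter type is confirmed by this diff:

// encodingParams/Cl100KBase.ts (reconstructed sketch)
import type { RawBytePairRanks } from '../BytePairEncodingCore.js'
import type { EncodingParams } from '../modelParams.js' // path is an assumption
import {
  EndOfPrompt, EndOfText, FimMiddle, FimPrefix,
  FimSuffix, ImEnd, ImSep, ImStart,
} from '../specialTokens.js'

export function Cl100KBase(mergeableBytePairRanks: RawBytePairRanks): EncodingParams {
  // Special-token ranks are unchanged from the removed inline version.
  const specialTokenMapping = new Map<string, number>([
    [EndOfText, 100_257],
    [FimPrefix, 100_258],
    [FimMiddle, 100_259],
    [FimSuffix, 100_260],
    [ImStart, 100_264],
    [ImEnd, 100_265],
    [ImSep, 100_266],
    [EndOfPrompt, 100_276],
  ])
  return {
    tokenSplitRegex:
      /(?:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+/giu,
    mergeableBytePairRanks,
    specialTokenMapping,
  }
}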

 {
   "name": "gpt-tokenizer",
   "type": "module",
-  "dependencies": {
-    "rfc4648": "^1.5.3"
-  }
+  "dependencies": {}
 }
 /* eslint-disable import/extensions */
-import { convertTokenBytePairEncodingFromTuples } from './convertTokenBytePairEncodingFromTuples.js'
-import type { EncoderMap } from './EncoderMap.js'
+import type { RawBytePairRanks } from './BytePairEncodingCore.js'
 import cl100k from './encodings/cl100k_base.js'
@@ -10,13 +9,13 @@ import o200k from './encodings/o200k_base.js'
-export const resolveEncoding = (encoding: EncodingName): EncoderMap => {
+export const resolveEncoding = (encoding: EncodingName): RawBytePairRanks => {
   switch (encoding) {
     case 'r50k_base':
-      return convertTokenBytePairEncodingFromTuples(r50k)
+      return r50k
     case 'p50k_base':
     case 'p50k_edit':
-      return convertTokenBytePairEncodingFromTuples(p50k)
+      return p50k
     case 'cl100k_base':
-      return convertTokenBytePairEncodingFromTuples(cl100k)
+      return cl100k
     case 'o200k_base':
-      return convertTokenBytePairEncodingFromTuples(o200k)
+      return o200k
     default: {
@@ -23,0 +22,0 @@ throw new Error(`Unknown encoding name: ${encoding}`)
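
resolveEncoding now returns the bundled rank data unchanged, so its signature matches the new GetMergeableRanksFn and the conversion cost disappears from the synchronous path. A hedged wiring sketch; it assumes getEncodingApiForModel accepts any GetMergeableRanksFn (the diff only shows it called with zero-argument closures) and that the returned API exposes encode:

// wiring.ts (sketch, module paths assumed)
import { GptEncoding } from './GptEncoding.js'
import { resolveEncoding } from './resolveEncoding.js'

const api = GptEncoding.getEncodingApiForModel('gpt-4', resolveEncoding)
console.log(api.encode('hello')) // token ranks under cl100k_base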

 /* eslint-disable import/extensions */
-import { convertTokenBytePairEncodingFromTuples } from './convertTokenBytePairEncodingFromTuples.js'
-import type { EncoderMap } from './EncoderMap.js'
+import type { RawBytePairRanks } from './BytePairEncodingCore.js'
 import type { EncodingName } from './mapping.js'
@@ -8,28 +7,20 @@
   encoding: EncodingName,
-): Promise<EncoderMap> => {
+): Promise<RawBytePairRanks> => {
   switch (encoding) {
     case 'r50k_base':
-      return convertTokenBytePairEncodingFromTuples(
-        await import('./encodings/r50k_base.js').then(
-          ({ default: encodingTuples }) => encodingTuples,
-        ),
+      return import('./encodings/r50k_base.js').then(
+        ({ default: rawBytePairRanks }) => rawBytePairRanks,
       )
     case 'p50k_base':
     case 'p50k_edit':
-      return convertTokenBytePairEncodingFromTuples(
-        await import('./encodings/p50k_base.js').then(
-          ({ default: encodingTuples }) => encodingTuples,
-        ),
+      return import('./encodings/p50k_base.js').then(
+        ({ default: rawBytePairRanks }) => rawBytePairRanks,
      )
     case 'cl100k_base':
-      return convertTokenBytePairEncodingFromTuples(
-        await import('./encodings/cl100k_base.js').then(
-          ({ default: encodingTuples }) => encodingTuples,
-        ),
+      return import('./encodings/cl100k_base.js').then(
+        ({ default: rawBytePairRanks }) => rawBytePairRanks,
      )
     case 'o200k_base':
-      return convertTokenBytePairEncodingFromTuples(
-        await import('./encodings/o200k_base.js').then(
-          ({ default: encodingTuples }) => encodingTuples,
-        ),
+      return import('./encodings/o200k_base.js').then(
+        ({ default: rawBytePairRanks }) => rawBytePairRanks,
      )
@@ -36,0 +27,0 @@ default: {
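
The async variant likewise drops the await-plus-convert step and forwards the dynamic import's default export directly, which also keeps each encoding's rank data in its own code-split chunk. A minimal usage sketch, assuming the function name matches the module seen here:

// usage.ts (sketch)
import { resolveEncodingAsync } from './resolveEncodingAsync.js' // module name is an assumption

const ranks = await resolveEncodingAsync('o200k_base')
console.log(Array.isArray(ranks), ranks.length) // raw ranks, no EncoderMap built up front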

@@ -0,1 +1,6 @@
+/* eslint-disable no-bitwise */
 /* eslint-disable no-magic-numbers */
 export const isAscii = (codePoint: number) => codePoint <= 0x7f
 const HIGH_SURROGATE_START = 55_296
@@ -13,1 +18,80 @@ const HIGH_SURROGATE_END = 56_319
 }
+function isValidUTF8(bytes: Uint8Array): boolean {
+  let i = 0
+  while (i < bytes.length) {
+    const byte1 = bytes[i]!
+    let numBytes = 0
+    let codePoint = 0
+    // Determine the number of bytes in the current UTF-8 character
+    if (byte1 <= 0x7f) {
+      // 1-byte character (ASCII)
+      numBytes = 1
+      codePoint = byte1
+    } else if ((byte1 & 0xe0) === 0xc0) {
+      // 2-byte character
+      numBytes = 2
+      codePoint = byte1 & 0x1f
+      if (byte1 <= 0xc1) return false // Overlong encoding not allowed
+    } else if ((byte1 & 0xf0) === 0xe0) {
+      // 3-byte character
+      numBytes = 3
+      codePoint = byte1 & 0x0f
+    } else if ((byte1 & 0xf8) === 0xf0) {
+      // 4-byte character
+      numBytes = 4
+      codePoint = byte1 & 0x07
+      if (byte1 > 0xf4) return false // Code points above U+10FFFF not allowed
+    } else {
+      // Invalid first byte of UTF-8 character
+      return false
+    }
+    // Ensure there are enough continuation bytes
+    if (i + numBytes > bytes.length) return false
+    // Process the continuation bytes
+    for (let j = 1; j < numBytes; j++) {
+      const byte = bytes[i + j]
+      if (byte === undefined || (byte & 0xc0) !== 0x80) return false // Continuation bytes must start with '10'
+      codePoint = (codePoint << 6) | (byte & 0x3f)
+    }
+    // Check for overlong encodings
+    if (numBytes === 2 && codePoint < 0x80) return false // Overlong 2-byte sequence
+    if (numBytes === 3 && codePoint < 2_048) return false // Overlong 3-byte sequence
+    if (numBytes === 4 && codePoint < 65_536) return false // Overlong 4-byte sequence
+    // Check for surrogate halves (U+D800 to U+DFFF)
+    if (codePoint >= 55_296 && codePoint <= 57_343) return false
+    // Check for code points above U+10FFFF
+    if (codePoint > 1_114_111) return false
+    // Move to the next character
+    i += numBytes
+  }
+  return true
+}
+const textDecoder = new TextDecoder('utf8', { fatal: false })
+export function tryConvertToString(arr: Uint8Array): string | undefined {
+  if (!isValidUTF8(arr)) {
+    return undefined
+  }
+  return textDecoder.decode(arr)
+}
+// Helper function to compare two Uint8Arrays lexicographically
+export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
+  const len = Math.min(a.length, b.length)
+  for (let i = 0; i < len; i++) {
+    if (a[i] !== b[i]) {
+      return a[i]! - b[i]!
+    }
+  }
+  return a.length - b.length
+}
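
These new helpers let the decoder hand back a string only when a token's bytes form complete, well-formed UTF-8, and fall back to raw bytes otherwise. A quick demonstration against the implementation added above (exports assumed usable from utfUtil):

// demo.ts
const valid = new TextEncoder().encode('héllo') // 6 well-formed UTF-8 bytes
console.log(tryConvertToString(valid)) // 'héllo'

const truncated = valid.slice(0, 2) // cuts the two-byte 'é' sequence in half
console.log(tryConvertToString(truncated)) // undefined: invalid UTF-8

console.log(compareUint8Arrays(valid, truncated)) // > 0: equal prefix, longer array sorts later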

@@ -1,3 +0,1 @@
-import { escapeRegExp } from './escapeRegExp.js'
 export function getMaxValueFromMap(map: Map<unknown, number>): number {
@@ -11,2 +9,6 @@ let max = 0
+export function escapeRegExp(string: string) {
+  return string.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&') // $& means the whole matched string
+}
 export function getSpecialTokenRegex(tokens: Set<string>): RegExp {
@@ -13,0 +15,0 @@ const escapedTokens = [...tokens].map(escapeRegExp)
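
With escapeRegExp now defined beside its caller, getSpecialTokenRegex presumably joins the escaped tokens into one alternation. The body below is a hedged reconstruction; only the escapedTokens line is shown in this diff, so the join and flags are assumptions:

// sketch: likely shape of getSpecialTokenRegex
export function getSpecialTokenRegex(tokens: Set<string>): RegExp {
  const escapedTokens = [...tokens].map(escapeRegExp)
  // Alternate over every special token so one global pass finds them all.
  return new RegExp(escapedTokens.join('|'), 'g')
}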

Sorry, the diffs of the remaining files are not supported yet or are too big to display.
