@dqbd/tiktoken
Advanced tools
Comparing version 1.0.7 to 1.0.12
@@ -59,3 +59,3 @@ let wasm; | ||
const buf = cachedTextEncoder.encode(arg); | ||
const ptr = malloc(buf.length); | ||
const ptr = malloc(buf.length, 1) >>> 0; | ||
getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf); | ||
@@ -67,3 +67,3 @@ WASM_VECTOR_LEN = buf.length; | ||
let len = arg.length; | ||
let ptr = malloc(len); | ||
let ptr = malloc(len, 1) >>> 0; | ||
@@ -84,3 +84,3 @@ const mem = getUint8Memory0(); | ||
} | ||
ptr = realloc(ptr, len, len = offset + arg.length * 3); | ||
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0; | ||
const view = getUint8Memory0().subarray(ptr + offset, ptr + len); | ||
@@ -116,2 +116,3 @@ const ret = encodeString(arg, view); | ||
function getStringFromWasm0(ptr, len) { | ||
ptr = ptr >>> 0; | ||
return cachedTextDecoder.decode(getUint8Memory0().subarray(ptr, ptr + len)); | ||
@@ -139,2 +140,3 @@ } | ||
function getArrayU32FromWasm0(ptr, len) { | ||
ptr = ptr >>> 0; | ||
return getUint32Memory0().subarray(ptr / 4, ptr / 4 + len); | ||
@@ -144,3 +146,3 @@ } | ||
function passArray8ToWasm0(arg, malloc) { | ||
const ptr = malloc(arg.length * 1); | ||
const ptr = malloc(arg.length * 1, 1) >>> 0; | ||
getUint8Memory0().set(arg, ptr / 1); | ||
@@ -152,3 +154,3 @@ WASM_VECTOR_LEN = arg.length; | ||
function passArray32ToWasm0(arg, malloc) { | ||
const ptr = malloc(arg.length * 4); | ||
const ptr = malloc(arg.length * 4, 4) >>> 0; | ||
getUint32Memory0().set(arg, ptr / 4); | ||
@@ -160,2 +162,3 @@ WASM_VECTOR_LEN = arg.length; | ||
function getArrayU8FromWasm0(ptr, len) { | ||
ptr = ptr >>> 0; | ||
return getUint8Memory0().subarray(ptr / 1, ptr / 1 + len); | ||
@@ -171,2 +174,4 @@ } | ||
} | ||
const TiktokenFinalization = new FinalizationRegistry(ptr => wasm.__wbg_tiktoken_free(ptr >>> 0)); | ||
/** | ||
@@ -177,5 +182,6 @@ */ | ||
static __wrap(ptr) { | ||
ptr = ptr >>> 0; | ||
const obj = Object.create(Tiktoken.prototype); | ||
obj.ptr = ptr; | ||
obj.__wbg_ptr = ptr; | ||
TiktokenFinalization.register(obj, obj.__wbg_ptr, obj); | ||
return obj; | ||
@@ -185,5 +191,5 @@ } | ||
__destroy_into_raw() { | ||
const ptr = this.ptr; | ||
this.ptr = 0; | ||
const ptr = this.__wbg_ptr; | ||
this.__wbg_ptr = 0; | ||
TiktokenFinalization.unregister(this); | ||
return ptr; | ||
@@ -193,3 +199,3 @@ } | ||
free() { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ptr = this.__destroy_into_raw(); | ||
@@ -204,3 +210,3 @@ wasm.__wbg_tiktoken_free(ptr); | ||
constructor(tiktoken_bfe, special_tokens, pat_str) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ptr0 = passStringToWasm0(tiktoken_bfe, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); | ||
@@ -219,11 +225,11 @@ const len0 = WASM_VECTOR_LEN; | ||
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
wasm.tiktoken_name(retptr, this.ptr); | ||
wasm.tiktoken_name(retptr, this.__wbg_ptr); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
let v0; | ||
let v1; | ||
if (r0 !== 0) { | ||
v0 = getStringFromWasm0(r0, r1).slice(); | ||
v1 = getStringFromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 1); | ||
} | ||
return v0; | ||
return v1; | ||
} finally { | ||
@@ -240,3 +246,3 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
encode(text, allowed_special, disallowed_special) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -246,3 +252,3 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_encode(retptr, this.ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
wasm.tiktoken_encode(retptr, this.__wbg_ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
@@ -255,5 +261,5 @@ var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
} | ||
var v1 = getArrayU32FromWasm0(r0, r1).slice(); | ||
var v2 = getArrayU32FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 4); | ||
return v1; | ||
return v2; | ||
} finally { | ||
@@ -268,3 +274,3 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
encode_ordinary(text) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -274,8 +280,8 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_encode_ordinary(retptr, this.ptr, ptr0, len0); | ||
wasm.tiktoken_encode_ordinary(retptr, this.__wbg_ptr, ptr0, len0); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
var v1 = getArrayU32FromWasm0(r0, r1).slice(); | ||
var v2 = getArrayU32FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 4); | ||
return v1; | ||
return v2; | ||
} finally { | ||
@@ -292,3 +298,3 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
encode_with_unstable(text, allowed_special, disallowed_special) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -298,3 +304,3 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_encode_with_unstable(retptr, this.ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
wasm.tiktoken_encode_with_unstable(retptr, this.__wbg_ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
@@ -316,6 +322,6 @@ var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
encode_single_token(bytes) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ptr0 = passArray8ToWasm0(bytes, wasm.__wbindgen_export_0); | ||
const len0 = WASM_VECTOR_LEN; | ||
const ret = wasm.tiktoken_encode_single_token(this.ptr, ptr0, len0); | ||
const ret = wasm.tiktoken_encode_single_token(this.__wbg_ptr, ptr0, len0); | ||
return ret >>> 0; | ||
@@ -328,3 +334,3 @@ } | ||
decode(tokens) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -334,8 +340,8 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_decode(retptr, this.ptr, ptr0, len0); | ||
wasm.tiktoken_decode(retptr, this.__wbg_ptr, ptr0, len0); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
var v1 = getArrayU8FromWasm0(r0, r1).slice(); | ||
var v2 = getArrayU8FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 1); | ||
return v1; | ||
return v2; | ||
} finally { | ||
@@ -350,11 +356,11 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
decode_single_token_bytes(token) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
wasm.tiktoken_decode_single_token_bytes(retptr, this.ptr, token); | ||
wasm.tiktoken_decode_single_token_bytes(retptr, this.__wbg_ptr, token); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
var v0 = getArrayU8FromWasm0(r0, r1).slice(); | ||
var v1 = getArrayU8FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 1); | ||
return v0; | ||
return v1; | ||
} finally { | ||
@@ -368,4 +374,4 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
token_byte_values() { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
const ret = wasm.tiktoken_token_byte_values(this.ptr); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ret = wasm.tiktoken_token_byte_values(this.__wbg_ptr); | ||
return takeObject(ret); | ||
@@ -380,3 +386,3 @@ } | ||
export function __wbg_stringify_029a979dfb73aa17() { return handleError(function (arg0) { | ||
export function __wbg_stringify_e25465938f3f611f() { return handleError(function (arg0) { | ||
const ret = JSON.stringify(getObject(arg0)); | ||
@@ -391,9 +397,9 @@ return addHeapObject(ret); | ||
export function __wbindgen_string_get(arg0, arg1) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const obj = getObject(arg1); | ||
const ret = typeof(obj) === 'string' ? obj : undefined; | ||
var ptr0 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); | ||
var len0 = WASM_VECTOR_LEN; | ||
getInt32Memory0()[arg0 / 4 + 1] = len0; | ||
getInt32Memory0()[arg0 / 4 + 0] = ptr0; | ||
var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); | ||
var len1 = WASM_VECTOR_LEN; | ||
getInt32Memory0()[arg0 / 4 + 1] = len1; | ||
getInt32Memory0()[arg0 / 4 + 0] = ptr1; | ||
}; | ||
@@ -406,3 +412,3 @@ | ||
export function __wbg_parse_3ac95b51fc312db8() { return handleError(function (arg0, arg1) { | ||
export function __wbg_parse_670c19d4e984792e() { return handleError(function (arg0, arg1) { | ||
const ret = JSON.parse(getStringFromWasm0(arg0, arg1)); | ||
@@ -409,0 +415,0 @@ return addHeapObject(ret); |
@@ -14,6 +14,6 @@ /* tslint:disable */ | ||
export function tiktoken_token_byte_values(a: number): number; | ||
export function __wbindgen_export_0(a: number): number; | ||
export function __wbindgen_export_1(a: number, b: number, c: number): number; | ||
export function __wbindgen_export_0(a: number, b: number): number; | ||
export function __wbindgen_export_1(a: number, b: number, c: number, d: number): number; | ||
export function __wbindgen_add_to_stack_pointer(a: number): number; | ||
export function __wbindgen_export_2(a: number, b: number): void; | ||
export function __wbindgen_export_2(a: number, b: number, c: number): void; | ||
export function __wbindgen_export_3(a: number): void; |
{ | ||
"davinci-002": "cl100k_base", | ||
"babbage-002": "cl100k_base", | ||
"text-davinci-003": "p50k_base", | ||
@@ -33,7 +35,21 @@ "text-davinci-002": "p50k_base", | ||
"gpt-3.5-turbo": "cl100k_base", | ||
"gpt-35-turbo": "cl100k_base", | ||
"gpt-3.5-turbo-0301": "cl100k_base", | ||
"gpt-3.5-turbo-0613": "cl100k_base", | ||
"gpt-3.5-turbo-1106": "cl100k_base", | ||
"gpt-3.5-turbo-0125": "cl100k_base", | ||
"gpt-3.5-turbo-16k": "cl100k_base", | ||
"gpt-3.5-turbo-16k-0613": "cl100k_base", | ||
"gpt-3.5-turbo-instruct": "cl100k_base", | ||
"gpt-3.5-turbo-instruct-0914": "cl100k_base", | ||
"gpt-4": "cl100k_base", | ||
"gpt-4-0314": "cl100k_base", | ||
"gpt-4-0613": "cl100k_base", | ||
"gpt-4-32k": "cl100k_base", | ||
"gpt-4-32k-0314": "cl100k_base" | ||
"gpt-4-32k-0314": "cl100k_base", | ||
"gpt-4-32k-0613": "cl100k_base", | ||
"gpt-4-turbo-preview": "cl100k_base", | ||
"gpt-4-1106-preview": "cl100k_base", | ||
"gpt-4-0125-preview": "cl100k_base", | ||
"gpt-4-vision-preview": "cl100k_base" | ||
} |
{ | ||
"name": "@dqbd/tiktoken", | ||
"version": "1.0.7", | ||
"description": "Javascript bindings for tiktoken", | ||
"version": "1.0.12", | ||
"description": "JS/WASM bindings for tiktoken", | ||
"license": "MIT", | ||
@@ -11,2 +11,6 @@ "repository": { | ||
"dependencies": {}, | ||
"publishConfig": { | ||
"directory": "./dist", | ||
"access": "public" | ||
}, | ||
"files": [ | ||
@@ -73,7 +77,37 @@ "**/*" | ||
"./encoders/gpt2.json": "./encoders/gpt2.json", | ||
"./encoders/gpt2": { | ||
"types": "./encoders/gpt2.d.ts", | ||
"edge-light": "./encoders/gpt2.js", | ||
"node": "./encoders/gpt2.cjs", | ||
"default": "./encoders/gpt2.js" | ||
}, | ||
"./encoders/r50k_base.json": "./encoders/r50k_base.json", | ||
"./encoders/r50k_base": { | ||
"types": "./encoders/r50k_base.d.ts", | ||
"edge-light": "./encoders/r50k_base.js", | ||
"node": "./encoders/r50k_base.cjs", | ||
"default": "./encoders/r50k_base.js" | ||
}, | ||
"./encoders/p50k_base.json": "./encoders/p50k_base.json", | ||
"./encoders/p50k_base": { | ||
"types": "./encoders/p50k_base.d.ts", | ||
"edge-light": "./encoders/p50k_base.js", | ||
"node": "./encoders/p50k_base.cjs", | ||
"default": "./encoders/p50k_base.js" | ||
}, | ||
"./encoders/p50k_edit.json": "./encoders/p50k_edit.json", | ||
"./encoders/cl100k_base.json": "./encoders/cl100k_base.json" | ||
"./encoders/p50k_edit": { | ||
"types": "./encoders/p50k_edit.d.ts", | ||
"edge-light": "./encoders/p50k_edit.js", | ||
"node": "./encoders/p50k_edit.cjs", | ||
"default": "./encoders/p50k_edit.js" | ||
}, | ||
"./encoders/cl100k_base.json": "./encoders/cl100k_base.json", | ||
"./encoders/cl100k_base": { | ||
"types": "./encoders/cl100k_base.d.ts", | ||
"edge-light": "./encoders/cl100k_base.js", | ||
"node": "./encoders/cl100k_base.cjs", | ||
"default": "./encoders/cl100k_base.js" | ||
} | ||
} | ||
} |
# ⏳ tiktoken | ||
tiktoken is a [BPE](https://en.wikipedia.org/wiki/Byte_pair_encoding) tokeniser for use with | ||
OpenAI's models, forked from the original tiktoken library to provide NPM bindings for Node and other JS runtimes. | ||
OpenAI's models, forked from the original tiktoken library to provide JS/WASM bindings for NodeJS and other JS runtimes. | ||
The open source version of `tiktoken` can be installed from NPM: | ||
This repository contains the following packages: | ||
- `tiktoken` (formerly hosted at `@dqbd/tiktoken`): WASM bindings for the original Python library, providing full 1-to-1 feature parity. | |
- `js-tiktoken`: Pure JavaScript port of the original library with the core functionality, suitable for environments where WASM is not well supported or not desired (such as edge runtimes). | ||
Documentation for `js-tiktoken` can be found [here](https://github.com/dqbd/tiktoken/blob/main/js/README.md). Documentation for `tiktoken` can be found below. | |
The WASM version of `tiktoken` can be installed from NPM: | ||
``` | ||
npm install @dqbd/tiktoken | ||
npm install tiktoken | ||
``` | ||
@@ -18,3 +25,3 @@ | ||
import assert from "node:assert"; | ||
import { get_encoding, encoding_for_model } from "@dqbd/tiktoken"; | ||
import { get_encoding, encoding_for_model } from "tiktoken"; | ||
@@ -40,7 +47,7 @@ const enc = get_encoding("gpt2"); | ||
In constrained environments (eg. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `@dqbd/tiktoken/lite`. | ||
In constrained environments (e.g. Edge Runtime, Cloudflare Workers), where you don't want to load all the encoders at once, you can use the lightweight WASM binary via `tiktoken/lite`. | |
```typescript | ||
const { Tiktoken } = require("@dqbd/tiktoken/lite"); | ||
const cl100k_base = require("@dqbd/tiktoken/encoders/cl100k_base.json"); | ||
const { Tiktoken } = require("tiktoken/lite"); | ||
const cl100k_base = require("tiktoken/encoders/cl100k_base.json"); | ||
@@ -59,6 +66,6 @@ const encoding = new Tiktoken( | ||
```typescript | ||
const { Tiktoken } = require("@dqbd/tiktoken/lite"); | ||
const { load } = require("@dqbd/tiktoken/load"); | ||
const registry = require("@dqbd/tiktoken/registry.json"); | ||
const models = require("@dqbd/tiktoken/model_to_encoding.json"); | ||
const { Tiktoken } = require("tiktoken/lite"); | ||
const { load } = require("tiktoken/load"); | ||
const registry = require("tiktoken/registry.json"); | ||
const models = require("tiktoken/model_to_encoding.json"); | ||
@@ -72,3 +79,3 @@ async function main() { | ||
); | ||
const tokens = encoding.encode("hello world"); | ||
const tokens = encoder.encode("hello world"); | ||
encoder.free(); | ||
@@ -96,3 +103,3 @@ } | ||
```typescript | ||
import { get_encoding, init } from "@dqbd/tiktoken/init"; | ||
import { get_encoding, init } from "tiktoken/init"; | ||
@@ -115,13 +122,17 @@ async function main() { | ||
| Runtime | Status | Notes | | ||
| ---------------------------- | ------ | ------------------------------------------ | | ||
| Node.js | ✅ | | | ||
| Bun | ✅ | | | ||
| Vite | ✅ | See [here](#vite) for notes | | ||
| Next.js | ✅ | See [here](#nextjs) for notes | | ||
| Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | | ||
| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | | ||
| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | | ||
| Deno | ❌ | Currently unsupported | | ||
| Runtime | Status | Notes | | ||
| ---------------------------- | ------ | ------------------------------------------------------------------------------------------ | | ||
| Node.js | ✅ | | | ||
| Bun | ✅ | | | ||
| Vite | ✅ | See [here](#vite) for notes | | ||
| Next.js | ✅ | See [here](#nextjs) for notes | | ||
| Create React App (via Craco) | ✅ | See [here](#create-react-app) for notes | | ||
| Vercel Edge Runtime | ✅ | See [here](#vercel-edge-runtime) for notes | | ||
| Cloudflare Workers | ✅ | See [here](#cloudflare-workers) for notes | | ||
| Electron | ✅ | See [here](#electron) for notes | | ||
| Deno | ❌ | Currently unsupported (see [dqbd/tiktoken#22](https://github.com/dqbd/tiktoken/issues/22)) | | ||
| Svelte + Cloudflare Workers | ❌ | Currently unsupported (see [dqbd/tiktoken#37](https://github.com/dqbd/tiktoken/issues/37)) | | ||
For unsupported runtimes, consider using [`js-tiktoken`](https://www.npmjs.com/package/js-tiktoken), which is a pure JS implementation of the tokeniser. | ||
### [Vite](#vite) | ||
@@ -162,3 +173,3 @@ | ||
```tsx | ||
import { get_encoding } from "@dqbd/tiktoken"; | ||
import { get_encoding } from "tiktoken"; | ||
import { useState } from "react"; | ||
@@ -188,3 +199,3 @@ | ||
```typescript | ||
import { get_encoding } from "@dqbd/tiktoken"; | ||
import { get_encoding } from "tiktoken"; | ||
import { NextApiRequest, NextApiResponse } from "next"; | ||
@@ -235,5 +246,5 @@ | ||
// @ts-expect-error | ||
import wasm from "@dqbd/tiktoken/lite/tiktoken_bg.wasm?module"; | ||
import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; | ||
import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; | ||
import wasm from "tiktoken/lite/tiktoken_bg.wasm?module"; | ||
import model from "tiktoken/encoders/cl100k_base.json"; | ||
import { init, Tiktoken } from "tiktoken/lite/init"; | ||
@@ -260,3 +271,3 @@ export const config = { runtime: "edge" }; | ||
Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually and use the `@dqbd/tiktoken/lite` version to fit the 1 MB limit. However, users need to point directly at the WASM binary via a relative path (including `./node_modules/`). | ||
Similar to Vercel Edge Runtime, Cloudflare Workers must import the WASM binary file manually and use the `tiktoken/lite` version to fit the 1 MB limit. However, users need to point directly at the WASM binary via a relative path (including `./node_modules/`). | ||
@@ -274,5 +285,5 @@ Add the following rule to the `wrangler.toml` to upload WASM during build: | ||
```javascript | ||
import { init, Tiktoken } from "@dqbd/tiktoken/lite/init"; | ||
import wasm from "./node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm"; | ||
import model from "@dqbd/tiktoken/encoders/cl100k_base.json"; | ||
import { init, Tiktoken } from "tiktoken/lite/init"; | ||
import wasm from "./node_modules/tiktoken/lite/tiktoken_bg.wasm"; | ||
import model from "tiktoken/encoders/cl100k_base.json"; | ||
@@ -294,4 +305,25 @@ export default { | ||
### [Electron](#electron) | ||
To use tiktoken in your Electron main process, you need to make sure the WASM binary gets copied into your application package. | ||
Assuming a setup with [Electron Forge](https://www.electronforge.io) and [`@electron-forge/plugin-webpack`](https://www.npmjs.com/package/@electron-forge/plugin-webpack), add the following to your `webpack.main.config.js`: | ||
```javascript | ||
const CopyPlugin = require("copy-webpack-plugin"); | ||
module.exports = { | ||
// ... | ||
plugins: [ | ||
new CopyPlugin({ | ||
patterns: [ | ||
{ from: "./node_modules/tiktoken/tiktoken_bg.wasm" }, | ||
], | ||
}), | ||
], | ||
}; | ||
``` | ||
## Acknowledgements | ||
- https://github.com/zurawiki/tiktoken-rs |
@@ -59,3 +59,3 @@ let wasm; | ||
const buf = cachedTextEncoder.encode(arg); | ||
const ptr = malloc(buf.length); | ||
const ptr = malloc(buf.length, 1) >>> 0; | ||
getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf); | ||
@@ -67,3 +67,3 @@ WASM_VECTOR_LEN = buf.length; | ||
let len = arg.length; | ||
let ptr = malloc(len); | ||
let ptr = malloc(len, 1) >>> 0; | ||
@@ -84,3 +84,3 @@ const mem = getUint8Memory0(); | ||
} | ||
ptr = realloc(ptr, len, len = offset + arg.length * 3); | ||
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0; | ||
const view = getUint8Memory0().subarray(ptr + offset, ptr + len); | ||
@@ -116,2 +116,3 @@ const ret = encodeString(arg, view); | ||
function getStringFromWasm0(ptr, len) { | ||
ptr = ptr >>> 0; | ||
return cachedTextDecoder.decode(getUint8Memory0().subarray(ptr, ptr + len)); | ||
@@ -139,2 +140,3 @@ } | ||
function getArrayU32FromWasm0(ptr, len) { | ||
ptr = ptr >>> 0; | ||
return getUint32Memory0().subarray(ptr / 4, ptr / 4 + len); | ||
@@ -144,3 +146,3 @@ } | ||
function passArray8ToWasm0(arg, malloc) { | ||
const ptr = malloc(arg.length * 1); | ||
const ptr = malloc(arg.length * 1, 1) >>> 0; | ||
getUint8Memory0().set(arg, ptr / 1); | ||
@@ -152,3 +154,3 @@ WASM_VECTOR_LEN = arg.length; | ||
function passArray32ToWasm0(arg, malloc) { | ||
const ptr = malloc(arg.length * 4); | ||
const ptr = malloc(arg.length * 4, 4) >>> 0; | ||
getUint32Memory0().set(arg, ptr / 4); | ||
@@ -160,2 +162,3 @@ WASM_VECTOR_LEN = arg.length; | ||
function getArrayU8FromWasm0(ptr, len) { | ||
ptr = ptr >>> 0; | ||
return getUint8Memory0().subarray(ptr / 1, ptr / 1 + len); | ||
@@ -169,3 +172,3 @@ } | ||
export function get_encoding(encoding, extend_special_tokens) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -194,3 +197,3 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
export function encoding_for_model(model, extend_special_tokens) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -220,2 +223,4 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
} | ||
const TiktokenFinalization = new FinalizationRegistry(ptr => wasm.__wbg_tiktoken_free(ptr >>> 0)); | ||
/** | ||
@@ -226,5 +231,6 @@ */ | ||
static __wrap(ptr) { | ||
ptr = ptr >>> 0; | ||
const obj = Object.create(Tiktoken.prototype); | ||
obj.ptr = ptr; | ||
obj.__wbg_ptr = ptr; | ||
TiktokenFinalization.register(obj, obj.__wbg_ptr, obj); | ||
return obj; | ||
@@ -234,5 +240,5 @@ } | ||
__destroy_into_raw() { | ||
const ptr = this.ptr; | ||
this.ptr = 0; | ||
const ptr = this.__wbg_ptr; | ||
this.__wbg_ptr = 0; | ||
TiktokenFinalization.unregister(this); | ||
return ptr; | ||
@@ -242,3 +248,3 @@ } | ||
free() { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ptr = this.__destroy_into_raw(); | ||
@@ -253,3 +259,3 @@ wasm.__wbg_tiktoken_free(ptr); | ||
constructor(tiktoken_bfe, special_tokens, pat_str) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ptr0 = passStringToWasm0(tiktoken_bfe, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); | ||
@@ -268,11 +274,11 @@ const len0 = WASM_VECTOR_LEN; | ||
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
wasm.tiktoken_name(retptr, this.ptr); | ||
wasm.tiktoken_name(retptr, this.__wbg_ptr); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
let v0; | ||
let v1; | ||
if (r0 !== 0) { | ||
v0 = getStringFromWasm0(r0, r1).slice(); | ||
v1 = getStringFromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 1); | ||
} | ||
return v0; | ||
return v1; | ||
} finally { | ||
@@ -289,3 +295,3 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
encode(text, allowed_special, disallowed_special) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -295,3 +301,3 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_encode(retptr, this.ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
wasm.tiktoken_encode(retptr, this.__wbg_ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
@@ -304,5 +310,5 @@ var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
} | ||
var v1 = getArrayU32FromWasm0(r0, r1).slice(); | ||
var v2 = getArrayU32FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 4); | ||
return v1; | ||
return v2; | ||
} finally { | ||
@@ -317,3 +323,3 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
encode_ordinary(text) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -323,8 +329,8 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_encode_ordinary(retptr, this.ptr, ptr0, len0); | ||
wasm.tiktoken_encode_ordinary(retptr, this.__wbg_ptr, ptr0, len0); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
var v1 = getArrayU32FromWasm0(r0, r1).slice(); | ||
var v2 = getArrayU32FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 4); | ||
return v1; | ||
return v2; | ||
} finally { | ||
@@ -341,3 +347,3 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
encode_with_unstable(text, allowed_special, disallowed_special) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -347,3 +353,3 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_encode_with_unstable(retptr, this.ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
wasm.tiktoken_encode_with_unstable(retptr, this.__wbg_ptr, ptr0, len0, addHeapObject(allowed_special), addHeapObject(disallowed_special)); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
@@ -365,6 +371,6 @@ var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
encode_single_token(bytes) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ptr0 = passArray8ToWasm0(bytes, wasm.__wbindgen_export_0); | ||
const len0 = WASM_VECTOR_LEN; | ||
const ret = wasm.tiktoken_encode_single_token(this.ptr, ptr0, len0); | ||
const ret = wasm.tiktoken_encode_single_token(this.__wbg_ptr, ptr0, len0); | ||
return ret >>> 0; | ||
@@ -377,3 +383,3 @@ } | ||
decode(tokens) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
@@ -383,8 +389,8 @@ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
const len0 = WASM_VECTOR_LEN; | ||
wasm.tiktoken_decode(retptr, this.ptr, ptr0, len0); | ||
wasm.tiktoken_decode(retptr, this.__wbg_ptr, ptr0, len0); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
var v1 = getArrayU8FromWasm0(r0, r1).slice(); | ||
var v2 = getArrayU8FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 1); | ||
return v1; | ||
return v2; | ||
} finally { | ||
@@ -399,11 +405,11 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
decode_single_token_bytes(token) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
try { | ||
const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); | ||
wasm.tiktoken_decode_single_token_bytes(retptr, this.ptr, token); | ||
wasm.tiktoken_decode_single_token_bytes(retptr, this.__wbg_ptr, token); | ||
var r0 = getInt32Memory0()[retptr / 4 + 0]; | ||
var r1 = getInt32Memory0()[retptr / 4 + 1]; | ||
var v0 = getArrayU8FromWasm0(r0, r1).slice(); | ||
var v1 = getArrayU8FromWasm0(r0, r1).slice(); | ||
wasm.__wbindgen_export_2(r0, r1 * 1); | ||
return v0; | ||
return v1; | ||
} finally { | ||
@@ -417,4 +423,4 @@ wasm.__wbindgen_add_to_stack_pointer(16); | ||
token_byte_values() { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
const ret = wasm.tiktoken_token_byte_values(this.ptr); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const ret = wasm.tiktoken_token_byte_values(this.__wbg_ptr); | ||
return takeObject(ret); | ||
@@ -433,3 +439,3 @@ } | ||
export function __wbg_stringify_029a979dfb73aa17() { return handleError(function (arg0) { | ||
export function __wbg_stringify_e25465938f3f611f() { return handleError(function (arg0) { | ||
const ret = JSON.stringify(getObject(arg0)); | ||
@@ -440,9 +446,9 @@ return addHeapObject(ret); | ||
export function __wbindgen_string_get(arg0, arg1) { | ||
if (wasm == null) throw new Error("@dqbd/tiktoken: WASM binary has not been propery initialized."); | ||
if (wasm == null) throw new Error("tiktoken: WASM binary has not been propery initialized."); | ||
const obj = getObject(arg1); | ||
const ret = typeof(obj) === 'string' ? obj : undefined; | ||
var ptr0 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); | ||
var len0 = WASM_VECTOR_LEN; | ||
getInt32Memory0()[arg0 / 4 + 1] = len0; | ||
getInt32Memory0()[arg0 / 4 + 0] = ptr0; | ||
var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_export_0, wasm.__wbindgen_export_1); | ||
var len1 = WASM_VECTOR_LEN; | ||
getInt32Memory0()[arg0 / 4 + 1] = len1; | ||
getInt32Memory0()[arg0 / 4 + 0] = ptr1; | ||
}; | ||
@@ -455,3 +461,3 @@ | ||
export function __wbg_parse_3ac95b51fc312db8() { return handleError(function (arg0, arg1) { | ||
export function __wbg_parse_670c19d4e984792e() { return handleError(function (arg0, arg1) { | ||
const ret = JSON.parse(getStringFromWasm0(arg0, arg1)); | ||
@@ -458,0 +464,0 @@ return addHeapObject(ret); |
@@ -16,6 +16,6 @@ /* tslint:disable */ | ||
export function encoding_for_model(a: number, b: number, c: number, d: number): void; | ||
export function __wbindgen_export_0(a: number): number; | ||
export function __wbindgen_export_1(a: number, b: number, c: number): number; | ||
export function __wbindgen_export_0(a: number, b: number): number; | ||
export function __wbindgen_export_1(a: number, b: number, c: number, d: number): number; | ||
export function __wbindgen_add_to_stack_pointer(a: number): number; | ||
export function __wbindgen_export_2(a: number, b: number): void; | ||
export function __wbindgen_export_2(a: number, b: number, c: number): void; | ||
export function __wbindgen_export_3(a: number): void; |
@@ -16,2 +16,4 @@ /* tslint:disable */ | ||
export type TiktokenModel = | ||
| "davinci-002" | ||
| "babbage-002" | ||
| "text-davinci-003" | ||
@@ -47,8 +49,22 @@ | "text-davinci-002" | ||
| "gpt2" | ||
| "gpt-3.5-turbo" | ||
| "gpt-35-turbo" | ||
| "gpt-3.5-turbo-0301" | ||
| "gpt-3.5-turbo-0613" | ||
| "gpt-3.5-turbo-1106" | ||
| "gpt-3.5-turbo-0125" | ||
| "gpt-3.5-turbo-16k" | ||
| "gpt-3.5-turbo-16k-0613" | ||
| "gpt-3.5-turbo-instruct" | ||
| "gpt-3.5-turbo-instruct-0914" | ||
| "gpt-4" | ||
| "gpt-4-0314" | ||
| "gpt-4-0613" | ||
| "gpt-4-32k" | ||
| "gpt-4-32k-0314" | ||
| "gpt-3.5-turbo" | ||
| "gpt-3.5-turbo-0301"; | ||
| "gpt-4-32k-0613" | ||
| "gpt-4-turbo-preview" | ||
| "gpt-4-1106-preview" | ||
| "gpt-4-0125-preview" | ||
| "gpt-4-vision-preview" | ||
@@ -55,0 +71,0 @@ /** |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Native code
Supply chain risk: Contains native code (e.g., compiled binaries or shared libraries). Including native code can obscure malicious behavior.
Found 2 instances in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
13813880
53
2696
318
6