Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@microsoft/tiktokenizer

Package Overview
Dependencies
Maintainers
1
Versions
9
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@microsoft/tiktokenizer - npm Package Compare versions

Comparing version 1.0.7 to 1.0.8

4

dist/tikTokenizer.d.ts

@@ -22,3 +22,3 @@ import { ILRUCache } from './lru';

* https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken
* @param tikTokenBpeFile BPE rank file path
* @param tikTokenBpeFileOrDict BPE rank file path or parsed dictionary
* @param specialTokensEncoder special tokens encoder

@@ -28,3 +28,3 @@ * @param regexPattern regex pattern to split the input text

*/
constructor(tikTokenBpeFile: string, specialTokensEncoder: ReadonlyMap<string, number>, regexPattern: string, cacheSize?: number);
constructor(tikTokenBpeFileOrDict: string | Map<Uint8Array, number>, specialTokensEncoder: ReadonlyMap<string, number>, regexPattern: string, cacheSize?: number);
protected init(bpeDict: ReadonlyMap<Uint8Array, number>, specialTokensEncoder: ReadonlyMap<string, number>, regexPattern: string): void;

@@ -31,0 +31,0 @@ private findNextSpecialToken;

@@ -64,3 +64,3 @@ "use strict";

* https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken
* @param tikTokenBpeFile BPE rank file path
* @param tikTokenBpeFileOrDict BPE rank file path or parsed dictionary
* @param specialTokensEncoder special tokens encoder

@@ -70,7 +70,7 @@ * @param regexPattern regex pattern to split the input text

*/
constructor(tikTokenBpeFile, specialTokensEncoder, regexPattern, cacheSize = 8192) {
constructor(tikTokenBpeFileOrDict, specialTokensEncoder, regexPattern, cacheSize = 8192) {
this.textEncoder = (0, textEncoder_1.makeTextEncoder)();
this.textDecoder = new util_1.TextDecoder("utf-8");
this.cache = new lru_1.LRUCache(cacheSize);
const bpeDict = loadTikTokenBpe(tikTokenBpeFile);
const bpeDict = typeof tikTokenBpeFileOrDict === 'string' ? loadTikTokenBpe(tikTokenBpeFileOrDict) : tikTokenBpeFileOrDict;
this.init(bpeDict, specialTokensEncoder, regexPattern);

@@ -77,0 +77,0 @@ }

@@ -42,3 +42,3 @@ import { TikTokenizer } from "./tikTokenizer";

* Create a tokenizer from a file
* @param tikTokenBpeFile BPE rank file in tiktoken format
* @param tikTokenBpeFileOrDict BPE rank file in tiktoken format or parsed dictionary
* @param specialTokensEncoder special tokens mapping

@@ -49,2 +49,2 @@ * @param regexPattern regex pattern

*/
export declare function createTokenizer(tikTokenBpeFile: string, specialTokensEncoder: ReadonlyMap<string, number>, regexPattern: string, cacheSize?: number): TikTokenizer;
export declare function createTokenizer(tikTokenBpeFileOrDict: string | Map<Uint8Array, number>, specialTokensEncoder: ReadonlyMap<string, number>, regexPattern: string, cacheSize?: number): TikTokenizer;

@@ -225,3 +225,3 @@ "use strict";

regexPattern = REGEX_PATTERN_1;
mergeableRanksFileUrl = `https://pythia.blob.core.windows.net/public/encoding/gpt2.tiktoken`;
mergeableRanksFileUrl = `https://raw.githubusercontent.com/microsoft/Tokenizer/main/model/gpt2.tiktoken`;
break;

@@ -251,3 +251,3 @@ default:

* Create a tokenizer from a file
* @param tikTokenBpeFile BPE rank file in tiktoken format
* @param tikTokenBpeFileOrDict BPE rank file in tiktoken format or parsed dictionary
* @param specialTokensEncoder special tokens mapping

@@ -258,4 +258,4 @@ * @param regexPattern regex pattern

*/
function createTokenizer(tikTokenBpeFile, specialTokensEncoder, regexPattern, cacheSize = 8192) {
const tikTokenizer = new tikTokenizer_1.TikTokenizer(tikTokenBpeFile, specialTokensEncoder, regexPattern, cacheSize);
function createTokenizer(tikTokenBpeFileOrDict, specialTokensEncoder, regexPattern, cacheSize = 8192) {
const tikTokenizer = new tikTokenizer_1.TikTokenizer(tikTokenBpeFileOrDict, specialTokensEncoder, regexPattern, cacheSize);
return tikTokenizer;

@@ -262,0 +262,0 @@ }

@@ -5,3 +5,3 @@ {

"description": "Tokenizer for OpenAI large language models.",
"version": "1.0.7",
"version": "1.0.8",
"author": {

@@ -8,0 +8,0 @@ "name": "Microsoft Corporation"

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc