@huggingface/gguf - npm Package Compare versions

Comparing version 0.1.2 to 0.1.3

src/transformer-llm.ts

197

dist/browser/index.d.ts

@@ -0,1 +1,150 @@

/** This file is auto-generated by generate-llm.ts */
declare const LLM_ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo"];
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
type Attention<TArchitecture extends string> = {
[K in `${TArchitecture}.attention.head_count`]: number;
} & {
[K in `${TArchitecture}.attention.head_count_kv`]: number;
} & {
[K in `${TArchitecture}.attention.layer_norm_epsilon`]: number;
} & {
[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number;
} & {
[K in `${TArchitecture}.attention.alibi_bias_max`]: number;
} & {
[K in `${TArchitecture}.attention.clip_kqv`]: number;
} & {
[K in `${TArchitecture}.attention.use_norm`]: number;
};
type Rope<TArchitecture extends LLMArchitecture> = {
[K in `${TArchitecture}.rope.dimension_count`]: number;
} & {
[K in `${TArchitecture}.rope.freq_base`]: number;
} & {
[K in `${TArchitecture}.rope.scale`]: number;
} & {
[K in `${TArchitecture}.rope.scale_linear`]: number;
};
type MOE<TArchitecture extends LLMArchitecture> = {
[K in `${TArchitecture}.expert_count`]: number;
} & {
[K in `${TArchitecture}.expert_used_count`]: number;
};
type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture> & MOE<TArchitecture> & Attention<TArchitecture> & Rope<TArchitecture>;
declare enum TransformerLLMPoolingType {
UNSPECIFIED = -1,
NONE = 0,
MEAN = 1,
CLS = 2
}
type ArchLlama = TransformerLLMBase<"llama"> & {
"llama.attention.layer_norm_rms_epsilon": number;
};
type ArchFalcon = TransformerLLMBase<"falcon"> & {
"falcon.attention.layer_norm_epsilon": number;
};
type ArchGrok = TransformerLLMBase<"grok"> & {
"grok.attention.layer_norm_rms_epsilon": number;
};
type ArchGpt2 = TransformerLLMBase<"gpt2"> & {
"gpt2.attention.layer_norm_epsilon": number;
};
type ArchGptj = TransformerLLMBase<"gptj">;
type ArchGptneox = TransformerLLMBase<"gptneox">;
type ArchMpt = TransformerLLMBase<"mpt"> & {
"mpt.attention.layer_norm_epsilon": number;
"mpt.attention.clamp_kqv": number;
"mpt.attention.max_alibi_bias": number;
};
type ArchBaichuan = TransformerLLMBase<"baichuan"> & {
"baichuan.attention.layer_norm_rms_epsilon": number;
};
type ArchStarcoder = TransformerLLMBase<"starcoder"> & {
"starcoder.attention.layer_norm_epsilon": number;
};
type ArchPersimmon = TransformerLLMBase<"persimmon"> & {
"persimmon.attention.layer_norm_epsilon": number;
};
type ArchRefact = TransformerLLMBase<"refact"> & {
"refact.attention.layer_norm_rms_epsilon": number;
};
type ArchBert = TransformerLLMBase<"bert"> & {
"bert.attention.layer_norm_epsilon": number;
"bert.attention.causal": boolean;
"tokenizer.ggml.token_type_count": number;
"bert.pooling_type": TransformerLLMPoolingType;
};
type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
"nomic-bert.attention.layer_norm_epsilon": number;
"nomic-bert.attention.causal": boolean;
"tokenizer.ggml.token_type_count": number;
"nomic-bert.pooling_type": TransformerLLMPoolingType;
};
type ArchBloom = TransformerLLMBase<"bloom"> & {
"bloom.attention.layer_norm_epsilon": number;
};
type ArchStablelm = TransformerLLMBase<"stablelm"> & {
"stablelm.attention.layer_norm_epsilon": number;
};
type ArchQwen = TransformerLLMBase<"qwen"> & {
"qwen.attention.layer_norm_rms_epsilon": number;
};
type ArchQwen2 = TransformerLLMBase<"qwen2"> & {
"qwen2.attention.layer_norm_rms_epsilon": number;
};
type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
"qwen2moe.attention.layer_norm_rms_epsilon": number;
};
type ArchPhi2 = TransformerLLMBase<"phi2"> & {
"phi2.attention.layer_norm_epsilon": number;
};
type ArchPhi3 = TransformerLLMBase<"phi3"> & {
"phi3.attention.layer_norm_rms_epsilon": number;
};
type ArchPlamo = TransformerLLMBase<"plamo"> & {
"plamo.attention.layer_norm_rms_epsilon": number;
};
type ArchCodeshell = TransformerLLMBase<"codeshell"> & {
"codeshell.attention.layer_norm_epsilon": number;
};
type ArchOrion = TransformerLLMBase<"orion"> & {
"orion.attention.layer_norm_epsilon": number;
};
type ArchInternlm2 = TransformerLLMBase<"internlm2"> & {
"internlm2.attention.layer_norm_rms_epsilon": number;
};
type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
"minicpm.attention.layer_norm_rms_epsilon": number;
};
type ArchGemma = TransformerLLMBase<"gemma"> & {
"gemma.attention.layer_norm_rms_epsilon": number;
};
type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & {
"starcoder2.attention.layer_norm_epsilon": number;
};
type ArchMamba = TransformerLLMBase<"mamba"> & {
"mamba.ssm.conv_kernel": number;
"mamba.ssm.inner_size": number;
"mamba.ssm.state_size": number;
"mamba.ssm.time_step_rank": number;
"mamba.attention.layer_norm_rms_epsilon": number;
};
type ArchXverse = TransformerLLMBase<"xverse"> & {
"xverse.attention.layer_norm_rms_epsilon": number;
};
type ArchCommandR = TransformerLLMBase<"command-r"> & {
"command-r.logit_scale": number;
"command-r.attention.layer_norm_epsilon": number;
};
type ArchDbrx = TransformerLLMBase<"dbrx"> & {
"dbrx.attention.layer_norm_epsilon": number;
"dbrx.attention.clamp_kqv": number;
};
type ArchOlmo = TransformerLLMBase<"olmo"> & {
"olmo.attention.layer_norm_epsilon": number;
"olmo.attention.clamp_kqv": number;
};
type TransformerLLM = ArchLlama | ArchFalcon | ArchGrok | ArchGpt2 | ArchGptj | ArchGptneox | ArchMpt | ArchBaichuan | ArchStarcoder | ArchPersimmon | ArchRefact | ArchBert | ArchNomicBert | ArchBloom | ArchStablelm | ArchQwen | ArchQwen2 | ArchQwen2moe | ArchPhi2 | ArchPhi3 | ArchPlamo | ArchCodeshell | ArchOrion | ArchInternlm2 | ArchMinicpm | ArchGemma | ArchStarcoder2 | ArchMamba | ArchXverse | ArchCommandR | ArchDbrx | ArchOlmo;
type MetadataBaseValue = string | number | bigint | boolean;
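
Aside from the architecture list itself, the generated declarations above rely on TypeScript template-literal mapped types: one generic definition expands into the dotted, architecture-prefixed GGUF metadata keys. A minimal sketch of the pattern, using illustrative type names that are not part of the package:

// Illustrative only: a template-literal mapped type expands one generic
// definition into a concrete, architecture-prefixed metadata key.
type AttentionKeys<TArch extends string> = {
    [K in `${TArch}.attention.head_count`]: number;
};
// Resolves to: { "llama.attention.head_count": number }
type LlamaHeadCount = AttentionKeys<"llama">;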

@@ -43,3 +192,3 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[];

}
declare const ARCHITECTURES: readonly ["llama", "mpt", "gptneox", "gptj", "gpt2", "bloom", "falcon", "gemma", "rwkv", "whisper"];
declare const ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo", "rwkv", "whisper"];
type Architecture = (typeof ARCHITECTURES)[number];

@@ -52,44 +201,16 @@ interface General {

}
-type Attention<TArchitecture extends Architecture> = {
-    [K in `${TArchitecture}.attention.head_count`]: number;
-} | {
-    [K in `${TArchitecture}.attention.head_count_kv`]: number;
-} | {
-    [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number;
-} | {
-    [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number;
-} | {
-    [K in `${TArchitecture}.attention.alibi_bias_max`]: number;
-} | {
-    [K in `${TArchitecture}.attention.clip_kqv`]: number;
-} | {
-    [K in `${TArchitecture}.attention.use_norm`]: number;
-};
-type Rope<TArchitecture extends Architecture> = {
-    [K in `${TArchitecture}.rope.dimension_count`]: number;
-} | {
-    [K in `${TArchitecture}.rope.freq_base`]: number;
-} | {
-    [K in `${TArchitecture}.rope.scale`]: number;
-} | {
-    [K in `${TArchitecture}.rope.scale_linear`]: number;
-};
type ModelBase<TArchitecture extends Architecture | `encoder.${Extract<Architecture, "whisper">}` | `decoder.${Extract<Architecture, "whisper">}`> = {
    [K in `${TArchitecture}.layer_count`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.feed_forward_length`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.context_length`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.embedding_length`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.block_count`]: number;
};
-type MOE<TArchitecture extends Architecture> = {
-    [K in `${TArchitecture}.expert_count`]: number;
-} | {
-    [K in `${TArchitecture}.expert_used_count`]: number;
-};
+type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
interface Tokenizer {
-    "tokenizer.ggml.model": Architecture;
+    "tokenizer.ggml.model": TokenizerModel;
"tokenizer.ggml.tokens": string[];

@@ -103,4 +224,2 @@ "tokenizer.ggml.scores": number[];

}
-type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">;
-type TransformerLLM = ModelBase<TransformerLLMArchitecture> & MOE<TransformerLLMArchitecture> & Attention<TransformerLLMArchitecture> & Rope<TransformerLLMArchitecture>;
type RWKV = ModelBase<"rwkv"> & {

@@ -147,2 +266,3 @@ "rwkv.architecture_version": number;

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
computeParametersCount: true;

@@ -157,2 +277,3 @@ }): Promise<GGUFParseOutput & {

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
}): Promise<GGUFParseOutput>;

@@ -169,2 +290,2 @@ declare function ggufAllShards(url: string, params?: {

-export { GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename };
+export { Architecture, GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename };

@@ -8,2 +8,38 @@ "use strict";Object.defineProperty(exports, "__esModule", {value: true});var __defProp = Object.defineProperty;

// src/transformer-llm.ts
var LLM_ARCHITECTURES = [
"llama",
"falcon",
"grok",
"gpt2",
"gptj",
"gptneox",
"mpt",
"baichuan",
"starcoder",
"persimmon",
"refact",
"bert",
"nomic-bert",
"bloom",
"stablelm",
"qwen",
"qwen2",
"qwen2moe",
"phi2",
"phi3",
"plamo",
"codeshell",
"orion",
"internlm2",
"minicpm",
"gemma",
"starcoder2",
"mamba",
"xverse",
"command-r",
"dbrx",
"olmo"
];
// src/types.ts

@@ -51,2 +87,3 @@ var GGMLQuantizationType = /* @__PURE__ */ ((GGMLQuantizationType2) => {

})(GGUFValueType || {});
+var ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"];

@@ -205,3 +242,3 @@ // src/utils/promisesQueue.ts

async fetchChunk() {
-var _a, _b;
+var _a, _b, _c, _d;
const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1];

@@ -211,2 +248,3 @@ const buf = new Uint8Array(

headers: {
+...(_d = (_c = this.params) == null ? void 0 : _c.additionalFetchHeaders) != null ? _d : {},
Range: `bytes=${range[0]}-${range[1]}`

@@ -213,0 +251,0 @@ }


@@ -34,2 +34,38 @@ "use strict";

// src/transformer-llm.ts
var LLM_ARCHITECTURES = [
"llama",
"falcon",
"grok",
"gpt2",
"gptj",
"gptneox",
"mpt",
"baichuan",
"starcoder",
"persimmon",
"refact",
"bert",
"nomic-bert",
"bloom",
"stablelm",
"qwen",
"qwen2",
"qwen2moe",
"phi2",
"phi3",
"plamo",
"codeshell",
"orion",
"internlm2",
"minicpm",
"gemma",
"starcoder2",
"mamba",
"xverse",
"command-r",
"dbrx",
"olmo"
];
// src/types.ts

@@ -77,2 +113,3 @@ var GGMLQuantizationType = /* @__PURE__ */ ((GGMLQuantizationType2) => {

})(GGUFValueType || {});
+var ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"];

@@ -235,2 +272,3 @@ // src/utils/promisesQueue.ts

headers: {
+...this.params?.additionalFetchHeaders ?? {},
Range: `bytes=${range[0]}-${range[1]}`

@@ -237,0 +275,0 @@ }

3

package.json
{
"name": "@huggingface/gguf",
"packageManager": "pnpm@8.10.5",
"version": "0.1.2",
"version": "0.1.3",
"description": "a GGUF parser that works on remotely hosted files",

@@ -49,2 +49,3 @@ "repository": "https://github.com/huggingface/huggingface.js.git",

"build": "tsup src/index.ts --format cjs,esm --clean --dts",
"build:llm": "tsx scripts/generate-llm.ts && pnpm run format",
"test": "vitest run",

@@ -51,0 +52,0 @@ "check": "tsc"

@@ -6,3 +6,3 @@ import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";

export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
-export { GGUFValueType, GGMLQuantizationType } from "./types";
+export { GGUFValueType, GGMLQuantizationType, Architecture } from "./types";
export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions";

@@ -68,2 +68,3 @@

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
}

@@ -87,2 +88,3 @@ ) {

headers: {
+...(this.params?.additionalFetchHeaders ?? {}),
Range: `bytes=${range[0]}-${range[1]}`,

@@ -215,2 +217,3 @@ },

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
computeParametersCount: true;

@@ -226,2 +229,3 @@ }

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
}

@@ -236,2 +240,3 @@ ): Promise<GGUFParseOutput>;

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
computeParametersCount?: boolean;

@@ -238,0 +243,0 @@ }
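
The typings above add an optional additionalFetchHeaders parameter next to the existing fetch override. A usage sketch, assuming GGUFParseOutput exposes the metadata and tensorInfos fields exported by the package; the URL and token are placeholders:

import { gguf } from "@huggingface/gguf";

// Placeholder URL and token, for illustration only.
const url = "https://example.com/model.gguf";

const { metadata, tensorInfos } = await gguf(url, {
    // New in 0.1.3: merged into the headers of every HTTP range request,
    // e.g. to authenticate against a gated or private file.
    additionalFetchHeaders: { Authorization: "Bearer <token>" },
});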

@@ -0,1 +1,4 @@

+import type { TransformerLLM } from "./transformer-llm";
+import { LLM_ARCHITECTURES } from "./transformer-llm";
export type MetadataBaseValue = string | number | bigint | boolean;

@@ -47,15 +50,3 @@ export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested.

-export const ARCHITECTURES = [
-    "llama",
-    "mpt",
-    "gptneox",
-    "gptj",
-    "gpt2",
-    "bloom",
-    "falcon",
-    "gemma",
-    "rwkv",
-    "whisper",
-] as const;
+const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const;
export type Architecture = (typeof ARCHITECTURES)[number];

@@ -70,18 +61,3 @@

-type Attention<TArchitecture extends Architecture> =
-    | { [K in `${TArchitecture}.attention.head_count`]: number }
-    | { [K in `${TArchitecture}.attention.head_count_kv`]: number }
-    | { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number }
-    | { [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number }
-    | { [K in `${TArchitecture}.attention.alibi_bias_max`]: number }
-    | { [K in `${TArchitecture}.attention.clip_kqv`]: number }
-    | { [K in `${TArchitecture}.attention.use_norm`]: number };
-type Rope<TArchitecture extends Architecture> =
-    | { [K in `${TArchitecture}.rope.dimension_count`]: number }
-    | { [K in `${TArchitecture}.rope.freq_base`]: number }
-    | { [K in `${TArchitecture}.rope.scale`]: number }
-    | { [K in `${TArchitecture}.rope.scale_linear`]: number };
-type ModelBase<
+export type ModelBase<
TArchitecture extends

@@ -91,15 +67,9 @@ | Architecture

| `decoder.${Extract<Architecture, "whisper">}`,
-> =
-    | { [K in `${TArchitecture}.layer_count`]: number }
-    | { [K in `${TArchitecture}.feed_forward_length`]: number }
-    | { [K in `${TArchitecture}.context_length`]: number }
-    | { [K in `${TArchitecture}.embedding_length`]: number }
-    | { [K in `${TArchitecture}.block_count`]: number };
+> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & {
+    [K in `${TArchitecture}.context_length`]: number;
+} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number };
-type MOE<TArchitecture extends Architecture> =
-    | { [K in `${TArchitecture}.expert_count`]: number }
-    | { [K in `${TArchitecture}.expert_used_count`]: number };
+type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
interface Tokenizer {
-    "tokenizer.ggml.model": Architecture;
+    "tokenizer.ggml.model": TokenizerModel;
"tokenizer.ggml.tokens": string[];

@@ -114,8 +84,2 @@ "tokenizer.ggml.scores": number[];

-type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">;
-type TransformerLLM = ModelBase<TransformerLLMArchitecture> &
-    MOE<TransformerLLMArchitecture> &
-    Attention<TransformerLLMArchitecture> &
-    Rope<TransformerLLMArchitecture>;
export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };

@@ -122,0 +86,0 @@ export type LLM = TransformerLLM | RWKV;
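
The recurring fix in this release is replacing unions ("|") with intersections ("&") in these key-map types. A two-key sketch of why that matters, with illustrative types that are not package code:

// With a union, TypeScript only guarantees ONE of the key groups exists,
// so reading both keys off the same metadata object fails to type-check.
type AsUnion = { "llama.block_count": number } | { "llama.context_length": number };
// With an intersection, every key group is present, matching a real GGUF
// header where all of these fields coexist.
type AsIntersection = { "llama.block_count": number } & { "llama.context_length": number };

declare const meta: AsIntersection;
meta["llama.block_count"]; // OK under the intersection
meta["llama.context_length"]; // OK under the intersection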

