@huggingface/gguf
Advanced tools
Comparing version 0.1.2 to 0.1.3
@@ -0,1 +1,150 @@ | ||
/** This file is auto-generated by generate-llm.ts */ | ||
declare const LLM_ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo"]; | ||
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number]; | ||
type Attention<TArchitecture extends string> = { | ||
[K in `${TArchitecture}.attention.head_count`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.head_count_kv`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.layer_norm_epsilon`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.alibi_bias_max`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.clip_kqv`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.use_norm`]: number; | ||
}; | ||
type Rope<TArchitecture extends LLMArchitecture> = { | ||
[K in `${TArchitecture}.rope.dimension_count`]: number; | ||
} & { | ||
[K in `${TArchitecture}.rope.freq_base`]: number; | ||
} & { | ||
[K in `${TArchitecture}.rope.scale`]: number; | ||
} & { | ||
[K in `${TArchitecture}.rope.scale_linear`]: number; | ||
}; | ||
type MOE<TArchitecture extends LLMArchitecture> = { | ||
[K in `${TArchitecture}.expert_count`]: number; | ||
} & { | ||
[K in `${TArchitecture}.expert_used_count`]: number; | ||
}; | ||
type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture> & MOE<TArchitecture> & Attention<TArchitecture> & Rope<TArchitecture>; | ||
declare enum TransformerLLMPoolingType { | ||
UNSPECIFIED = -1, | ||
NONE = 0, | ||
MEAN = 1, | ||
CLS = 2 | ||
} | ||
type ArchLlama = TransformerLLMBase<"llama"> & { | ||
"llama.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchFalcon = TransformerLLMBase<"falcon"> & { | ||
"falcon.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchGrok = TransformerLLMBase<"grok"> & { | ||
"grok.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchGpt2 = TransformerLLMBase<"gpt2"> & { | ||
"gpt2.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchGptj = TransformerLLMBase<"gptj">; | ||
type ArchGptneox = TransformerLLMBase<"gptneox">; | ||
type ArchMpt = TransformerLLMBase<"mpt"> & { | ||
"mpt.attention.layer_norm_epsilon": number; | ||
"mpt.attention.clamp_kqv": number; | ||
"mpt.attention.max_alibi_bias": number; | ||
}; | ||
type ArchBaichuan = TransformerLLMBase<"baichuan"> & { | ||
"baichuan.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchStarcoder = TransformerLLMBase<"starcoder"> & { | ||
"starcoder.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchPersimmon = TransformerLLMBase<"persimmon"> & { | ||
"persimmon.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchRefact = TransformerLLMBase<"refact"> & { | ||
"refact.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchBert = TransformerLLMBase<"bert"> & { | ||
"bert.attention.layer_norm_epsilon": number; | ||
"bert.attention.causal": boolean; | ||
"tokenizer.ggml.token_type_count": number; | ||
"bert.pooling_type": TransformerLLMPoolingType; | ||
}; | ||
type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & { | ||
"nomic-bert.attention.layer_norm_epsilon": number; | ||
"nomic-bert.attention.causal": boolean; | ||
"tokenizer.ggml.token_type_count": number; | ||
"nomic-bert.pooling_type": TransformerLLMPoolingType; | ||
}; | ||
type ArchBloom = TransformerLLMBase<"bloom"> & { | ||
"bloom.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchStablelm = TransformerLLMBase<"stablelm"> & { | ||
"stablelm.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchQwen = TransformerLLMBase<"qwen"> & { | ||
"qwen.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchQwen2 = TransformerLLMBase<"qwen2"> & { | ||
"qwen2.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & { | ||
"qwen2moe.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchPhi2 = TransformerLLMBase<"phi2"> & { | ||
"phi2.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchPhi3 = TransformerLLMBase<"phi3"> & { | ||
"phi3.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchPlamo = TransformerLLMBase<"plamo"> & { | ||
"plamo.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchCodeshell = TransformerLLMBase<"codeshell"> & { | ||
"codeshell.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchOrion = TransformerLLMBase<"orion"> & { | ||
"orion.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchInternlm2 = TransformerLLMBase<"internlm2"> & { | ||
"internlm2.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchMinicpm = TransformerLLMBase<"minicpm"> & { | ||
"minicpm.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchGemma = TransformerLLMBase<"gemma"> & { | ||
"gemma.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & { | ||
"starcoder2.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchMamba = TransformerLLMBase<"mamba"> & { | ||
"mamba.ssm.conv_kernel": number; | ||
"mamba.ssm.inner_size": number; | ||
"mamba.ssm.state_size": number; | ||
"mamba.ssm.time_step_rank": number; | ||
"mamba.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchXverse = TransformerLLMBase<"xverse"> & { | ||
"xverse.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchCommandR = TransformerLLMBase<"command-r"> & { | ||
"command-r.logit_scale": number; | ||
"command-r.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchDbrx = TransformerLLMBase<"dbrx"> & { | ||
"dbrx.attention.layer_norm_epsilon": number; | ||
"dbrx.attention.clamp_kqv": number; | ||
}; | ||
type ArchOlmo = TransformerLLMBase<"olmo"> & { | ||
"olmo.attention.layer_norm_epsilon": number; | ||
"olmo.attention.clamp_kqv": number; | ||
}; | ||
type TransformerLLM = ArchLlama | ArchFalcon | ArchGrok | ArchGpt2 | ArchGptj | ArchGptneox | ArchMpt | ArchBaichuan | ArchStarcoder | ArchPersimmon | ArchRefact | ArchBert | ArchNomicBert | ArchBloom | ArchStablelm | ArchQwen | ArchQwen2 | ArchQwen2moe | ArchPhi2 | ArchPhi3 | ArchPlamo | ArchCodeshell | ArchOrion | ArchInternlm2 | ArchMinicpm | ArchGemma | ArchStarcoder2 | ArchMamba | ArchXverse | ArchCommandR | ArchDbrx | ArchOlmo; | ||
type MetadataBaseValue = string | number | bigint | boolean; | ||
@@ -43,3 +192,3 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; | ||
} | ||
declare const ARCHITECTURES: readonly ["llama", "mpt", "gptneox", "gptj", "gpt2", "bloom", "falcon", "gemma", "rwkv", "whisper"]; | ||
declare const ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo", "rwkv", "whisper"]; | ||
type Architecture = (typeof ARCHITECTURES)[number]; | ||
@@ -52,44 +201,16 @@ interface General { | ||
} | ||
type Attention<TArchitecture extends Architecture> = { | ||
[K in `${TArchitecture}.attention.head_count`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.head_count_kv`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.layer_norm_epsilon`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.alibi_bias_max`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.clip_kqv`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.use_norm`]: number; | ||
}; | ||
type Rope<TArchitecture extends Architecture> = { | ||
[K in `${TArchitecture}.rope.dimension_count`]: number; | ||
} | { | ||
[K in `${TArchitecture}.rope.freq_base`]: number; | ||
} | { | ||
[K in `${TArchitecture}.rope.scale`]: number; | ||
} | { | ||
[K in `${TArchitecture}.rope.scale_linear`]: number; | ||
}; | ||
type ModelBase<TArchitecture extends Architecture | `encoder.${Extract<Architecture, "whisper">}` | `decoder.${Extract<Architecture, "whisper">}`> = { | ||
[K in `${TArchitecture}.layer_count`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.feed_forward_length`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.context_length`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.embedding_length`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.block_count`]: number; | ||
}; | ||
type MOE<TArchitecture extends Architecture> = { | ||
[K in `${TArchitecture}.expert_count`]: number; | ||
} | { | ||
[K in `${TArchitecture}.expert_used_count`]: number; | ||
}; | ||
type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert"; | ||
interface Tokenizer { | ||
"tokenizer.ggml.model": Architecture; | ||
"tokenizer.ggml.model": TokenizerModel; | ||
"tokenizer.ggml.tokens": string[]; | ||
@@ -103,4 +224,2 @@ "tokenizer.ggml.scores": number[]; | ||
} | ||
type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">; | ||
type TransformerLLM = ModelBase<TransformerLLMArchitecture> & MOE<TransformerLLMArchitecture> & Attention<TransformerLLMArchitecture> & Rope<TransformerLLMArchitecture>; | ||
type RWKV = ModelBase<"rwkv"> & { | ||
@@ -147,2 +266,3 @@ "rwkv.architecture_version": number; | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
computeParametersCount: true; | ||
@@ -157,2 +277,3 @@ }): Promise<GGUFParseOutput & { | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
}): Promise<GGUFParseOutput>; | ||
@@ -169,2 +290,2 @@ declare function ggufAllShards(url: string, params?: { | ||
export { GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename }; | ||
export { Architecture, GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename }; |
@@ -8,2 +8,38 @@ "use strict";Object.defineProperty(exports, "__esModule", {value: true});var __defProp = Object.defineProperty; | ||
// src/transformer-llm.ts | ||
var LLM_ARCHITECTURES = [ | ||
"llama", | ||
"falcon", | ||
"grok", | ||
"gpt2", | ||
"gptj", | ||
"gptneox", | ||
"mpt", | ||
"baichuan", | ||
"starcoder", | ||
"persimmon", | ||
"refact", | ||
"bert", | ||
"nomic-bert", | ||
"bloom", | ||
"stablelm", | ||
"qwen", | ||
"qwen2", | ||
"qwen2moe", | ||
"phi2", | ||
"phi3", | ||
"plamo", | ||
"codeshell", | ||
"orion", | ||
"internlm2", | ||
"minicpm", | ||
"gemma", | ||
"starcoder2", | ||
"mamba", | ||
"xverse", | ||
"command-r", | ||
"dbrx", | ||
"olmo" | ||
]; | ||
// src/types.ts | ||
@@ -51,2 +87,3 @@ var GGMLQuantizationType = /* @__PURE__ */ ((GGMLQuantizationType2) => { | ||
})(GGUFValueType || {}); | ||
var ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"]; | ||
@@ -205,3 +242,3 @@ // src/utils/promisesQueue.ts | ||
async fetchChunk() { | ||
var _a, _b; | ||
var _a, _b, _c, _d; | ||
const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1]; | ||
@@ -211,2 +248,3 @@ const buf = new Uint8Array( | ||
headers: { | ||
...(_d = (_c = this.params) == null ? void 0 : _c.additionalFetchHeaders) != null ? _d : {}, | ||
Range: `bytes=${range[0]}-${range[1]}` | ||
@@ -213,0 +251,0 @@ } |
@@ -0,1 +1,150 @@ | ||
/** This file is auto-generated by generate-llm.ts */ | ||
declare const LLM_ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo"]; | ||
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number]; | ||
type Attention<TArchitecture extends string> = { | ||
[K in `${TArchitecture}.attention.head_count`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.head_count_kv`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.layer_norm_epsilon`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.alibi_bias_max`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.clip_kqv`]: number; | ||
} & { | ||
[K in `${TArchitecture}.attention.use_norm`]: number; | ||
}; | ||
type Rope<TArchitecture extends LLMArchitecture> = { | ||
[K in `${TArchitecture}.rope.dimension_count`]: number; | ||
} & { | ||
[K in `${TArchitecture}.rope.freq_base`]: number; | ||
} & { | ||
[K in `${TArchitecture}.rope.scale`]: number; | ||
} & { | ||
[K in `${TArchitecture}.rope.scale_linear`]: number; | ||
}; | ||
type MOE<TArchitecture extends LLMArchitecture> = { | ||
[K in `${TArchitecture}.expert_count`]: number; | ||
} & { | ||
[K in `${TArchitecture}.expert_used_count`]: number; | ||
}; | ||
type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture> & MOE<TArchitecture> & Attention<TArchitecture> & Rope<TArchitecture>; | ||
declare enum TransformerLLMPoolingType { | ||
UNSPECIFIED = -1, | ||
NONE = 0, | ||
MEAN = 1, | ||
CLS = 2 | ||
} | ||
type ArchLlama = TransformerLLMBase<"llama"> & { | ||
"llama.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchFalcon = TransformerLLMBase<"falcon"> & { | ||
"falcon.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchGrok = TransformerLLMBase<"grok"> & { | ||
"grok.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchGpt2 = TransformerLLMBase<"gpt2"> & { | ||
"gpt2.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchGptj = TransformerLLMBase<"gptj">; | ||
type ArchGptneox = TransformerLLMBase<"gptneox">; | ||
type ArchMpt = TransformerLLMBase<"mpt"> & { | ||
"mpt.attention.layer_norm_epsilon": number; | ||
"mpt.attention.clamp_kqv": number; | ||
"mpt.attention.max_alibi_bias": number; | ||
}; | ||
type ArchBaichuan = TransformerLLMBase<"baichuan"> & { | ||
"baichuan.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchStarcoder = TransformerLLMBase<"starcoder"> & { | ||
"starcoder.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchPersimmon = TransformerLLMBase<"persimmon"> & { | ||
"persimmon.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchRefact = TransformerLLMBase<"refact"> & { | ||
"refact.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchBert = TransformerLLMBase<"bert"> & { | ||
"bert.attention.layer_norm_epsilon": number; | ||
"bert.attention.causal": boolean; | ||
"tokenizer.ggml.token_type_count": number; | ||
"bert.pooling_type": TransformerLLMPoolingType; | ||
}; | ||
type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & { | ||
"nomic-bert.attention.layer_norm_epsilon": number; | ||
"nomic-bert.attention.causal": boolean; | ||
"tokenizer.ggml.token_type_count": number; | ||
"nomic-bert.pooling_type": TransformerLLMPoolingType; | ||
}; | ||
type ArchBloom = TransformerLLMBase<"bloom"> & { | ||
"bloom.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchStablelm = TransformerLLMBase<"stablelm"> & { | ||
"stablelm.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchQwen = TransformerLLMBase<"qwen"> & { | ||
"qwen.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchQwen2 = TransformerLLMBase<"qwen2"> & { | ||
"qwen2.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & { | ||
"qwen2moe.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchPhi2 = TransformerLLMBase<"phi2"> & { | ||
"phi2.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchPhi3 = TransformerLLMBase<"phi3"> & { | ||
"phi3.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchPlamo = TransformerLLMBase<"plamo"> & { | ||
"plamo.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchCodeshell = TransformerLLMBase<"codeshell"> & { | ||
"codeshell.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchOrion = TransformerLLMBase<"orion"> & { | ||
"orion.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchInternlm2 = TransformerLLMBase<"internlm2"> & { | ||
"internlm2.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchMinicpm = TransformerLLMBase<"minicpm"> & { | ||
"minicpm.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchGemma = TransformerLLMBase<"gemma"> & { | ||
"gemma.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & { | ||
"starcoder2.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchMamba = TransformerLLMBase<"mamba"> & { | ||
"mamba.ssm.conv_kernel": number; | ||
"mamba.ssm.inner_size": number; | ||
"mamba.ssm.state_size": number; | ||
"mamba.ssm.time_step_rank": number; | ||
"mamba.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchXverse = TransformerLLMBase<"xverse"> & { | ||
"xverse.attention.layer_norm_rms_epsilon": number; | ||
}; | ||
type ArchCommandR = TransformerLLMBase<"command-r"> & { | ||
"command-r.logit_scale": number; | ||
"command-r.attention.layer_norm_epsilon": number; | ||
}; | ||
type ArchDbrx = TransformerLLMBase<"dbrx"> & { | ||
"dbrx.attention.layer_norm_epsilon": number; | ||
"dbrx.attention.clamp_kqv": number; | ||
}; | ||
type ArchOlmo = TransformerLLMBase<"olmo"> & { | ||
"olmo.attention.layer_norm_epsilon": number; | ||
"olmo.attention.clamp_kqv": number; | ||
}; | ||
type TransformerLLM = ArchLlama | ArchFalcon | ArchGrok | ArchGpt2 | ArchGptj | ArchGptneox | ArchMpt | ArchBaichuan | ArchStarcoder | ArchPersimmon | ArchRefact | ArchBert | ArchNomicBert | ArchBloom | ArchStablelm | ArchQwen | ArchQwen2 | ArchQwen2moe | ArchPhi2 | ArchPhi3 | ArchPlamo | ArchCodeshell | ArchOrion | ArchInternlm2 | ArchMinicpm | ArchGemma | ArchStarcoder2 | ArchMamba | ArchXverse | ArchCommandR | ArchDbrx | ArchOlmo; | ||
type MetadataBaseValue = string | number | bigint | boolean; | ||
@@ -43,3 +192,3 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; | ||
} | ||
declare const ARCHITECTURES: readonly ["llama", "mpt", "gptneox", "gptj", "gpt2", "bloom", "falcon", "gemma", "rwkv", "whisper"]; | ||
declare const ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo", "rwkv", "whisper"]; | ||
type Architecture = (typeof ARCHITECTURES)[number]; | ||
@@ -52,44 +201,16 @@ interface General { | ||
} | ||
type Attention<TArchitecture extends Architecture> = { | ||
[K in `${TArchitecture}.attention.head_count`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.head_count_kv`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.layer_norm_epsilon`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.alibi_bias_max`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.clip_kqv`]: number; | ||
} | { | ||
[K in `${TArchitecture}.attention.use_norm`]: number; | ||
}; | ||
type Rope<TArchitecture extends Architecture> = { | ||
[K in `${TArchitecture}.rope.dimension_count`]: number; | ||
} | { | ||
[K in `${TArchitecture}.rope.freq_base`]: number; | ||
} | { | ||
[K in `${TArchitecture}.rope.scale`]: number; | ||
} | { | ||
[K in `${TArchitecture}.rope.scale_linear`]: number; | ||
}; | ||
type ModelBase<TArchitecture extends Architecture | `encoder.${Extract<Architecture, "whisper">}` | `decoder.${Extract<Architecture, "whisper">}`> = { | ||
[K in `${TArchitecture}.layer_count`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.feed_forward_length`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.context_length`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.embedding_length`]: number; | ||
} | { | ||
} & { | ||
[K in `${TArchitecture}.block_count`]: number; | ||
}; | ||
type MOE<TArchitecture extends Architecture> = { | ||
[K in `${TArchitecture}.expert_count`]: number; | ||
} | { | ||
[K in `${TArchitecture}.expert_used_count`]: number; | ||
}; | ||
type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert"; | ||
interface Tokenizer { | ||
"tokenizer.ggml.model": Architecture; | ||
"tokenizer.ggml.model": TokenizerModel; | ||
"tokenizer.ggml.tokens": string[]; | ||
@@ -103,4 +224,2 @@ "tokenizer.ggml.scores": number[]; | ||
} | ||
type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">; | ||
type TransformerLLM = ModelBase<TransformerLLMArchitecture> & MOE<TransformerLLMArchitecture> & Attention<TransformerLLMArchitecture> & Rope<TransformerLLMArchitecture>; | ||
type RWKV = ModelBase<"rwkv"> & { | ||
@@ -147,2 +266,3 @@ "rwkv.architecture_version": number; | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
computeParametersCount: true; | ||
@@ -157,2 +277,3 @@ }): Promise<GGUFParseOutput & { | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
}): Promise<GGUFParseOutput>; | ||
@@ -169,2 +290,2 @@ declare function ggufAllShards(url: string, params?: { | ||
export { GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename }; | ||
export { Architecture, GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename }; |
@@ -34,2 +34,38 @@ "use strict"; | ||
// src/transformer-llm.ts | ||
var LLM_ARCHITECTURES = [ | ||
"llama", | ||
"falcon", | ||
"grok", | ||
"gpt2", | ||
"gptj", | ||
"gptneox", | ||
"mpt", | ||
"baichuan", | ||
"starcoder", | ||
"persimmon", | ||
"refact", | ||
"bert", | ||
"nomic-bert", | ||
"bloom", | ||
"stablelm", | ||
"qwen", | ||
"qwen2", | ||
"qwen2moe", | ||
"phi2", | ||
"phi3", | ||
"plamo", | ||
"codeshell", | ||
"orion", | ||
"internlm2", | ||
"minicpm", | ||
"gemma", | ||
"starcoder2", | ||
"mamba", | ||
"xverse", | ||
"command-r", | ||
"dbrx", | ||
"olmo" | ||
]; | ||
// src/types.ts | ||
@@ -77,2 +113,3 @@ var GGMLQuantizationType = /* @__PURE__ */ ((GGMLQuantizationType2) => { | ||
})(GGUFValueType || {}); | ||
var ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"]; | ||
@@ -235,2 +272,3 @@ // src/utils/promisesQueue.ts | ||
headers: { | ||
...this.params?.additionalFetchHeaders ?? {}, | ||
Range: `bytes=${range[0]}-${range[1]}` | ||
@@ -237,0 +275,0 @@ } |
{ | ||
"name": "@huggingface/gguf", | ||
"packageManager": "pnpm@8.10.5", | ||
"version": "0.1.2", | ||
"version": "0.1.3", | ||
"description": "a GGUF parser that works on remotely hosted files", | ||
@@ -49,2 +49,3 @@ "repository": "https://github.com/huggingface/huggingface.js.git", | ||
"build": "tsup src/index.ts --format cjs,esm --clean --dts", | ||
"build:llm": "tsx scripts/generate-llm.ts && pnpm run format", | ||
"test": "vitest run", | ||
@@ -51,0 +52,0 @@ "check": "tsc" |
@@ -6,3 +6,3 @@ import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types"; | ||
export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types"; | ||
export { GGUFValueType, GGMLQuantizationType } from "./types"; | ||
export { GGUFValueType, GGMLQuantizationType, Architecture } from "./types"; | ||
export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions"; | ||
@@ -68,2 +68,3 @@ | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
} | ||
@@ -87,2 +88,3 @@ ) { | ||
headers: { | ||
...(this.params?.additionalFetchHeaders ?? {}), | ||
Range: `bytes=${range[0]}-${range[1]}`, | ||
@@ -215,2 +217,3 @@ }, | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
computeParametersCount: true; | ||
@@ -226,2 +229,3 @@ } | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
} | ||
@@ -236,2 +240,3 @@ ): Promise<GGUFParseOutput>; | ||
fetch?: typeof fetch; | ||
additionalFetchHeaders?: Record<string, string>; | ||
computeParametersCount?: boolean; | ||
@@ -238,0 +243,0 @@ } |
@@ -0,1 +1,4 @@ | ||
import type { TransformerLLM } from "./transformer-llm"; | ||
import { LLM_ARCHITECTURES } from "./transformer-llm"; | ||
export type MetadataBaseValue = string | number | bigint | boolean; | ||
@@ -47,15 +50,3 @@ export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. | ||
export const ARCHITECTURES = [ | ||
"llama", | ||
"mpt", | ||
"gptneox", | ||
"gptj", | ||
"gpt2", | ||
"bloom", | ||
"falcon", | ||
"gemma", | ||
"rwkv", | ||
"whisper", | ||
] as const; | ||
const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const; | ||
export type Architecture = (typeof ARCHITECTURES)[number]; | ||
@@ -70,18 +61,3 @@ | ||
type Attention<TArchitecture extends Architecture> = | ||
| { [K in `${TArchitecture}.attention.head_count`]: number } | ||
| { [K in `${TArchitecture}.attention.head_count_kv`]: number } | ||
| { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number } | ||
| { [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number } | ||
| { [K in `${TArchitecture}.attention.alibi_bias_max`]: number } | ||
| { [K in `${TArchitecture}.attention.clip_kqv`]: number } | ||
| { [K in `${TArchitecture}.attention.use_norm`]: number }; | ||
type Rope<TArchitecture extends Architecture> = | ||
| { [K in `${TArchitecture}.rope.dimension_count`]: number } | ||
| { [K in `${TArchitecture}.rope.freq_base`]: number } | ||
| { [K in `${TArchitecture}.rope.scale`]: number } | ||
| { [K in `${TArchitecture}.rope.scale_linear`]: number }; | ||
type ModelBase< | ||
export type ModelBase< | ||
TArchitecture extends | ||
@@ -91,15 +67,9 @@ | Architecture | ||
| `decoder.${Extract<Architecture, "whisper">}`, | ||
> = | ||
| { [K in `${TArchitecture}.layer_count`]: number } | ||
| { [K in `${TArchitecture}.feed_forward_length`]: number } | ||
| { [K in `${TArchitecture}.context_length`]: number } | ||
| { [K in `${TArchitecture}.embedding_length`]: number } | ||
| { [K in `${TArchitecture}.block_count`]: number }; | ||
> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & { | ||
[K in `${TArchitecture}.context_length`]: number; | ||
} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number }; | ||
type MOE<TArchitecture extends Architecture> = | ||
| { [K in `${TArchitecture}.expert_count`]: number } | ||
| { [K in `${TArchitecture}.expert_used_count`]: number }; | ||
type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert"; | ||
interface Tokenizer { | ||
"tokenizer.ggml.model": Architecture; | ||
"tokenizer.ggml.model": TokenizerModel; | ||
"tokenizer.ggml.tokens": string[]; | ||
@@ -114,8 +84,2 @@ "tokenizer.ggml.scores": number[]; | ||
type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">; | ||
type TransformerLLM = ModelBase<TransformerLLMArchitecture> & | ||
MOE<TransformerLLMArchitecture> & | ||
Attention<TransformerLLMArchitecture> & | ||
Rope<TransformerLLMArchitecture>; | ||
export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number }; | ||
@@ -122,0 +86,0 @@ export type LLM = TransformerLLM | RWKV; |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance. | ||
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance. | ||
Found 1 instance in 1 package
140269
17
3391