@huggingface/gguf - npm Package Compare versions

Comparing version 0.1.2 to 0.1.3

src/transformer-llm.ts

197

dist/browser/index.d.ts

@@ -0,1 +1,150 @@

/** This file is auto-generated by generate-llm.ts */
declare const LLM_ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo"];
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
type Attention<TArchitecture extends string> = {
[K in `${TArchitecture}.attention.head_count`]: number;
} & {
[K in `${TArchitecture}.attention.head_count_kv`]: number;
} & {
[K in `${TArchitecture}.attention.layer_norm_epsilon`]: number;
} & {
[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number;
} & {
[K in `${TArchitecture}.attention.alibi_bias_max`]: number;
} & {
[K in `${TArchitecture}.attention.clip_kqv`]: number;
} & {
[K in `${TArchitecture}.attention.use_norm`]: number;
};
type Rope<TArchitecture extends LLMArchitecture> = {
[K in `${TArchitecture}.rope.dimension_count`]: number;
} & {
[K in `${TArchitecture}.rope.freq_base`]: number;
} & {
[K in `${TArchitecture}.rope.scale`]: number;
} & {
[K in `${TArchitecture}.rope.scale_linear`]: number;
};
type MOE<TArchitecture extends LLMArchitecture> = {
[K in `${TArchitecture}.expert_count`]: number;
} & {
[K in `${TArchitecture}.expert_used_count`]: number;
};
type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture> & MOE<TArchitecture> & Attention<TArchitecture> & Rope<TArchitecture>;
declare enum TransformerLLMPoolingType {
UNSPECIFIED = -1,
NONE = 0,
MEAN = 1,
CLS = 2
}
type ArchLlama = TransformerLLMBase<"llama"> & {
"llama.attention.layer_norm_rms_epsilon": number;
};
type ArchFalcon = TransformerLLMBase<"falcon"> & {
"falcon.attention.layer_norm_epsilon": number;
};
type ArchGrok = TransformerLLMBase<"grok"> & {
"grok.attention.layer_norm_rms_epsilon": number;
};
type ArchGpt2 = TransformerLLMBase<"gpt2"> & {
"gpt2.attention.layer_norm_epsilon": number;
};
type ArchGptj = TransformerLLMBase<"gptj">;
type ArchGptneox = TransformerLLMBase<"gptneox">;
type ArchMpt = TransformerLLMBase<"mpt"> & {
"mpt.attention.layer_norm_epsilon": number;
"mpt.attention.clamp_kqv": number;
"mpt.attention.max_alibi_bias": number;
};
type ArchBaichuan = TransformerLLMBase<"baichuan"> & {
"baichuan.attention.layer_norm_rms_epsilon": number;
};
type ArchStarcoder = TransformerLLMBase<"starcoder"> & {
"starcoder.attention.layer_norm_epsilon": number;
};
type ArchPersimmon = TransformerLLMBase<"persimmon"> & {
"persimmon.attention.layer_norm_epsilon": number;
};
type ArchRefact = TransformerLLMBase<"refact"> & {
"refact.attention.layer_norm_rms_epsilon": number;
};
type ArchBert = TransformerLLMBase<"bert"> & {
"bert.attention.layer_norm_epsilon": number;
"bert.attention.causal": boolean;
"tokenizer.ggml.token_type_count": number;
"bert.pooling_type": TransformerLLMPoolingType;
};
type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
"nomic-bert.attention.layer_norm_epsilon": number;
"nomic-bert.attention.causal": boolean;
"tokenizer.ggml.token_type_count": number;
"nomic-bert.pooling_type": TransformerLLMPoolingType;
};
type ArchBloom = TransformerLLMBase<"bloom"> & {
"bloom.attention.layer_norm_epsilon": number;
};
type ArchStablelm = TransformerLLMBase<"stablelm"> & {
"stablelm.attention.layer_norm_epsilon": number;
};
type ArchQwen = TransformerLLMBase<"qwen"> & {
"qwen.attention.layer_norm_rms_epsilon": number;
};
type ArchQwen2 = TransformerLLMBase<"qwen2"> & {
"qwen2.attention.layer_norm_rms_epsilon": number;
};
type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
"qwen2moe.attention.layer_norm_rms_epsilon": number;
};
type ArchPhi2 = TransformerLLMBase<"phi2"> & {
"phi2.attention.layer_norm_epsilon": number;
};
type ArchPhi3 = TransformerLLMBase<"phi3"> & {
"phi3.attention.layer_norm_rms_epsilon": number;
};
type ArchPlamo = TransformerLLMBase<"plamo"> & {
"plamo.attention.layer_norm_rms_epsilon": number;
};
type ArchCodeshell = TransformerLLMBase<"codeshell"> & {
"codeshell.attention.layer_norm_epsilon": number;
};
type ArchOrion = TransformerLLMBase<"orion"> & {
"orion.attention.layer_norm_epsilon": number;
};
type ArchInternlm2 = TransformerLLMBase<"internlm2"> & {
"internlm2.attention.layer_norm_rms_epsilon": number;
};
type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
"minicpm.attention.layer_norm_rms_epsilon": number;
};
type ArchGemma = TransformerLLMBase<"gemma"> & {
"gemma.attention.layer_norm_rms_epsilon": number;
};
type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & {
"starcoder2.attention.layer_norm_epsilon": number;
};
type ArchMamba = TransformerLLMBase<"mamba"> & {
"mamba.ssm.conv_kernel": number;
"mamba.ssm.inner_size": number;
"mamba.ssm.state_size": number;
"mamba.ssm.time_step_rank": number;
"mamba.attention.layer_norm_rms_epsilon": number;
};
type ArchXverse = TransformerLLMBase<"xverse"> & {
"xverse.attention.layer_norm_rms_epsilon": number;
};
type ArchCommandR = TransformerLLMBase<"command-r"> & {
"command-r.logit_scale": number;
"command-r.attention.layer_norm_epsilon": number;
};
type ArchDbrx = TransformerLLMBase<"dbrx"> & {
"dbrx.attention.layer_norm_epsilon": number;
"dbrx.attention.clamp_kqv": number;
};
type ArchOlmo = TransformerLLMBase<"olmo"> & {
"olmo.attention.layer_norm_epsilon": number;
"olmo.attention.clamp_kqv": number;
};
type TransformerLLM = ArchLlama | ArchFalcon | ArchGrok | ArchGpt2 | ArchGptj | ArchGptneox | ArchMpt | ArchBaichuan | ArchStarcoder | ArchPersimmon | ArchRefact | ArchBert | ArchNomicBert | ArchBloom | ArchStablelm | ArchQwen | ArchQwen2 | ArchQwen2moe | ArchPhi2 | ArchPhi3 | ArchPlamo | ArchCodeshell | ArchOrion | ArchInternlm2 | ArchMinicpm | ArchGemma | ArchStarcoder2 | ArchMamba | ArchXverse | ArchCommandR | ArchDbrx | ArchOlmo;
type MetadataBaseValue = string | number | bigint | boolean;
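
Aside from the architecture list itself, the generated declarations above rely on TypeScript template-literal mapped types: one generic definition expands into the dotted, architecture-prefixed GGUF metadata keys. A minimal sketch of the pattern, using illustrative type names that are not part of the package:

// Illustrative only: a template-literal mapped type expands one generic
// definition into a concrete, architecture-prefixed metadata key.
type AttentionKeys<TArch extends string> = {
    [K in `${TArch}.attention.head_count`]: number;
};
// Resolves to: { "llama.attention.head_count": number }
type LlamaHeadCount = AttentionKeys<"llama">;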

@@ -43,3 +192,3 @@ type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[];

}
declare const ARCHITECTURES: readonly ["llama", "mpt", "gptneox", "gptj", "gpt2", "bloom", "falcon", "gemma", "rwkv", "whisper"];
declare const ARCHITECTURES: readonly ["llama", "falcon", "grok", "gpt2", "gptj", "gptneox", "mpt", "baichuan", "starcoder", "persimmon", "refact", "bert", "nomic-bert", "bloom", "stablelm", "qwen", "qwen2", "qwen2moe", "phi2", "phi3", "plamo", "codeshell", "orion", "internlm2", "minicpm", "gemma", "starcoder2", "mamba", "xverse", "command-r", "dbrx", "olmo", "rwkv", "whisper"];
type Architecture = (typeof ARCHITECTURES)[number];

@@ -52,44 +201,16 @@ interface General {

}
-type Attention<TArchitecture extends Architecture> = {
-    [K in `${TArchitecture}.attention.head_count`]: number;
-} | {
-    [K in `${TArchitecture}.attention.head_count_kv`]: number;
-} | {
-    [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number;
-} | {
-    [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number;
-} | {
-    [K in `${TArchitecture}.attention.alibi_bias_max`]: number;
-} | {
-    [K in `${TArchitecture}.attention.clip_kqv`]: number;
-} | {
-    [K in `${TArchitecture}.attention.use_norm`]: number;
-};
-type Rope<TArchitecture extends Architecture> = {
-    [K in `${TArchitecture}.rope.dimension_count`]: number;
-} | {
-    [K in `${TArchitecture}.rope.freq_base`]: number;
-} | {
-    [K in `${TArchitecture}.rope.scale`]: number;
-} | {
-    [K in `${TArchitecture}.rope.scale_linear`]: number;
-};
type ModelBase<TArchitecture extends Architecture | `encoder.${Extract<Architecture, "whisper">}` | `decoder.${Extract<Architecture, "whisper">}`> = {
    [K in `${TArchitecture}.layer_count`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.feed_forward_length`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.context_length`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.embedding_length`]: number;
-} | {
+} & {
    [K in `${TArchitecture}.block_count`]: number;
};
-type MOE<TArchitecture extends Architecture> = {
-    [K in `${TArchitecture}.expert_count`]: number;
-} | {
-    [K in `${TArchitecture}.expert_used_count`]: number;
-};
+type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
interface Tokenizer {
-    "tokenizer.ggml.model": Architecture;
+    "tokenizer.ggml.model": TokenizerModel;
"tokenizer.ggml.tokens": string[];

@@ -103,4 +224,2 @@ "tokenizer.ggml.scores": number[];

}
-type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">;
-type TransformerLLM = ModelBase<TransformerLLMArchitecture> & MOE<TransformerLLMArchitecture> & Attention<TransformerLLMArchitecture> & Rope<TransformerLLMArchitecture>;
type RWKV = ModelBase<"rwkv"> & {

@@ -147,2 +266,3 @@ "rwkv.architecture_version": number;

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
computeParametersCount: true;

@@ -157,2 +277,3 @@ }): Promise<GGUFParseOutput & {

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
}): Promise<GGUFParseOutput>;

@@ -169,2 +290,2 @@ declare function ggufAllShards(url: string, params?: {

-export { GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename };
+export { Architecture, GGMLQuantizationType, GGUFMetadata, GGUFParseOutput, GGUFTensorInfo, GGUFValueType, GGUF_QUANT_DESCRIPTIONS, GgufShardFileInfo, MetadataBaseValue, MetadataValue, RE_GGUF_FILE, RE_GGUF_SHARD_FILE, Version, gguf, ggufAllShards, parseGgufShardFilename };

@@ -8,2 +8,38 @@ "use strict";Object.defineProperty(exports, "__esModule", {value: true});var __defProp = Object.defineProperty;

// src/transformer-llm.ts
var LLM_ARCHITECTURES = [
"llama",
"falcon",
"grok",
"gpt2",
"gptj",
"gptneox",
"mpt",
"baichuan",
"starcoder",
"persimmon",
"refact",
"bert",
"nomic-bert",
"bloom",
"stablelm",
"qwen",
"qwen2",
"qwen2moe",
"phi2",
"phi3",
"plamo",
"codeshell",
"orion",
"internlm2",
"minicpm",
"gemma",
"starcoder2",
"mamba",
"xverse",
"command-r",
"dbrx",
"olmo"
];
// src/types.ts

@@ -51,2 +87,3 @@ var GGMLQuantizationType = /* @__PURE__ */ ((GGMLQuantizationType2) => {

})(GGUFValueType || {});
+var ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"];

@@ -205,3 +242,3 @@ // src/utils/promisesQueue.ts

async fetchChunk() {
-var _a, _b;
+var _a, _b, _c, _d;
const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1];

@@ -211,2 +248,3 @@ const buf = new Uint8Array(

headers: {
+...(_d = (_c = this.params) == null ? void 0 : _c.additionalFetchHeaders) != null ? _d : {},
Range: `bytes=${range[0]}-${range[1]}`

@@ -213,0 +251,0 @@ }


@@ -34,2 +34,38 @@ "use strict";

// src/transformer-llm.ts
var LLM_ARCHITECTURES = [
"llama",
"falcon",
"grok",
"gpt2",
"gptj",
"gptneox",
"mpt",
"baichuan",
"starcoder",
"persimmon",
"refact",
"bert",
"nomic-bert",
"bloom",
"stablelm",
"qwen",
"qwen2",
"qwen2moe",
"phi2",
"phi3",
"plamo",
"codeshell",
"orion",
"internlm2",
"minicpm",
"gemma",
"starcoder2",
"mamba",
"xverse",
"command-r",
"dbrx",
"olmo"
];
// src/types.ts

@@ -77,2 +113,3 @@ var GGMLQuantizationType = /* @__PURE__ */ ((GGMLQuantizationType2) => {

})(GGUFValueType || {});
+var ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"];

@@ -235,2 +272,3 @@ // src/utils/promisesQueue.ts

headers: {
+...this.params?.additionalFetchHeaders ?? {},
Range: `bytes=${range[0]}-${range[1]}`

@@ -237,0 +275,0 @@ }

3

package.json
{
"name": "@huggingface/gguf",
"packageManager": "pnpm@8.10.5",
"version": "0.1.2",
"version": "0.1.3",
"description": "a GGUF parser that works on remotely hosted files",

@@ -49,2 +49,3 @@ "repository": "https://github.com/huggingface/huggingface.js.git",

"build": "tsup src/index.ts --format cjs,esm --clean --dts",
"build:llm": "tsx scripts/generate-llm.ts && pnpm run format",
"test": "vitest run",

@@ -51,0 +52,0 @@ "check": "tsc"

@@ -6,3 +6,3 @@ import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";

export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
-export { GGUFValueType, GGMLQuantizationType } from "./types";
+export { GGUFValueType, GGMLQuantizationType, Architecture } from "./types";
export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions";

@@ -68,2 +68,3 @@

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
}

@@ -87,2 +88,3 @@ ) {

headers: {
+...(this.params?.additionalFetchHeaders ?? {}),
Range: `bytes=${range[0]}-${range[1]}`,

@@ -215,2 +217,3 @@ },

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
computeParametersCount: true;

@@ -226,2 +229,3 @@ }

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
}

@@ -236,2 +240,3 @@ ): Promise<GGUFParseOutput>;

fetch?: typeof fetch;
+additionalFetchHeaders?: Record<string, string>;
computeParametersCount?: boolean;

@@ -238,0 +243,0 @@ }
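
The typings above add an optional additionalFetchHeaders parameter next to the existing fetch override. A usage sketch, assuming GGUFParseOutput exposes the metadata and tensorInfos fields exported by the package; the URL and token are placeholders:

import { gguf } from "@huggingface/gguf";

// Placeholder URL and token, for illustration only.
const url = "https://example.com/model.gguf";

const { metadata, tensorInfos } = await gguf(url, {
    // New in 0.1.3: merged into the headers of every HTTP range request,
    // e.g. to authenticate against a gated or private file.
    additionalFetchHeaders: { Authorization: "Bearer <token>" },
});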

@@ -0,1 +1,4 @@

+import type { TransformerLLM } from "./transformer-llm";
+import { LLM_ARCHITECTURES } from "./transformer-llm";
export type MetadataBaseValue = string | number | bigint | boolean;

@@ -47,15 +50,3 @@ export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested.

-export const ARCHITECTURES = [
-    "llama",
-    "mpt",
-    "gptneox",
-    "gptj",
-    "gpt2",
-    "bloom",
-    "falcon",
-    "gemma",
-    "rwkv",
-    "whisper",
-] as const;
+const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const;
export type Architecture = (typeof ARCHITECTURES)[number];

@@ -70,18 +61,3 @@

-type Attention<TArchitecture extends Architecture> =
-    | { [K in `${TArchitecture}.attention.head_count`]: number }
-    | { [K in `${TArchitecture}.attention.head_count_kv`]: number }
-    | { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number }
-    | { [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number }
-    | { [K in `${TArchitecture}.attention.alibi_bias_max`]: number }
-    | { [K in `${TArchitecture}.attention.clip_kqv`]: number }
-    | { [K in `${TArchitecture}.attention.use_norm`]: number };
-type Rope<TArchitecture extends Architecture> =
-    | { [K in `${TArchitecture}.rope.dimension_count`]: number }
-    | { [K in `${TArchitecture}.rope.freq_base`]: number }
-    | { [K in `${TArchitecture}.rope.scale`]: number }
-    | { [K in `${TArchitecture}.rope.scale_linear`]: number };
-type ModelBase<
+export type ModelBase<
TArchitecture extends

@@ -91,15 +67,9 @@ | Architecture

| `decoder.${Extract<Architecture, "whisper">}`,
-> =
-    | { [K in `${TArchitecture}.layer_count`]: number }
-    | { [K in `${TArchitecture}.feed_forward_length`]: number }
-    | { [K in `${TArchitecture}.context_length`]: number }
-    | { [K in `${TArchitecture}.embedding_length`]: number }
-    | { [K in `${TArchitecture}.block_count`]: number };
+> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & {
+    [K in `${TArchitecture}.context_length`]: number;
+} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number };
-type MOE<TArchitecture extends Architecture> =
-    | { [K in `${TArchitecture}.expert_count`]: number }
-    | { [K in `${TArchitecture}.expert_used_count`]: number };
+type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
interface Tokenizer {
-    "tokenizer.ggml.model": Architecture;
+    "tokenizer.ggml.model": TokenizerModel;
"tokenizer.ggml.tokens": string[];

@@ -114,8 +84,2 @@ "tokenizer.ggml.scores": number[];

-type TransformerLLMArchitecture = Exclude<Architecture, "rwkv" | "whisper">;
-type TransformerLLM = ModelBase<TransformerLLMArchitecture> &
-    MOE<TransformerLLMArchitecture> &
-    Attention<TransformerLLMArchitecture> &
-    Rope<TransformerLLMArchitecture>;
export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };

@@ -122,0 +86,0 @@ export type LLM = TransformerLLM | RWKV;
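
The recurring fix in this release is replacing unions ("|") with intersections ("&") in these key-map types. A two-key sketch of why that matters, with illustrative types that are not package code:

// With a union, TypeScript only guarantees ONE of the key groups exists,
// so reading both keys off the same metadata object fails to type-check.
type AsUnion = { "llama.block_count": number } | { "llama.context_length": number };
// With an intersection, every key group is present, matching a real GGUF
// header where all of these fields coexist.
type AsIntersection = { "llama.block_count": number } & { "llama.context_length": number };

declare const meta: AsIntersection;
meta["llama.block_count"]; // OK under the intersection
meta["llama.context_length"]; // OK under the intersection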

