@xenova/transformers
Comparing version 1.0.3 to 1.0.4
{
  "name": "@xenova/transformers",
- "version": "1.0.3",
+ "version": "1.0.4",
  "description": "Run 🤗 Transformers in your browser! We currently support BERT, DistilBERT, T5, GPT2, and BART models, for a variety of tasks including: masked language modelling, text classification, translation, summarization, question answering, and text generation.",
@@ -5,0 +5,0 @@ "main": "./src/transformers.js",
@@ -7,6 +7,42 @@ # Transformers.js

Check out our demo at [https://xenova.github.io/transformers.js/](https://xenova.github.io/transformers.js/). As you'll see, everything runs inside the browser!

## Getting Started

### Installation

If you use [npm](https://www.npmjs.com/package/@xenova/transformers), you can install it using:

```bash
npm i @xenova/transformers
```

Alternatively, you can use it in a `<script>` tag from a CDN, for example:

```html
<!-- Using jsDelivr -->
<script src="https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/transformers.min.js"></script>

<!-- or UNPKG -->
<script src="https://www.unpkg.com/@xenova/transformers/dist/transformers.min.js"></script>
```
### Setup

By default, Transformers.js uses hosted [models](https://huggingface.co/Xenova/transformers.js/tree/main/quantized) and precompiled [WASM binaries](https://cdn.jsdelivr.net/npm/@xenova/transformers/dist/), which should work out-of-the-box. You can override this behaviour as follows:

```javascript
import { env } from "@xenova/transformers";

// Use a different host for models.
// - `remoteURL` defaults to the HuggingFace Hub
// - `localURL` defaults to '/models/onnx/quantized/'
env.remoteURL = 'https://www.example.com/';
env.localURL = '/path/to/models/';

// Set whether to use remote or local models. Defaults to true.
// - If true, use the path specified by `env.remoteURL`.
// - If false, use the path specified by `env.localURL`.
env.remoteModels = false;

// Set parent path of .wasm files. Defaults to use a CDN.
env.onnx.wasm.wasmPaths = '/path/to/files/';
```
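For example, here is a minimal sketch of pointing the library at local files (the model name and the `{model}/{task}` folder layout follow the conventions shown elsewhere in this diff):

```javascript
import { pipeline, env } from "@xenova/transformers";

// Read models from `env.localURL` instead of the HuggingFace Hub.
env.remoteModels = false;
env.localURL = '/path/to/models/';

// Resolves to '/path/to/models/distilbert-base-uncased-finetuned-sst-2-english/sequence-classification'
let classifier = await pipeline('text-classification', 'distilbert-base-uncased-finetuned-sst-2-english');
```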
### Quick tour

It's super easy to translate from existing code!
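(The quick-tour code itself is omitted from this diff view; the following is a sketch of the pattern, assuming the same `sentiment-analysis` task used in the examples below.)

```javascript
// Python (🤗 Transformers):
//   from transformers import pipeline
//   classifier = pipeline('sentiment-analysis')
//   classifier('I love transformers!')

// JavaScript (Transformers.js): the same idea, but asynchronous.
import { pipeline } from "@xenova/transformers";
let classifier = await pipeline('sentiment-analysis');
let output = await classifier('I love transformers!');
```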
@@ -37,19 +73,8 @@
- *Note:* If running locally, it is assumed that the required model files are located in `./models/onnx/quantized/`. To override this behaviour, you can specify the model path or URL as a second argument to the pipeline function. For example, to use models from the HuggingFace hub:
+ *Note:* In the same way as the Python library, you can use a different model by providing its name as the second argument to the pipeline function. For example:
```javascript
- // Set host, model_id and task:
- const hf_url = 'https://huggingface.co/Xenova/transformers.js/resolve/main/quantized';
- const model_id = 'distilbert-base-uncased-finetuned-sst-2-english';
- const task = 'sequence-classification';
- const model_url = `${hf_url}/${model_id}/${task}`;
- // You can now create the classifier using:
- let classifier = await pipeline('sentiment-analysis', model_url);
+ // Use a different model for sentiment-analysis
+ let classifier = await pipeline('sentiment-analysis', 'nlptown/bert-base-multilingual-uncased-sentiment');
```
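Under the hood, the model name and a task-specific folder suffix are now joined onto `env.remoteURL` (or `env.localURL`) to locate the model files, as the `pipelines.js` diff below shows.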
## Demo

Check out our demo at [https://xenova.github.io/transformers.js/](https://xenova.github.io/transformers.js/). As you'll see, everything runs inside the browser!

## Usage

@@ -56,0 +81,0 @@
@@ -5,3 +5,4 @@ const {
      getTopItems,
-     cos_sim
+     cos_sim,
+     pathJoin
  } = require("./utils.js");
@@ -21,5 +22,6 @@
+ const {
+     env
+ } = require('./env.js')
- const HF_HUB_MODEL_PATH_TEMPLATE = 'https://huggingface.co/Xenova/transformers.js/resolve/main/quantized/{model}/{task}/';
- const DEFAULT_MODEL_PATH_TEMPLATE = '/models/onnx/quantized/{model}/{task}';
@@ -329,7 +331,17 @@ class Pipeline extends Callable {
  const TASK_NAME_MAPPING = {
-     // Fix mismatch between pipeline name and exports (folder name)
+     // Fix mismatch between pipeline's task name and exports (folder name)
      'text-classification': 'sequence-classification',
-     'embeddings': 'default'
+     'embeddings': 'default',
+     'fill-mask': 'masked-lm',
+     'summarization': 'seq2seq-lm-with-past',
+     'text-generation': 'causal-lm-with-past',
  }
+
+ const TASK_PREFIX_MAPPING = {
+     // if task starts with one of these, set the corresponding folder name
+     'translation': 'seq2seq-lm-with-past',
+ }

  const TASK_ALIASES = {
@@ -346,3 +358,2 @@ "sentiment-analysis": "text-classification",
  {
-     default_model_path_template = DEFAULT_MODEL_PATH_TEMPLATE,
      progress_callback = null
@@ -362,12 +373,33 @@ } = {}
- // Use model if specified, otherwise, use default
- let modelPath = model;
- if (!modelPath) {
-     modelPath = default_model_path_template
-         .replace('{model}', pipelineInfo.default.model)
-         .replace('{task}', TASK_NAME_MAPPING[task] ?? task);
-     console.log(`No model specified. Attempting to load default model from ${default_model_path_template}.`);
- }
+ if (!model) {
+     model = pipelineInfo.default.model
+     console.log(`No model specified. Using default model: "${model}".`);
+ }
+
+ // determine suffix
+ let suffix = TASK_NAME_MAPPING[task];
+ if (!suffix) {
+     // try to get it from the prefix mapping
+     for (const [prefix, mapping] of Object.entries(TASK_PREFIX_MAPPING)) {
+         if (task.startsWith(prefix)) {
+             suffix = mapping;
+             break;
+         }
+     }
+ }
+ if (!suffix) {
+     // Still not set... so, we default to the name given
+     suffix = task;
+ }
+
+ // Construct model path
+ model = pathJoin(
+     (env.remoteModels) ? env.remoteURL : env.localURL, // host prefix
+     model, // model name
+     suffix, // task suffix
+ )

  let modelClass = pipelineInfo.model;
@@ -378,4 +410,4 @@ let pipelineClass = pipelineInfo.pipeline;
  let [pipelineTokenizer, pipelineModel] = await Promise.all([
-     AutoTokenizer.from_pretrained(modelPath, progress_callback),
-     modelClass.from_pretrained(modelPath, progress_callback)
+     AutoTokenizer.from_pretrained(model, progress_callback),
+     modelClass.from_pretrained(model, progress_callback)
  ])
@@ -382,0 +414,0 @@
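To illustrate the new resolution logic, here is a hypothetical walkthrough (a sketch; the model and task come from the test suite further down, and the default `env.remoteURL` is the HuggingFace Hub path shown above):

```javascript
// Assume env.remoteModels === true and the default remote host.
let task = 'translation_en_to_de';   // no exact TASK_NAME_MAPPING entry...
let suffix = 'seq2seq-lm-with-past'; // ...but it matches the 'translation' prefix
let model = 't5-small';              // user-supplied model name

// pathJoin(env.remoteURL, model, suffix) then produces:
// 'https://huggingface.co/Xenova/transformers.js/resolve/main/quantized/t5-small/seq2seq-lm-with-past'
```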
@@ -21,4 +21,5 @@
  } = require("./pipelines.js");
- const { env } = require('onnxruntime-web');
+ const { env } = require('./env.js');

  const moduleExports = {
@@ -44,3 +45,3 @@ // Tokenizers
-     // onnx runtime web env
+     // environment variables
      env
@@ -47,0 +48,0 @@ };
- const fs = require('fs');
- // Use caching when available
- const CACHE_AVAILABLE = typeof self !== 'undefined' && 'caches' in self;
- const FS_AVAILABLE = !isEmpty(fs); // check if file system is available
+ const { env } = require('./env.js');
@@ -118,3 +116,3 @@ class FileResponse {
- if (FS_AVAILABLE && !isValidHttpUrl(url)) {
+ if (env.useFS && !isValidHttpUrl(url)) {
      return new FileResponse(url)
@@ -131,6 +129,2 @@
- function isEmpty(obj) {
-     return Object.keys(obj).length === 0;
- }

  async function getModelFile(modelPath, fileName, progressCallback = null) {
@@ -146,3 +140,3 @@
  let cache;
- if (CACHE_AVAILABLE) {
+ if (env.useCache) {
      cache = await caches.open('transformers-cache');
@@ -156,3 +150,3 @@ }
- if (!CACHE_AVAILABLE || (response = await cache.match(request)) === undefined) {
+ if (!env.useCache || (response = await cache.match(request)) === undefined) {
      // Caching not available, or model is not cached, so we perform the request
@@ -165,3 +159,3 @@ response = await getFile(request);
- if (CACHE_AVAILABLE) {
+ if (env.useCache) {
      // only clone if cache available
@@ -168,0 +162,0 @@ responseToCache = response.clone();
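In other words, caching and filesystem access are now controlled by flags on the shared `env` object instead of being auto-detected. A minimal sketch of opting out (flag names as they appear in this diff):

```javascript
import { env } from "@xenova/transformers";

// Skip the browser Cache API store ('transformers-cache') when fetching model files.
env.useCache = false;

// Never read model files from the local filesystem.
env.useFS = false;
```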
@@ -6,3 +6,2 @@
- const path = require('path')
  const { pipeline, env } = require('..')
@@ -12,11 +11,8 @@
  // This is done by setting numThreads to 1
- env.wasm.numThreads = 1
+ env.onnx.wasm.numThreads = 1

- // Set base model dir for testing.
- const BASE_MODEL_DIR = path.join(path.dirname(__dirname), '/models/onnx/quantized/')
- // Uncomment to test online
- // const BASE_MODEL_DIR = 'https://huggingface.co/Xenova/transformers.js/resolve/main/quantized/'
+ // Only use local models
+ env.remoteModels = false;
+ // env.remoteModels = true; // Uncomment to test online

  function isDeepEqual(obj1, obj2, {
@@ -60,6 +56,5 @@ tol = 1e-3
  async function embeddings() {
-     let model_path = path.join(BASE_MODEL_DIR, 'sentence-transformers/all-MiniLM-L6-v2/default')
-     let embedder = await pipeline('embeddings', model_path)
+     // Load embeddings pipeline (uses sentence-transformers/all-MiniLM-L6-v2 by default)
+     let embedder = await pipeline('embeddings', 'sentence-transformers/all-MiniLM-L6-v2')
@@ -94,6 +89,4 @@
  async function text_classification() {
-     let model_path = path.join(BASE_MODEL_DIR, 'distilbert-base-uncased-finetuned-sst-2-english/sequence-classification')
-     let classifier = await pipeline('text-classification', model_path);
+     let classifier = await pipeline('text-classification', 'distilbert-base-uncased-finetuned-sst-2-english');

      let outputs1 = await classifier("I hated the movie");
@@ -144,5 +137,4 @@
  async function masked_language_modelling() {
-     let model_path = path.join(BASE_MODEL_DIR, 'bert-base-uncased/masked-lm')
-     let unmasker = await pipeline('fill-mask', model_path);
+     let unmasker = await pipeline('fill-mask', 'bert-base-uncased');
@@ -184,3 +176,2 @@ let outputs1 = await unmasker("Once upon a [MASK].");
  async function question_answering() {
-     let model_path = path.join(BASE_MODEL_DIR, 'distilbert-base-uncased-distilled-squad/question-answering')
@@ -190,3 +181,3 @@ let question = 'Who was Jim Henson?'
-     let answerer = await pipeline('question-answering', model_path);
+     let answerer = await pipeline('question-answering', 'distilbert-base-uncased-distilled-squad');
      let outputs = await answerer(question, context);
@@ -212,5 +203,4 @@
  async function summarization() {
-     let model_path = path.join(BASE_MODEL_DIR, 'sshleifer/distilbart-cnn-6-6/seq2seq-lm-with-past')
-     let summarizer = await pipeline('summarization', model_path)
+     let summarizer = await pipeline('summarization', 'sshleifer/distilbart-cnn-6-6')
@@ -233,5 +223,4 @@ let texts = [
  async function translation() {
-     let model_path = path.join(BASE_MODEL_DIR, 't5-small/seq2seq-lm-with-past')
-     let translator = await pipeline('translation_en_to_de', model_path)
+     let translator = await pipeline('translation_en_to_de', 't5-small')
@@ -260,5 +249,4 @@ let translation1 = await translator('Hello, how are you?')
  async function text_generation() {
-     let model_path = path.join(BASE_MODEL_DIR, 'distilgpt2/causal-lm-with-past')
-     let generator = await pipeline('text-generation', model_path)
+     let generator = await pipeline('text-generation', 'distilgpt2')
@@ -317,2 +305,2 @@ let output1 = await generator('Once upon a time, there was a', {
  console.log('Embeddings:', await embeddings())
  })();