Security News
tea.xyz Spam Plagues npm and RubyGems Package Registries
Tea.xyz, a crypto project aimed at rewarding open source contributions, is once again facing backlash due to an influx of spam packages flooding public package registries.
word2vector
Advanced tools
Changelog
v2.2.1
Readme
This is a Node.js interface for Google's word2vector.
Here is an example of how to load a large model such as GoogleNews-vectors-negative300.bin with this package.
Linux and Unix operating systems are supported. Install it via npm:
npm install word2vector --save
In Node.js, require the module as below:
var w2v = require( 'word2vector' );
train load getVector getVectors getSimilarWords getNeighbors similarity substract add
Click here to see example TrainFile format.
Example:
var w2v = require("./lib");
var trainFile = "./data/train.data",
modelFile = "./data/test.model.bin";
w2v.train(trainFile, modelFile, {
cbow: 1, // use the continuous bag of words model //default
size: 10, // sets the size (dimension) of word vectors // default 100
window: 8, // sets maximal skip length between words // default 5
binary: 1, // save the resulting vectors in binary mode // default off
negative: 25, // number of negative examples; common values are 3 - 10 (0 = not used) // default 5
hs: 0, // 1 = use Hierarchical Softmax // default 0
sample: 1e-4,
threads: 20,
iter: 15,
minCount: 1, // This will discard words that appear less than *minCount* times // default 5
logOn: false // sets whether any output should be printed to the console // default false
});
The model must be loaded before calling any calculation functions.
Params | Description | Default Value |
---|---|---|
readType | Model format, pass "utf-8" if using a raw text model. | "bin" |
var w2v = require("../lib");
var modelFile = "./test.model.bin";
w2v.load( modelFile );
// console.log(w2v.getSimilarWordsWords());
Params | Description | Default Value |
---|---|---|
word | String to be searched. | "word" |
'use strict';
var w2v = require("./lib");
var modelFile = "./data/test.model.bin";
w2v.load( modelFile );
console.log(w2v.getVector("孫悟空"));
console.log(w2v.getVector("李洵"));
Sample Output:
// Array Type Only
[ 0.104406,
-0.160019,
-0.604506,
-0.622804,
0.039482,
-0.120058,
0.073555,
0.05646,
0.099059,
-0.419282 ]
null // Return null if this word is not in model.
Params | Description | Default Value |
---|---|---|
words | Array of strings to be searched. | "word" |
var w2v = require("./lib");
var modelFile = "./data/test.model.bin";
w2v.load( modelFile );
console.log(w2v.getVectors(["孫悟空", "李洵"]));
Sample Output:
[ { word: '孫悟空',
vector:
[ 0.104406,
-0.160019,
-0.604506,
-0.622804,
0.039482,
-0.120058,
0.073555,
0.05646,
0.099059,
-0.419282 ] },
{ word: '李洵', vector: null } ]
// this will trigger an error log in the console:
//'李洵' is not found in the model.
Params | Description | Default Value |
---|---|---|
word | String to be searched. | "word" |
options.N | return topN results | Array |
var w2v = require("./lib");
var modelFile = "./data/test.model.bin";
w2v.load( modelFile );
console.log(w2v.getSimilarWords("唐三藏"));
console.log(w2v.getSimilarWords("李洵"));
Sample Output:
// Array Type
[ { word: '孫悟空', similarity: 0.974369 },
{ word: '吳承恩', similarity: 0.96686 },
{ word: '林黛玉', similarity: 0.966664 },
{ word: '北地', similarity: 0.96264 },
{ word: '賈寶玉', similarity: 0.962137 },
{ word: '楚霸王', similarity: 0.955795 },
{ word: '梁山泊', similarity: 0.932804 },
{ word: '濮陽', similarity: 0.927542 },
{ word: '黃天霸', similarity: 0.927459 },
{ word: '英雄豪傑', similarity: 0.921575 }]
// Return empty [] if this word is not in model.
'李洵' is not found in the model.
[]
Params | Description | Default Value |
---|---|---|
vector | Vector to be searched. | "word" |
options.N | return topN results | Array |
var w2v = require("./lib");
var modelFile = "./data/test.model.bin";
w2v.load( modelFile );
var a = w2v.getNeighbors(w2v.getVector("唐三藏"), {N: 9});
// This is equivalent to using w2v.getSimilarWords("唐三藏");
console.log(a);
Sample Output1:
[ { word: '唐三藏', similarity: 0.9999993515200001 },
{ word: '孫悟空', similarity: 0.974368825898 },
{ word: '吳承恩', similarity: 0.966859435824 },
{ word: '林黛玉', similarity: 0.966663471323 },
{ word: '北地', similarity: 0.962639240211 },
{ word: '賈寶玉', similarity: 0.9621371820049999 },
{ word: '楚霸王', similarity: 0.9557946924850002 },
{ word: '梁山泊', similarity: 0.9328033548890001 },
{ word: '濮陽', similarity: 0.9275417727409999 } ]
{ '唐三藏': 0.9999993515200001,
'孫悟空': 0.974368825898,
'吳承恩': 0.966859435824,
'林黛玉': 0.966663471323,
'北地': 0.962639240211,
'賈寶玉': 0.9621371820049999,
'楚霸王': 0.9557946924850002,
'梁山泊': 0.9328033548890001,
'濮陽': 0.9275417727409999 }
Computes the [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) between two vectors. If a word is passed instead of a vector, its vector is looked up in the model automatically. Returns false if a word is not found in the model.
Params | Description | Default Value |
---|---|---|
word1 | First string to be compared. | No default value |
word2 | Second string to be compared. | No default value |
vector1 | First Vector to be compared. | No default value |
vector2 | Second Vector to be compared. | No default value |
'use strict';
var w2v = require("./lib");
var modelFile = "./data/test.model.bin";
w2v.load( modelFile );
var a = w2v.similarity("唐三藏", "孫悟空"); // 0.974368825898
console.log(a);
var b = w2v.similarity("唐三藏", "李洵"); // false, because '李洵' is not found in the model
// same as var b = w2v.similarity("唐三藏", w2v.getVector("李洵"));
// same as var b = w2v.similarity(w2v.getVector("唐三藏"), "李洵");
// same as var b = w2v.similarity(w2v.getVector("唐三藏"), w2v.getVector("李洵"));
console.log(b);
Sample Output:
0.974368825898
// '李洵' is not found in the model. // error alert in console
false
Subtracts vector1 from vector2. If a word is passed, its vector is looked up in the model automatically. Returns false if a word is not found in the model.
Params | Description | Default Value |
---|---|---|
word1 | Subtrahend | No default value |
word2 | Minuend | No default value |
Example:
'use strict';
var w2v = require("./lib");
var modelFile = "./data/test.model.bin";
w2v.load( modelFile );
var a = w2v.substract("孫悟空", "孫悟空");
console.log(a);
Sample Output:
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
Adds vector1 to vector2. If a word is passed, its vector is looked up in the model automatically. Returns false if a word is not found in the model.
Params | Description | Default Value |
---|---|---|
word1 | Summand | No default value |
word2 | Addend | No default value |
Example:
'use strict';
var w2v = require("./lib");
var modelFile = "./data/test.model.bin";
w2v.load( modelFile );
var a = w2v.add("孫悟空", "孫悟空");
var b = w2v.getVector("孫悟空");
console.log(a);
console.log(b);
Sample Output:
[ 0.208812,
-0.320038,
-1.209012,
-1.245608,
0.078964,
-0.240116,
0.14711,
0.11292,
0.198118,
-0.838564 ]
[ 0.104406,
-0.160019,
-0.604506,
-0.622804,
0.039482,
-0.120058,
0.073555,
0.05646,
0.099059,
-0.419282 ]
FAQs
a word2vector interface for nodejs
The npm package word2vector receives a total of 61 weekly downloads. As such, word2vector popularity was classified as not popular.
We found that word2vector demonstrated an unhealthy version release cadence and level of project activity because the last version was released a year ago. It has 1 open source maintainer collaborating on the project.
Did you know?
Socket for GitHub automatically highlights issues in each pull request and monitors the health of all your open source dependencies. Discover the contents of your packages and block harmful activity before you install or update your dependencies.
Security News
Tea.xyz, a crypto project aimed at rewarding open source contributions, is once again facing backlash due to an influx of spam packages flooding public package registries.
Security News
As cyber threats become more autonomous, AI-powered defenses are crucial for businesses to stay ahead of attackers who can exploit software vulnerabilities at scale.
Security News
UnitedHealth Group disclosed that the ransomware attack on Change Healthcare compromised protected health information for millions in the U.S., with estimated costs to the company expected to reach $1 billion.