Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

qminer

Package Overview
Dependencies
Maintainers
2
Versions
105
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

qminer - npm Package Compare versions

Comparing version 0.1.23 to 0.1.25

examples/twitter/2015-02-09T11-34

5

package.json
{
"name": "qminer",
"version": "0.1.23",
"version": "0.1.25",
"description": "A C++ based data analytics platform for processing large-scale real-time streams containing structured and unstructured data",

@@ -32,5 +32,6 @@ "author": "Blaz Fortuna <blaz@blazfortuna.com>",

"engines": {
"node": "0.11.14"
"node": "0.12.0"
},
"dependencies": {
"sget": "~0.1.5",
"bindings": "~1.2.1"

@@ -37,0 +38,0 @@ },

260

src/nodejs/scripts/analytics.js

@@ -11,3 +11,7 @@ // typical use case: pathPrefix = 'Release' or pathPrefix = 'Debug'. Empty argument is supported as well (the first binary that the bindings finds will be used)

var qm_util = require(__dirname + '/qm_util.js');
var sget = require('sget');
// Returns `defaultval` when `arg` is missing (undefined or null, since the
// original comparison is a loose `==`); otherwise returns `arg` unchanged.
// Falsy-but-present values such as 0, '' and false are passed through.
function defarg(arg, defaultval) {
    if (arg === undefined || arg === null) {
        return defaultval;
    }
    return arg;
}

@@ -23,3 +27,2 @@ function createBatchModel(featureSpace, models) {

// save list
debugger
sout.writeLine(this.models);

@@ -197,3 +200,258 @@ // save feature space

//#- `alModel = analytics.newActiveLearner(query, qRecSet, fRecSet, ftrSpace, settings)` -- initializes the
//# active learning. The algorihm is run by calling `model.startLoop()`. The algorithm has two stages: query mode, where the algorithm suggests potential
//# positive and negative examples based on the query text, and SVM mode, where the algorithm keeps
//# selecting examples that are closest to the SVM margin (every time an example is labeled, the SVM
//# is retrained.
//# The inputs are: query (text), record set `qRecSet`, record set `fRecSet`, the feature space `ftrSpace` and a
//# `settings`JSON object. The settings object specifies:`textField` (string) which is the name
//# of the field in records that is used to create feature vectors, `nPos` (integer) and `nNeg` (integer) set the number of positive and negative
//# examples that have to be identified in the query mode before the program enters SVM mode.
//# We can set two additional parameters `querySampleSize` and `randomSampleSize` which specify the sizes of subsamples of qRecSet and fRecSet, where the rest of the data is ignored in the active learning.
//# Final parameters are all SVM parameters (c, j, batchSize, maxIterations, maxTime, minDiff, verbose).
exports.newActiveLearner = function (query, qRecSet, fRecSet, ftrSpace, stts) {
return new exports.ActiveLearner(query, qRecSet, fRecSet, ftrSpace, stts);
}
exports.ActiveLearner = function (query, qRecSet, fRecSet, ftrSpace, stts) {
var settings = defarg(stts, {});
settings.nPos = defarg(stts.nPos, 2);
settings.nNeg = defarg(stts.nNeg, 2);
settings.textField = defarg(stts.textField, "Text");
settings.querySampleSize = defarg(stts.querySampleSize, -1);
settings.randomSampleSize = defarg(stts.randomSampleSize, -1);
settings.c = defarg(stts.c, 1.0);
settings.j = defarg(stts.j, 1.0);
settings.batchSize = defarg(stts.batchSize, 100);
settings.maxIterations = defarg(stts.maxIterations, 100000);
settings.maxTime = defarg(stts.maxTime, 1); // 1 second for computation by default
settings.minDiff = defarg(stts.minDiff, 1e-6);
settings.verbose = defarg(stts.verbose, false);
// compute features or provide them
settings.extractFeatures = defarg(stts.extractFeatures, true);
if (!settings.extractFeatures) {
if (stts.uMat == null) { throw 'settings uMat not provided, extractFeatures = false'; }
if (stts.uRecSet == null) { throw 'settings uRecSet not provided, extractFeatures = false'; }
if (stts.querySpVec == null) { throw 'settings querySpVec not provided, extractFeatures = false'; }
}
// QUERY MODE
var queryMode = true;
// bow similarity between query and training set
var querySpVec;
var uRecSet;
var uMat;
if (settings.extractFeatures) {
var temp = {}; temp[settings.textField] = query;
var queryRec = qRecSet.store.newRec(temp); // record
querySpVec = ftrSpace.ftrSpVec(queryRec);
// use sampling?
var sq = qRecSet;
if (settings.querySampleSize >= 0 && qRecSet != undefined) {
sq = qRecSet.sample(settings.querySampleSize);
}
var sf = fRecSet;
if (settings.randomSampleSize >= 0 && fRecSet != undefined) {
sf = fRecSet.sample(settings.randomSampleSize);
}
// take a union or just qset or just fset if some are undefined
uRecSet = (sq != undefined) ? ((sf != undefined) ? sq.setunion(sf) : sq) : sf;
if (uRecSet == undefined) { throw 'undefined record set for active learning!';}
uMat = ftrSpace.ftrSpColMat(uRecSet);
} else {
querySpVec = stts.querySpVec;
uRecSet = stts.uRecSet;
uMat = stts.uMat;
}
querySpVec.normalize();
uMat.normalizeCols();
var X = new la.SparseMatrix();
var y = new la.Vector();
var simV = uMat.multiplyT(querySpVec); //similarities (q, recSet)
var sortedSimV = simV.sortPerm(); //ascending sort
var simVs = sortedSimV.vec; //sorted similarities (q, recSet)
var simVp = sortedSimV.perm; //permutation of sorted similarities (q, recSet)
//// counters for questions in query mode
var nPosQ = 0; //for traversing simVp from the end
var nNegQ = 0; //for traversing simVp from the start
// SVM MODE
var svm;
var posIdxV = new la.IntVector(); //indices in recordSet
var negIdxV = new la.IntVector(); //indices in recordSet
var posRecIdV = new la.IntVector(); //record IDs
var negRecIdV = new la.IntVector(); //record IDs
var classVec = new la.Vector({ "vals": uRecSet.length }); //svm scores for record set
var resultVec = new la.Vector({ "vals": uRecSet.length }); // non-absolute svm scores for record set
//# - `rs = alModel.getRecSet()` -- returns the record set that is being used (result of sampling)
this.getRecSet = function () { return uRecSet };
//# - `idx = alModel.selectedQuestionIdx()` -- returns the index of the last selected question in alModel.getRecSet()
this.selectedQuestionIdx = -1;
//# - `bool = alModel.getQueryMode()` -- returns true if in query mode, false otherwise (SVM mode)
this.getQueryMode = function () { return queryMode; };
//# - `numArr = alModel.getPos(thresh)` -- given a `threshold` (number) return the indexes of records classified above it as a javascript array of numbers. Must be in SVM mode.
this.getPos = function (threshold) {
if (this.queryMode) { return null; } // must be in SVM mode to return results
if (!threshold) { threshold = 0; }
var posIdxArray = [];
for (var recN = 0; recN < uRecSet.length; recN++) {
if (resultVec[recN] >= threshold) {
posIdxArray.push(recN);
}
}
return posIdxArray;
};
this.debug = function () { debugger; }
this.getTop = function (limit) {
if (this.queryMode) { return null; } // must be in SVM mode to return results
if (!limit) { limit = 20; }
var idxArray = [];
var marginArray = [];
var sorted = resultVec.sortPerm(false);
for (var recN = 0; recN < uRecSet.length && recN < limit; recN++) {
idxArray.push(sorted.perm[recN]);
var val = sorted.vec[recN];
val = val == Number.POSITIVE_INFINITY ? Number.MAX_VALUE : val;
val = val == Number.NEGATIVE_INFINITY ? -Number.MAX_VALUE : val;
marginArray.push(val);
}
return { posIdx: idxArray, margins: marginArray };
};
//# - `objJSON = alModel.getSettings()` -- returns the settings object
this.getSettings = function () { return settings; }
// returns record set index of the unlabeled record that is closest to the margin
//# - `recSetIdx = alModel.selectQuestion()` -- returns `recSetIdx` - the index of the record in `recSet`, whose class is unknonw and requires user input
this.selectQuestion = function () {
if (posRecIdV.length >= settings.nPos && negRecIdV.length >= settings.nNeg) { queryMode = false; }
if (queryMode) {
if (posRecIdV.length < settings.nPos && nPosQ + 1 < uRecSet.length) {
nPosQ = nPosQ + 1;
console.log("query mode, try to get pos");
this.selectedQuestionIdx = simVp[simVp.length - 1 - (nPosQ - 1)];
return this.selectedQuestionIdx;
}
if (negRecIdV.length < settings.nNeg && nNegQ + 1 < uRecSet.length) {
nNegQ = nNegQ + 1;
// TODO if nNegQ == rRecSet.length, find a new sample
console.log("query mode, try to get neg");
this.selectedQuestionIdx = simVp[nNegQ - 1];
return this.selectedQuestionIdx;
}
}
else {
////call svm, get record closest to the margin
svm = new exports.SVC(settings);
svm.fit(X, y);//column examples, y float vector of +1/-1, default svm paramvals
// mark positives
for (var i = 0; i < posIdxV.length; i++) {
classVec[posIdxV[i]] = Number.POSITIVE_INFINITY;
resultVec[posIdxV[i]] = Number.POSITIVE_INFINITY;
}
// mark negatives
for (var i = 0; i < negIdxV.length; i++) {
classVec[negIdxV[i]] = Number.POSITIVE_INFINITY;
resultVec[negIdxV[i]] = Number.NEGATIVE_INFINITY;
}
var posCount = posIdxV.length;
var negCount = negIdxV.length;
// classify unlabeled
for (var recN = 0; recN < uRecSet.length; recN++) {
if (classVec[recN] !== Number.POSITIVE_INFINITY) {
var svmMargin = svm.predict(uMat.getCol(recN));
if (svmMargin > 0) {
posCount++;
} else {
negCount++;
}
classVec[recN] = Math.abs(svmMargin);
resultVec[recN] = svmMargin;
}
}
var sorted = classVec.sortPerm();
console.log("svm mode, margin: " + sorted.vec[0] + ", npos: " + posCount + ", nneg: " + negCount);
this.selectedQuestionIdx = sorted.perm[0];
return this.selectedQuestionIdx;
}
};
// asks the user for class label given a record set index
//# - `alModel.getAnswer(ALAnswer, recSetIdx)` -- given user input `ALAnswer` (string) and `recSetIdx` (integer, result of model.selectQuestion) the training set is updated.
//# The user input should be either "y" (indicating that recSet[recSetIdx] is a positive example), "n" (negative example).
this.getAnswer = function (ALanswer, recSetIdx) {
//todo options: ?newQuery
if (ALanswer === "y") {
posIdxV.push(recSetIdx);
posRecIdV.push(uRecSet[recSetIdx].$id);
//X.push(ftrSpace.ftrSpVec(uRecSet[recSetIdx]));
X.push(uMat.getCol(recSetIdx));
y.push(1.0);
} else {
negIdxV.push(recSetIdx);
negRecIdV.push(uRecSet[recSetIdx].$id);
//X.push(ftrSpace.ftrSpVec(uRecSet[recSetIdx]));
X.push(uMat.getCol(recSetIdx));
y.push(-1.0);
}
// +k query // rank unlabeled according to query, ask for k most similar
// -k query // rank unlabeled according to query, ask for k least similar
};
//# - `alModel.startLoop()` -- starts the active learning loop in console
this.startLoop = function () {
while (true) {
var recSetIdx = this.selectQuestion();
var ALanswer = sget(uRecSet[recSetIdx].Text + ": y/(n)/s? Command s stops the process").trim();
if (ALanswer == "s") { break; }
if (posIdxV.length + negIdxV.length == uRecSet.length) { break; }
this.getAnswer(ALanswer, recSetIdx);
}
};
//# - `alModel.saveSvmModel(fout)` -- saves the binary SVM model to an output stream `fout`. The algorithm must be in SVM mode.
this.saveSvmModel = function (outputStream) {
// must be in SVM mode
if (queryMode) {
console.log("AL.save: Must be in svm mode");
return;
}
svm.save(outputStream);
};
this.getWeights = function () {
return svm.weights;
}
//this.saveLabeled
//this.loadLabeled
};
return exports;
}

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc