Comparing version 0.0.6 to 0.0.7
{ | ||
"name": "ner-node", | ||
"version": "0.0.6", | ||
"version": "0.0.7", | ||
"description": "Node library for spawning NER server and pinging it to get entities out of raw text.", | ||
@@ -24,4 +24,4 @@ "main": "SocketNER.js", | ||
"dependencies": { | ||
"synchronize": "^0.9.15" | ||
"deasync": "^0.1.4" | ||
} | ||
} |
<h1>NER-Node</h1> | ||
<h5>Library to connect to Stanford NER local Server, send in the Raw Text and get back Entity JSON</h5> | ||
<h4><i>4X performance boost with same syntax in versions 0.0.6 and further. Please do upgrade.</i></h4> | ||
<h4><i>6X Performance Boost with <strong>Changed Syntax</strong> in versions 0.0.7 and further. Please do upgrade.</i></h4> | ||
@@ -15,12 +15,14 @@ <h4>Installation</h4> | ||
````javascript | ||
// Importing the module | ||
var socketNER = require("ner-node") | ||
socketNER(port, classifierFileName, pathToNER, function (obj) { | ||
// you can define your own function to parse tagged text | ||
obj.parser = function (taggedText) {..... return entities} | ||
// Synchronous function to get the Entities JSON | ||
var entitiesJSON = obj.getEntities(rawText, requiredEntity) | ||
// closes the server and client when done | ||
obj.close() | ||
}) | ||
// Creating an instance | ||
var NER = socketNER(port, classifierFileName, pathToNER) | ||
// Initiating Server and Client | ||
NER.init() | ||
// You can optionally define your own function to parse tagged text | ||
NER.parser = function (taggedText) {..... return entities} | ||
// Using the getEntities function of NER object anywhere to get the parsed entities | ||
var entitiesJSON = NER.getEntities(rawText, requiredEntity) | ||
// Closes the server and client when done | ||
NER.close() | ||
@@ -34,3 +36,16 @@ ```` | ||
<h4>Updates</h4> | ||
<p>It's 20-Feb-2016. I have pushed an update that makes the library run 4x faster than previous versions, so please <strong>use version 0.0.6</strong> or the latest for production purposes. There has been no change in the syntax.</p> | ||
<ul> | ||
<li> | ||
<h5>Versions up to 0.0.4</h5> | ||
<p>These versions use Node sockets to connect to the NER server (a Java command-line command), but the sockets were very slow when it came to performance.</p> | ||
</li> | ||
<li> | ||
<h5>Versions 0.0.5 and 0.0.6</h5> | ||
<p>These versions use the NER client (a Java command-line command) to ping the NER server. I tested it out and found that it was much faster, almost 6 times better with the application I was working on. This time I used the Synchronize library to make the functions synchronous, but its disadvantage was that it takes many sync.fiber wrappers to keep this working, and that wasn't possible in my case. In the end, the Synchronize library turned out to be useless for me and very frustrating.</p> | ||
</li> | ||
<li> | ||
<h5>Versions >=0.0.7</h5> | ||
<p>In this version, I switched back to deasync after doing some more standalone testing on converting async functions to sync ones, and was able to create a fully synchronous API in Node without any intentional sleeps. So there is a change in the syntax this time. This version is even more efficient than previous versions.</p> | ||
</li> | ||
</ul> | ||
@@ -37,0 +52,0 @@ <h4>Issues & Suggestions</h4> |
170
SocketNER.js
var spawn = require("child_process").spawn | ||
var sync = require("synchronize") | ||
var deasync = require("deasync") | ||
function SocketNER(port, classifierFileName, pathToNER, callback) { | ||
//defining defaults if arguments is a false value | ||
port = port || 1234 | ||
classifierFileName = classifierFileName || "english.all.3class.distsim.crf.ser.gz" | ||
pathToNER = pathToNER || "/" | ||
var client | ||
/**
 * Holds the settings and child-process handles for one NER server/client pair.
 * Must be invoked with `new`.
 *
 * @param {number} [port] - port for the NER server and client; 1234 when falsy
 * @param {string} [classifierFileName] - classifier file name;
 *   "english.all.3class.distsim.crf.ser.gz" when falsy
 * @param {string} [pathToNER] - directory holding the NER files; "/" when falsy
 */
function socketNER(port, classifierFileName, pathToNER) {
  var dir = String(pathToNER || "/")
  var lastChar = dir.charAt(dir.length - 1)
  // The path is joined to file names elsewhere by plain string concatenation,
  // so guarantee a trailing separator ("./StanfordNER" -> "./StanfordNER/").
  if (lastChar !== "/" && lastChar !== "\\") {
    dir += "/"
  }
  this.port = port || 1234
  this.classifier = classifierFileName || "english.all.3class.distsim.crf.ser.gz"
  this.pathToNER = dir
  // Filled in later with the spawned child processes.
  this.server = undefined
  this.client = undefined
}
// starting server as a separate process | ||
var server = spawn( | ||
socketNER.prototype.startServer = deasync(function (cb) { | ||
var self = this | ||
self.server = spawn( | ||
"java",[ | ||
"-mx750m", "-cp", | ||
pathToNER + "stanford-ner.jar", | ||
self.pathToNER + "stanford-ner.jar", | ||
"edu.stanford.nlp.ie.NERServer", | ||
"-loadClassifier", pathToNER + classifierFileName, | ||
"-port", port, "-outputFormat", "inlineXML" | ||
"-loadClassifier", self.pathToNER + self.classifier, | ||
"-port", self.port, "-outputFormat", "inlineXML" | ||
] | ||
) | ||
// I don't know why server's stderr stream gets | ||
// all output and why stdout don't | ||
server.stderr.on("data", function (data) { | ||
// Server would finish loading, | ||
// when it flushes out 'done [x secs]' | ||
if (data.toString().search("done") > -1) { | ||
startNERClient() | ||
// all output and why stdout don't. | ||
self.server.stderr.on("data", reader) | ||
// Server would finish loading, when it flushes | ||
// out 'done [x secs]' | ||
function reader(data) { | ||
if (data.toString().search("done") > -1) { | ||
// Removing listener | ||
self.server.stderr.removeListener("data", reader) | ||
cb(null, true) | ||
} | ||
} | ||
}) | ||
socketNER.prototype.startClient = deasync(function (cb) { | ||
var self = this | ||
self.client = spawn( | ||
"java",[ | ||
"-cp", | ||
self.pathToNER + "stanford-ner.jar", | ||
"edu.stanford.nlp.ie.NERServer", | ||
"-port", self.port, "-client" | ||
] | ||
) | ||
self.client.stdout.once("data", function (data) { | ||
if (data.toString().trim().match(/^Input some text/g)) { | ||
cb(null, true) | ||
} | ||
}) | ||
function startNERClient() { | ||
client = spawn( | ||
"java",[ | ||
"-cp", | ||
pathToNER + "stanford-ner.jar", | ||
"edu.stanford.nlp.ie.NERServer", | ||
"-port", port, "-client" | ||
] | ||
) | ||
}) | ||
// This "data" listener would be removed soon. | ||
client.stdout.on("data", reader) | ||
// Brings the instance up: calls startServer() and then startClient(), in that order.
// Both methods are created with the deasync wrapper in this file, so each call
// presumably blocks until its callback fires — confirm against the deasync docs.
socketNER.prototype.init = function () { | ||
var self = this | ||
self.startServer() | ||
self.startClient() | ||
} | ||
function reader(data) { | ||
if (data.toString().trim() === "") { | ||
// Keeping the "data" listener untill the client is started. | ||
client.stdout.removeListener("data", reader) | ||
// Running Callback in fiber to make it sync aware | ||
sync.fiber(function () { | ||
callback(socketNER) | ||
}) | ||
} | ||
} | ||
} | ||
/**
 * Kills the server and client child processes held by this instance.
 *
 * Safe to call when init() was never run or only partly completed: a handle
 * that is still undefined is skipped instead of raising a TypeError.
 */
socketNER.prototype.close = function () {
  var self = this
  if (self.server) {
    self.server.kill()
  }
  if (self.client) {
    self.client.kill()
  }
}
// Writes rawText to the NER client's stdin and, on the next "data" chunk from its
// stdout, hands the parsed result to cb in (err, data) form.
// Only the first chunk is handled, because the listener is registered with once().
// NOTE(review): `client` and `socketNER` are taken from the surrounding scope.
function tagIt(rawText, reqEntity, cb) { | ||
client.stdin.write(rawText) | ||
client.stdout.once("data", function (data) { | ||
// trim() is necessary to drop leading and following line breaks. | ||
var taggedText = data.toString().trim() | ||
// Synchronize module follows (err, data) format for cb. | ||
cb(null, socketNER.parser(taggedText, reqEntity)) | ||
}) | ||
} | ||
/**
 * Returns the entities found in rawText.
 *
 * Line breaks, tabs, form feeds and vertical tabs are flattened to spaces and a
 * single trailing "\n" is appended before the text is handed to tagIt().
 *
 * @param {string} rawText - text to tag; null/undefined are treated as ""
 * @param {string} [reqEntity] - forwarded unchanged to tagIt() / parser()
 * @returns {*} whatever tagIt() returns; for blank input, parser("", reqEntity)
 */
socketNER.prototype.getEntities = function (rawText, reqEntity) {
  var self = this
  // Non-string input used to crash on .replace(); coerce it instead.
  var text = rawText == null ? "" : String(rawText)
  text = text.replace(/[\r\n\f\t\v]/g, " ")
  // Nothing to tag: skip the round-trip to the NER client.
  // NOTE(review): a bare "\n" is presumably read by the client as "finish",
  // which would leave tagIt() waiting forever — confirm.
  if (text.trim() === "") {
    return self.parser("", reqEntity)
  }
  return self.tagIt(text + "\n", reqEntity)
}
var socketNER = {} | ||
// Sends one line of text to the NER client and delivers the parsed reply to cb.
// The function is wrapped with deasync, so callers invoke it as
// tagIt(rawText, reqEntity) and receive the value passed to cb
// (presumably by blocking until cb fires — confirm against the deasync docs).
// Only the first "data" chunk of the reply is handled, because the listener is
// registered with once().
socketNER.prototype.tagIt = deasync(function (rawText, reqEntity, cb) { | ||
var self = this | ||
// Drain whatever is already buffered on the client's stdout | ||
// (presumably leftovers from an earlier reply — confirm).
self.client.stdout.read() | ||
// Write rawText to the client's stdin so it is pushed to the NER server | ||
self.client.stdin.write(rawText) | ||
// Handle the reply when the client writes it to stdout. | ||
// once() removes the listener after the first "data" event, so each | ||
// call to tagIt consumes exactly one chunk. | ||
self.client.stdout.once("data", function (data) { | ||
// trim() is necessary to drop leading and following line breaks. | ||
var taggedText = data.toString().trim() | ||
// cb uses the (err, data) form; no error is ever reported here. | ||
cb(null, self.parser(taggedText, reqEntity)) | ||
}) | ||
}) | ||
// Flattens line breaks, tabs, form feeds and vertical tabs in rawText to spaces,
// appends "\n", and returns the result of sync.await on tagIt().
// NOTE(review): sync.await / sync.defer presumably only work inside a
// sync.fiber — confirm against the synchronize docs.
socketNER.getEntities = function (rawText, reqEntity) { | ||
rawText = rawText.replace(/[\r\n\f\t\v]/g, " ") + "\n" | ||
return sync.await(tagIt(rawText, reqEntity, sync.defer())) | ||
} | ||
// Kills the NER client process first, then the server process. | ||
// NOTE(review): `client` and `server` are taken from the surrounding scope;
// calling this before either has been spawned would throw — confirm with callers.
socketNER.close = function () { | ||
client.kill() | ||
server.kill() | ||
} | ||
// Passing in 'the parser' to the socketNER return object, | ||
// so that user could be able to define his own parser later on | ||
socketNER.parser = function (taggedText, requiredEntity) { | ||
var matches, entities = {} // return value of parser function | ||
requiredEntity = requiredEntity.toUpperCase() | ||
var re = requiredEntity ? new RegExp(["<(",requiredEntity,"?)>(.*?)<\/",requiredEntity,"?>"].join(""), "g") | ||
: /<([A-Z]+?)>(.*?)<\/[A-Z]+?>/g | ||
while((matches = re.exec(taggedText)) !== null) { | ||
if (entities[matches[1]]) { | ||
// if tagName is present, then pushing in the tagValue Array | ||
entities[matches[1]].push(matches[2]) | ||
} | ||
else { | ||
// otherwise adding the tagName with a new tagValue Array | ||
entities[matches[1]] = [matches[2]] | ||
} | ||
/**
 * Default parser: collects entities from inline-XML tagged text.
 * Callers may replace it with their own function of the same shape.
 *
 * @param {string} taggedText - text with entities wrapped as <TAG>value</TAG>
 * @param {string} [reqEntity] - when given, only this tag (upper-cased) is
 *   collected; when falsy, every upper-case tag is collected
 * @returns {Object} map of tag name -> array of values, in order of appearance
 */
socketNER.prototype.parser = function (taggedText, reqEntity) {
  var entities = {} // return value of parser function
  var tag = reqEntity ? String(reqEntity).toUpperCase() : ""
  var re
  if (tag) {
    // Escape regex metacharacters so the tag name is matched literally, and
    // require the full name (the old pattern made its last letter optional).
    var literal = tag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
    re = new RegExp("<(" + literal + ")>(.*?)</" + literal + ">", "g")
  }
  else {
    re = /<([A-Z]+?)>(.*?)<\/[A-Z]+?>/g
  }
  var matches
  while ((matches = re.exec(taggedText)) !== null) {
    var tagName = matches[1]
    var tagValue = matches[2]
    if (entities[tagName]) {
      // tag already seen: append to its value array
      entities[tagName].push(tagValue)
    }
    else {
      // first occurrence: start a new value array for this tag
      entities[tagName] = [tagValue]
    }
  }
  return entities
}
module.exports = SocketNER | ||
// Public entry point: a factory, so callers can write
// socketNER(port, classifierFileName, pathToNER) without `new`.
// It forwards the three arguments unchanged to the socketNER constructor.
module.exports = function (port, classifierFileName, pathToNER) { | ||
return new socketNER(port, classifierFileName, pathToNER) | ||
} |
133
test.js
@@ -1,103 +0,38 @@ | ||
// Terminal command to run the test file :- node test.js | ||
var rawText = [ | ||
'Steven Paul Jobs (/\ˈdʒɒbz/; February 24, 1955 – October 5,', | ||
'2011) was an American technology entrepreneur, visionary and ', | ||
'inventor. He was the co-founder, chairman, and chief executive', | ||
' officer (CEO) of Apple Inc.; CEO and largest shareholder of Pixar', | ||
' Animation Studios;[3] a member of The Walt Disney Company\'s board', | ||
' of directors following its acquisition of Pixar; and founder,', | ||
' chairman, and CEO of NeXT Inc. Jobs is widely recognized as', | ||
' a pioneer of the microcomputer revolution of the 1970s, along', | ||
' with Apple co-founder Steve Wozniak. Shortly after his death,', | ||
' Jobs\'s official biographer, Walter Isaacson, described him as the', | ||
' "creative entrepreneur whose passion for perfection and ferocious', | ||
' drive revolutionized six industries: personal computers, animated', | ||
' movies, music, phones, tablet computing, and digital', | ||
' publishing."[2]', | ||
'Adopted at birth in San Francisco, and raised in the San Francisco', | ||
' Bay Area during the 1960s, Jobs\'s countercultural lifestyle was a', | ||
' product of his time. As a senior at Homestead High School, in', | ||
' Cupertino, California, his two closest friends were the older', | ||
' engineering student (and Homestead High alumnus) Wozniak and his', | ||
' countercultural girlfriend, the artistically inclined Homestead', | ||
' High junior Chrisann Brennan. Jobs briefly attended Reed College', | ||
' in 1972 before dropping out, deciding to travel through India in', | ||
' 1974 and study Buddhism.', | ||
].join(" ") | ||
var socketNER = require("./SocketNER.js") | ||
var NER = socketNER(1234, null , "./StanfordNER/") | ||
var nerServer = require("./SocketNER.js") | ||
nerServer(8080, null , "./StanfordNER/", function (ner) { | ||
var jsonEntities = ner.getEntities(rawText, "") | ||
console.log("Test1 All Entities:-") | ||
console.log(jsonEntities) | ||
//starting server | ||
NER.init() | ||
var persons = ner.getEntities(rawText, "PERSON") | ||
console.log("\n\nTest2 Only Persons:-") | ||
console.log(persons) | ||
var rawText = [ | ||
'Steven Paul Jobs (/\ˈdʒɒbz/; February 24, 1955 – October 5,', | ||
'2011) was an American technology entrepreneur, visionary and ', | ||
'inventor. He was the co-founder, chairman, and chief executive', | ||
' officer (CEO) of Apple Inc.; CEO and largest shareholder of Pixar', | ||
' Animation Studios;[3] a member of The Walt Disney Company\'s board', | ||
' of directors following its acquisition of Pixar; and founder,', | ||
' chairman, and CEO of NeXT Inc. Jobs is widely recognized as', | ||
' a pioneer of the microcomputer revolution of the 1970s, along', | ||
' with Apple co-founder Steve Wozniak. Shortly after his death,', | ||
' Jobs\'s official biographer, Walter Isaacson, described him as the', | ||
' "creative entrepreneur whose passion for perfection and ferocious', | ||
' drive revolutionized six industries: personal computers, animated', | ||
' movies, music, phones, tablet computing, and digital', | ||
' publishing."[2]', | ||
'Adopted at birth in San Francisco, and raised in the San Francisco', | ||
' Bay Area during the 1960s, Jobs\'s countercultural lifestyle was a', | ||
' product of his time. As a senior at Homestead High School, in', | ||
' Cupertino, California, his two closest friends were the older', | ||
' engineering student (and Homestead High alumnus) Wozniak and his', | ||
' countercultural girlfriend, the artistically inclined Homestead', | ||
' High junior Chrisann Brennan. Jobs briefly attended Reed College', | ||
' in 1972 before dropping out, deciding to travel through India in', | ||
' 1974 and study Buddhism.', | ||
] | ||
var organizations = ner.getEntities(rawText, "ORGANIZATION") | ||
console.log("\n\nTest3 Only Organizations:-") | ||
console.log(organizations) | ||
var locations = ner.getEntities(rawText, "LOCATION") | ||
console.log("\n\nTest4 Only Locations:-") | ||
console.log(locations) | ||
ner.close() | ||
var startTime = process.uptime() | ||
rawText.forEach(function (line, i) { | ||
console.log(NER.getEntities(line, "")) | ||
}) | ||
//Output | ||
// Test1 All Entities:- | ||
// { PERSON: | ||
// [ 'Steven Paul Jobs', | ||
// 'Steve Wozniak', | ||
// 'Walter Isaacson', | ||
// 'Wozniak', | ||
// 'Chrisann Brennan' ], | ||
// ORGANIZATION: | ||
// [ 'Apple Inc.', | ||
// 'Pixar Animation Studios', | ||
// 'The Walt Disney Company', | ||
// 'Pixar', | ||
// 'NeXT Inc', | ||
// 'Apple', | ||
// 'Homestead High School', | ||
// 'Homestead High', | ||
// 'Reed College' ], | ||
// LOCATION: | ||
// [ 'San Francisco', | ||
// 'San Francisco Bay Area', | ||
// 'Cupertino', | ||
// 'California', | ||
// 'India' ] } | ||
// Test2 Only Persons:- | ||
// { PERSON: | ||
// [ 'Steven Paul Jobs', | ||
// 'Steve Wozniak', | ||
// 'Walter Isaacson', | ||
// 'Wozniak', | ||
// 'Chrisann Brennan' ] } | ||
// Test3 Only Organizations:- | ||
// { ORGANIZATION: | ||
// [ 'Apple Inc.', | ||
// 'Pixar Animation Studios', | ||
// 'The Walt Disney Company', | ||
// 'Pixar', | ||
// 'NeXT Inc', | ||
// 'Apple', | ||
// 'Homestead High School', | ||
// 'Homestead High', | ||
// 'Reed College' ] } | ||
// Test4 Only Locations:- | ||
// { LOCATION: | ||
// [ 'San Francisco', | ||
// 'San Francisco Bay Area', | ||
// 'Cupertino', | ||
// 'California', | ||
// 'India' ] } | ||
// process.uptime() reports seconds, so scale the difference to milliseconds
// to match the "ms" label.
console.log((process.uptime() - startTime) * 1000, "ms for 23 lines")
NER.close() |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
59
9086
6
133
2
+ Addeddeasync@^0.1.4
+ Addedbindings@1.5.0(transitive)
+ Addeddeasync@0.1.30(transitive)
+ Addedfile-uri-to-path@1.0.0(transitive)
+ Addednode-addon-api@1.7.2(transitive)
- Removedsynchronize@^0.9.15
- Removedfibers@1.0.15(transitive)
- Removedsynchronize@0.9.15(transitive)