Comparing version 0.0.4 to 0.0.5
{ | ||
"name": "ner-node", | ||
"version": "0.0.4", | ||
"version": "0.0.5", | ||
"description": "Node library for spawning NER server and pinging it to get entities out of raw text.", | ||
@@ -13,5 +13,5 @@ "main": "SocketNER.js", | ||
"type": "git", | ||
"url" : "https://github.com/Vikasg7/NER-Node.git" | ||
"url": "https://github.com/Vikasg7/NER-Node.git" | ||
}, | ||
"keywords":[ | ||
"keywords": [ | ||
"Stanford NER", | ||
@@ -25,4 +25,4 @@ "Named Entity Recognition", | ||
"dependencies": { | ||
"deasync": "^0.1.4" | ||
"synchronize": "^0.9.15" | ||
} | ||
} |
<h1>NER-Node</h1> | ||
<h5>Library to connect to Stanford NER local Server, send in the Raw Text and get back Entity JSON</h5> | ||
<h4><i>4X performance boost with same syntax in versions 0.0.5 and further. Please do upgrade.</i></h4> | ||
<h4>Installation</h4> | ||
@@ -12,3 +14,3 @@ <ol> | ||
``` | ||
````javascript | ||
@@ -19,9 +21,9 @@ var socketNER = require("ner-node") | ||
obj.parser = function (taggedText) {..... return entities} | ||
// A Sync function to get the Entities JSON | ||
// Synchronous function to get the Entities JSON | ||
var entitiesJSON = obj.getEntities(rawText, requiredEntity) | ||
// closes the server and socket when done | ||
// closes the server and client when done | ||
obj.close() | ||
}) | ||
``` | ||
```` | ||
<p> | ||
@@ -32,2 +34,5 @@ <strong>Note:-</strong> | ||
<h4>Updates</h4> | ||
<p>It's 20-Feb-2016. I have pushed an update to make the library run 4x faster than previous versions, so please <strong>use version 0.0.5</strong> or the latest version for production purposes. There has been no change in the syntax.</p> | ||
<h4>Issues & Suggestions</h4> | ||
@@ -39,2 +44,5 @@ <p>If you find any issues using the Library OR if you have any suggestions to make it perform better, then you can write to us in the Issues Section.</p> | ||
<a href="https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=F3QQCWFPWHBYE" target="_blank">Buy Me Coffee</a> | ||
OR you can use | ||
<a href="https://gratipay.com/~xcelancer/" target="_blank">Gratipay</a> | ||
to show your appreciation and gratitude. | ||
</p> |
var spawn = require("child_process").spawn | ||
var socket = require("net").Socket() | ||
var deasync = require("deasync") | ||
// The purpose of deasync is to make the getEntities function synchronous | ||
// without guessing the time it would take to finish. Previously, I was using | ||
// socket.on(data, function () {...}) with a callback. But the downside was when I | ||
// used the getEntities function in a loop, NodeJs warns me of Memory leak, as I was | ||
// registering so many on("data") events. So I register on("data") event only one time | ||
// by taking the code out of getEntities function and by using a global variable jsonEntities. | ||
var sync = require("synchronize") | ||
function SocketNER(port, classifierFileName, pathToNER, callback) { | ||
@@ -17,8 +9,3 @@ //defining defaults if arguments is a false value | ||
pathToNER = pathToNER || "/" | ||
// using jsonEntities as a list is a clever technique of mine | ||
// to empty using .pop() while returning the value in getEntities function** | ||
var jsonEntities = [] | ||
var taggedText | ||
var requiredEntityG | ||
var rawTextG | ||
var client | ||
@@ -32,40 +19,52 @@ // starting server as a seperate process | ||
"-loadClassifier", pathToNER + classifierFileName, | ||
"-port", port, "-outputFormat", "inlineXML" | ||
] | ||
) | ||
"-port", port, "-outputFormat", "inlineXML" | ||
] | ||
) | ||
// Setup a Socket Connection after Server loads the Classifier | ||
// I don't know why server's stderr stream gets all output and why stdout don't | ||
// I don't know why server's stderr stream gets | ||
// all output and why stdout don't | ||
server.stderr.on("data", function (data) { | ||
// Server would finish loading, when it flushes out 'done [x secs]' | ||
if (data.toString().search("done") !== -1) { callback(socketNER) } | ||
// Server would finish loading, | ||
// when it flushes out 'done [x secs]' | ||
if (data.toString().search("done") > -1) { | ||
startNERClient() | ||
} | ||
}) | ||
function startNERClient() { | ||
client = spawn( | ||
"java",[ | ||
"-cp", | ||
pathToNER + "stanford-ner.jar", | ||
"edu.stanford.nlp.ie.NERServer", | ||
"-port", port, "-client" | ||
] | ||
) | ||
// Adding event handlers to the socket | ||
socket.on("connect", function () { socket.write(rawTextG) }) | ||
socket.on("error", function (err) { console.log(err.toString()) }) | ||
socket.on("data", function (data) { taggedText = data.toString() }) | ||
// The NER server (ie. the other end) sends FIN signal after each write request, thereby closes | ||
// the socket at its end. So I am using the 'end' event to process the returned taggedText | ||
socket.on("end", function (err) { | ||
jsonEntities.push(socketNER.parser(taggedText, requiredEntityG)) | ||
}) | ||
client.stdout.once("data", function (data) { | ||
if (data.toString().search("Input some text") > -1) { | ||
// Running Callback in fiber to make it sync aware | ||
sync.fiber(function () { | ||
callback(socketNER) | ||
}) | ||
} | ||
}) | ||
} | ||
/**
 * Sends one request to the NER client process and reports the parsed
 * entities through a Node-style callback.
 *
 * @param {string} rawText - Text written unchanged to the client's stdin.
 * @param {string} reqEntity - Entity filter, forwarded as-is to socketNER.parser.
 * @param {function} cb - Invoked as cb(null, entities) when the client's
 *     stdout emits its next "data" event.
 */
function tagIt(rawText, reqEntity, cb) {
    client.stdin.write(rawText)
    // once() registers a single-use listener, so repeated calls do not
    // pile up "data" handlers on the client's stdout.
    client.stdout.once("data", function (data) {
        // Declared locally on purpose: a bare `taggedText = ...` would leak
        // into an implicit global (or throw in strict mode) when no outer
        // variable of that name exists.
        var taggedText = data.toString()
        // Synchronize module follows (err, data) format for cb.
        // trim() is necessary to avoid leading and trailing
        // line breaks.
        cb(null, socketNER.parser(taggedText.trim(), reqEntity))
    })
}
var socketNER = {} | ||
socketNER.getEntities = function (rawText, requiredEntity) { | ||
// taking a copy of requiredEntity preference | ||
requiredEntityG = requiredEntity | ||
// replacing line breaks with spaces and adding two line breaks at the end | ||
// for an unknown reason. May be, it relates to how a request should be sent through socket. | ||
rawTextG = rawText.replace(/[\r\n\f\t\v]/g, " ") + "\n\n" | ||
// Reconnecting on each getEntities function call, that's neccessary as the NER sever closes | ||
// the socket after each such call. | ||
socket.connect(port) | ||
// deasync would won't halt the Event Loop ie. it would also next events to be met | ||
// but at the same time, it won't exit this function untill jsonEntities comes back. | ||
deasync.loopWhile(function () { return (jsonEntities.length !== 1) }) | ||
return jsonEntities.pop() // ** or jsonEntities.shift() | ||
socketNER.getEntities = function (rawText, reqEntity) { | ||
rawText = rawText.replace(/[\r\n\f\t\v]/g, " ") + "\n" | ||
return sync.await(tagIt(rawText, reqEntity, sync.defer())) | ||
} | ||
@@ -75,3 +74,3 @@ | ||
socketNER.close = function () { | ||
socket.end() | ||
client.kill() | ||
server.kill() | ||
@@ -83,5 +82,4 @@ } | ||
socketNER.parser = function (taggedText, requiredEntity) { | ||
var matches, entities = {} //return value of parser function | ||
// Change the regex scope according to user's Entitry requirements | ||
// Please always pass the requiredEntity in Upper case as NER uses upper cased Tags | ||
var matches, entities = {} // return value of parser function | ||
requiredEntity = requiredEntity.toUpperCase() | ||
var re = requiredEntity ? new RegExp(["<(",requiredEntity,"?)>(.*?)<\/",requiredEntity,"?>"].join(""), "g") | ||
@@ -101,5 +99,4 @@ : /<([A-Z]+?)>(.*?)<\/[A-Z]+?>/g | ||
} | ||
} | ||
module.exports = SocketNER |
@@ -29,3 +29,3 @@ // Terminal command to run the test file :- node test.js | ||
var nerServer = require("ner-node") | ||
var nerServer = require("./SocketNER.js") | ||
nerServer(8080, null , "./StanfordNER/", function (ner) { | ||
@@ -32,0 +32,0 @@ var jsonEntities = ner.getEntities(rawText, "") |
// Terminal command to run the test file :- node test2.js | ||
var rawText = | ||
[ | ||
var rawText = [ | ||
'Steven Paul Jobs (/\ˈdʒɒbz/; February 24, 1955 – October 5,', | ||
@@ -33,3 +32,3 @@ '2011) was an American technology entrepreneur, visionary and', | ||
var nerServer = require("ner-node") | ||
var nerServer = require("./SocketNER.js") | ||
@@ -36,0 +35,0 @@ nerServer(8080, null , "./StanfordNER/", function (ner) { |
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
7
44
1
14469
260
+ Added synchronize@^0.9.15
+ Added fibers@1.0.15 (transitive)
+ Added synchronize@0.9.15 (transitive)
- Removed deasync@^0.1.4
- Removed bindings@1.5.0 (transitive)
- Removed deasync@0.1.30 (transitive)
- Removed file-uri-to-path@1.0.0 (transitive)
- Removed node-addon-api@1.7.2 (transitive)