Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

ner-node

Package Overview
Dependencies
Maintainers
1
Versions
7
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

ner-node - npm Package Compare versions

Comparing version 0.0.4 to 0.0.5

.npmignore

8

package.json
{
"name": "ner-node",
"version": "0.0.4",
"version": "0.0.5",
"description": "Node library for spawning NER server and pinging it to get entities out of raw text.",

@@ -13,5 +13,5 @@ "main": "SocketNER.js",

"type": "git",
"url" : "https://github.com/Vikasg7/NER-Node.git"
"url": "https://github.com/Vikasg7/NER-Node.git"
},
"keywords":[
"keywords": [
"Stanford NER",

@@ -25,4 +25,4 @@ "Named Entity Recognition",

"dependencies": {
"deasync": "^0.1.4"
"synchronize": "^0.9.15"
}
}
<h1>NER-Node</h1>
<h5>Library to connect to Stanford NER local Server, send in the Raw Text and get back Entity JSON</h5>
<h4><i>4X performance boost with same syntax in versions 0.0.5 and further. Please do upgrade.</i></h4>
<h4>Installation</h4>

@@ -12,3 +14,3 @@ <ol>

```
````javascript

@@ -19,9 +21,9 @@ var socketNER = require("ner-node")

obj.parser = function (taggedText) {..... return entities}
// A Sync function to get the Entities JSON
// Synchronous function to get the Entities JSON
var entitiesJSON = obj.getEntities(rawText, requiredEntity)
// closes the server and socket when done
// closes the server and client when done
obj.close()
})
```
````
<p>

@@ -32,2 +34,5 @@ <strong>Note:-</strong>

<h4>Updates</h4>
<p>It's 20-Feb-2016. I have pushed an update to make the library run 4x faster than previous versions, so please <strong>use version 0.0.5</strong> or the latest for production purposes. There has been no change in the syntax.</p>
<h4>Issues & Suggestions</h4>

@@ -39,2 +44,5 @@ <p>If you find an issues using the Library OR if you have any suggestions to make it perform better, then you can write to us in the Issues Section.</p>

<a href="https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=F3QQCWFPWHBYE" target="_blank">Buy Me Coffee</a>
OR you can use
<a href="https://gratipay.com/~xcelancer/" target="_blank">Gratipay</a>
to show your appreciation and gratitude.
</p>
var spawn = require("child_process").spawn
var socket = require("net").Socket()
var deasync = require("deasync")
// The purpose of deasync is to make the getEntities function synchronous
// without guessing the time it would take to finish. Previously, I was using
// socket.on(data, function () {...}) with a callback. But the downside was when I
// used the getEntities function in a loop, NodeJs warns me of Memory leak, as I was
// registering so many on("data") events. So I register on("data") event only one time
// by taking the code out of getEntities function and by using a global variable jsonEntities.
var sync = require("synchronize")
function SocketNER(port, classifierFileName, pathToNER, callback) {

@@ -17,8 +9,3 @@ //defining defaults if arguments is a false value

pathToNER = pathToNER || "/"
// using jsonEntities as a list is a clever technique of mine
// to empty using .pop() while returning the value in getEntities function**
var jsonEntities = []
var taggedText
var requiredEntityG
var rawTextG
var client

@@ -32,40 +19,52 @@ // starting server as a seperate process

"-loadClassifier", pathToNER + classifierFileName,
"-port", port, "-outputFormat", "inlineXML"
]
)
"-port", port, "-outputFormat", "inlineXML"
]
)
// Setup a Socket Connection after Server loads the Classifier
// I don't know why server's stderr stream gets all output and why stdout don't
// I don't know why server's stderr stream gets
// all output and why stdout don't
server.stderr.on("data", function (data) {
// Server would finish loading, when it flushes out 'done [x secs]'
if (data.toString().search("done") !== -1) { callback(socketNER) }
// Server would finish loading,
// when it flushes out 'done [x secs]'
if (data.toString().search("done") > -1) {
startNERClient()
}
})
function startNERClient() {
client = spawn(
"java",[
"-cp",
pathToNER + "stanford-ner.jar",
"edu.stanford.nlp.ie.NERServer",
"-port", port, "-client"
]
)
// Adding event handlers to the socket
socket.on("connect", function () { socket.write(rawTextG) })
socket.on("error", function (err) { console.log(err.toString()) })
socket.on("data", function (data) { taggedText = data.toString() })
// The NER server (ie. the other end) sends FIN signal after each write request, thereby closes
// the socket at its end. So I am using the 'end' event to process the returned taggedText
socket.on("end", function (err) {
jsonEntities.push(socketNER.parser(taggedText, requiredEntityG))
})
client.stdout.once("data", function (data) {
if (data.toString().search("Input some text") > -1) {
// Running Callback in fiber to make it sync aware
sync.fiber(function () {
callback(socketNER)
})
}
})
}
/**
 * Writes one piece of raw text to the NER client's stdin and reports the
 * parsed entities through a node-style callback.
 *
 * @param {string} rawText - Text to tag; written to client.stdin as-is.
 * @param {string} reqEntity - Entity filter forwarded unchanged to socketNER.parser.
 * @param {function} cb - Called as cb(null, entities); the (err, data) shape
 *    is what the synchronize module expects.
 */
function tagIt(rawText, reqEntity, cb) {
   client.stdin.write(rawText)
   // Only the next "data" event from the client's stdout is consumed, so the
   // listener is registered with once() rather than on().
   var onTagged = function (chunk) {
      // Kept in the outer-scope variable, exactly as before.
      taggedText = chunk.toString()
      // trim() strips the leading and trailing line breaks around the
      // tagged output before it reaches the parser.
      var entities = socketNER.parser(taggedText.trim(), reqEntity)
      cb(null, entities)
   }
   client.stdout.once("data", onTagged)
}
var socketNER = {}
socketNER.getEntities = function (rawText, requiredEntity) {
// taking a copy of requiredEntity preference
requiredEntityG = requiredEntity
// replacing line breaks with spaces and adding two line breaks at the end
// for an unknown reason. May be, it relates to how a request should be sent through socket.
rawTextG = rawText.replace(/[\r\n\f\t\v]/g, " ") + "\n\n"
// Reconnecting on each getEntities function call; that's necessary as the NER server closes
// the socket after each such call.
socket.connect(port)
// deasync won't halt the Event Loop, i.e. it still allows subsequent events to be processed,
// but at the same time, it won't exit this function until jsonEntities comes back.
deasync.loopWhile(function () { return (jsonEntities.length !== 1) })
return jsonEntities.pop() // ** or jsonEntities.shift()
socketNER.getEntities = function (rawText, reqEntity) {
rawText = rawText.replace(/[\r\n\f\t\v]/g, " ") + "\n"
return sync.await(tagIt(rawText, reqEntity, sync.defer()))
}

@@ -75,3 +74,3 @@

socketNER.close = function () {
socket.end()
client.kill()
server.kill()

@@ -83,5 +82,4 @@ }

socketNER.parser = function (taggedText, requiredEntity) {
var matches, entities = {} //return value of parser function
// Change the regex scope according to the user's Entity requirements
// Please always pass the requiredEntity in Upper case as NER uses upper cased Tags
var matches, entities = {} // return value of parser function
requiredEntity = requiredEntity.toUpperCase()
var re = requiredEntity ? new RegExp(["<(",requiredEntity,"?)>(.*?)<\/",requiredEntity,"?>"].join(""), "g")

@@ -101,5 +99,4 @@ : /<([A-Z]+?)>(.*?)<\/[A-Z]+?>/g

}
}
module.exports = SocketNER

@@ -29,3 +29,3 @@ // Terminal command to run the test file :- node test.js

var nerServer = require("ner-node")
var nerServer = require("./SocketNER.js")
nerServer(8080, null , "./StanfordNER/", function (ner) {

@@ -32,0 +32,0 @@ var jsonEntities = ner.getEntities(rawText, "")

// Terminal command to run the test file :- node test2.js
var rawText =
[
var rawText = [
'Steven Paul Jobs (/\ˈdʒɒbz/; February 24, 1955 – October 5,',

@@ -33,3 +32,3 @@ '2011) was an American technology entrepreneur, visionary and',

var nerServer = require("ner-node")
var nerServer = require("./SocketNER.js")

@@ -36,0 +35,0 @@ nerServer(8080, null , "./StanfordNER/", function (ner) {

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc