Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

ner-node

Package Overview
Dependencies
Maintainers
1
Versions
7
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

ner-node - npm Package Compare versions

Comparing version 0.0.6 to 0.0.7

4

package.json
{
"name": "ner-node",
"version": "0.0.6",
"version": "0.0.7",
"description": "Node library for spawning NER server and pinging it to get entities out of raw text.",

@@ -24,4 +24,4 @@ "main": "SocketNER.js",

"dependencies": {
"synchronize": "^0.9.15"
"deasync": "^0.1.4"
}
}
<h1>NER-Node</h1>
<h5>Library to connect to Stanford NER local Server, send in the Raw Text and get back Entity JSON</h5>
<h4><i>4X performance boost with same syntax in versions 0.0.6 and further. Please do upgrade.</i></h4>
<h4><i>6X Performance Boost with <strong>Changed Syntax</strong> in versions 0.0.7 and further. Please do upgrade.</i></h4>

@@ -15,12 +15,14 @@ <h4>Installation</h4>

````javascript
// Importing the module
var socketNER = require("ner-node")
socketNER(port, classifierFileName, pathToNER, function (obj) {
// you can define your own function to parse tagged text
obj.parser = function (taggedText) {..... return entities}
// Synchronous function to get the Entities JSON
var entitiesJSON = obj.getEntities(rawText, requiredEntity)
// closes the server and client when done
obj.close()
})
// Creating an instance
var NER = socketNER(port, classifierFileName, pathToNER)
// Initiating Server and Client
NER.init()
// You can optionally define your own function to parse tagged text
NER.parser = function (taggedText) {..... return entities}
// Using the getEntities function of NER object anywhere to get the parsed entities
var entitiesJSON = NER.getEntities(rawText, requiredEntity)
// Closes the server and client when done
NER.close()

@@ -34,3 +36,16 @@ ````

<h4>Updates</h4>
<p>It's 20-Feb-2016. I have pushed an update to make the library run 4x faster than previous versions, so please <strong>use version 0.0.6</strong> or latest for production purposes. There has been no change in the syntax.</p>
<ul>
<li>
<h5>Versions upto 0.0.4</h5>
<p>These versions use Node sockets to connect to the NER server (a Java command-line command), but the sockets were very slow when it came to performance.</p>
</li>
<li>
<h5>Versions 0.0.5 and 0.0.6</h5>
<p>These versions use the NER client (a Java command-line command) to ping the NER server. I tested it out and found that it was much faster, almost 6 times better with the application I was working on. This time I used the Synchronize library to sync functions up, but its disadvantage was that it takes so many sync.fiber wrappers to keep this working, and that wasn't possible in my case. Actually, the Synchronize library was useless and very frustrating.</p>
</li>
<li>
<h5>Versions >=0.0.7</h5>
<p>In this version, I switched back to deasync, after doing some more standalone testing, to convert async functions to sync ones, and I was able to create a fully sync API with Node without any intentional sleeps. So there is a change in the syntax this time. This version is even more efficient than previous versions.</p>
</li>
</ul>

@@ -37,0 +52,0 @@ <h4>Issues & Suggestions</h4>

var spawn = require("child_process").spawn
var sync = require("synchronize")
var deasync = require("deasync")
function SocketNER(port, classifierFileName, pathToNER, callback) {
//defining defaults if arguments is a false value
port = port || 1234
classifierFileName = classifierFileName || "english.all.3class.distsim.crf.ser.gz"
pathToNER = pathToNER || "/"
var client
/**
 * Constructor for a NER handle. It only records configuration; no process
 * is spawned here, so `server` and `client` start out undefined.
 *
 * @param {number} [port] - port number; any falsy value falls back to 1234
 * @param {string} [classifierFileName] - classifier file name; any falsy
 *   value falls back to "english.all.3class.distsim.crf.ser.gz"
 * @param {string} [pathToNER] - path prefix stored as `pathToNER`; any
 *   falsy value falls back to "/"
 */
function socketNER(port, classifierFileName, pathToNER) {
  // Resolve each setting to the caller's value or its default.
  var chosenPort = port ? port : 1234
  var chosenClassifier = classifierFileName
    ? classifierFileName
    : "english.all.3class.distsim.crf.ser.gz"
  var nerPath = pathToNER ? pathToNER : "/"

  this.port = chosenPort
  this.classifier = chosenClassifier
  this.pathToNER = nerPath

  // Process handles are filled in later; declared here so the shape is fixed.
  this.server = void 0
  this.client = void 0
}
// starting server as a seperate process
var server = spawn(
socketNER.prototype.startServer = deasync(function (cb) {
var self = this
self.server = spawn(
"java",[
"-mx750m", "-cp",
pathToNER + "stanford-ner.jar",
self.pathToNER + "stanford-ner.jar",
"edu.stanford.nlp.ie.NERServer",
"-loadClassifier", pathToNER + classifierFileName,
"-port", port, "-outputFormat", "inlineXML"
"-loadClassifier", self.pathToNER + self.classifier,
"-port", self.port, "-outputFormat", "inlineXML"
]
)
// I don't know why server's stderr stream gets
// all output and why stdout don't
server.stderr.on("data", function (data) {
// Server would finish loading,
// when it flushes out 'done [x secs]'
if (data.toString().search("done") > -1) {
startNERClient()
// all output and why stdout don't.
self.server.stderr.on("data", reader)
// Server would finish loading, when it flushes
// out 'done [x secs]'
function reader(data) {
if (data.toString().search("done") > -1) {
// Removing listener
self.server.stderr.removeListener("data", reader)
cb(null, true)
}
}
})
socketNER.prototype.startClient = deasync(function (cb) {
var self = this
self.client = spawn(
"java",[
"-cp",
self.pathToNER + "stanford-ner.jar",
"edu.stanford.nlp.ie.NERServer",
"-port", self.port, "-client"
]
)
self.client.stdout.once("data", function (data) {
if (data.toString().trim().match(/^Input some text/g)) {
cb(null, true)
}
})
function startNERClient() {
client = spawn(
"java",[
"-cp",
pathToNER + "stanford-ner.jar",
"edu.stanford.nlp.ie.NERServer",
"-port", port, "-client"
]
)
})
// This "data" listener would be removed soon.
client.stdout.on("data", reader)
/**
 * Starts the NER server and then the NER client for this instance by
 * calling startServer() followed by startClient().
 */
socketNER.prototype.init = function () {
  // Order matters: server first, client second.
  this.startServer()
  this.startClient()
}
function reader(data) {
if (data.toString().trim() === "") {
// Keeping the "data" listener untill the client is started.
client.stdout.removeListener("data", reader)
// Running Callback in fiber to make it sync aware
sync.fiber(function () {
callback(socketNER)
})
}
}
}
/**
 * Stops the NER server and client child processes held by this instance.
 *
 * `server` and `client` are undefined until they have been started, so each
 * handle is checked before kill() is called on it. This makes close() safe
 * to call on an instance that was never (or only partially) initialised,
 * instead of throwing a TypeError.
 */
socketNER.prototype.close = function () {
  var self = this
  if (self.server) {
    self.server.kill()
  }
  if (self.client) {
    self.client.kill()
  }
}
function tagIt(rawText, reqEntity, cb) {
client.stdin.write(rawText)
client.stdout.once("data", function (data) {
// Trim() is necessary to avoid leading and follwing line breaks.
var taggedText = data.toString().trim()
// Synchronize module follows (err, data) format for cb.
cb(null, socketNER.parser(taggedText, reqEntity))
})
}
/**
 * Normalises rawText to a single newline-terminated line and passes it to
 * tagIt(), returning whatever tagIt() returns.
 *
 * @param {string} rawText - text to tag; CR, LF, form feed, tab and vertical
 *   tab characters are each replaced by a space
 * @param {string} reqEntity - forwarded unchanged to tagIt()
 */
socketNER.prototype.getEntities = function (rawText, reqEntity) {
  var flattened = rawText.replace(/[\r\n\f\t\v]/g, " ")
  var request = flattened + "\n"
  return this.tagIt(request, reqEntity)
}
var socketNER = {}
/**
 * Writes rawText to the NER client's stdin, waits for the next chunk on its
 * stdout, and hands the trimmed chunk to this.parser().
 *
 * The function is wrapped with deasync, so callers invoke it as
 * tagIt(rawText, reqEntity) and get the parser's result as the return value.
 * deasync follows the (err, result) callback convention: a parser exception
 * is passed to cb as the error so it reaches the caller, rather than being
 * raised inside the stream's "data" callback.
 *
 * NOTE(review): assumes the whole reply arrives in one "data" chunk —
 * confirm for long inputs.
 */
socketNER.prototype.tagIt = deasync(function (rawText, reqEntity, cb) {
  var self = this
  // Discard whatever is already buffered on the client's stdout
  // (presumably so stale output is not taken for this request's reply).
  self.client.stdout.read()
  // Push rawText to the NER client process.
  self.client.stdin.write(rawText)
  // Handle only the next chunk the client writes back.
  self.client.stdout.once("data", function (data) {
    // trim() is necessary to avoid leading and following line breaks.
    var taggedText = data.toString().trim()
    var entities
    try {
      entities = self.parser(taggedText, reqEntity)
    } catch (err) {
      // Report parser failures through the deasync callback.
      cb(err)
      return
    }
    // cb is called outside the try so an error thrown by cb itself is not
    // caught and reported a second time.
    cb(null, entities)
  })
})
socketNER.getEntities = function (rawText, reqEntity) {
rawText = rawText.replace(/[\r\n\f\t\v]/g, " ") + "\n"
return sync.await(tagIt(rawText, reqEntity, sync.defer()))
}
// Closes the socket and kills the server process
socketNER.close = function () {
client.kill()
server.kill()
}
// Passing in 'the parser' to the socketNER return object,
// so that user could be able to define his own parser later on
socketNER.parser = function (taggedText, requiredEntity) {
var matches, entities = {} // return value of parser function
requiredEntity = requiredEntity.toUpperCase()
var re = requiredEntity ? new RegExp(["<(",requiredEntity,"?)>(.*?)<\/",requiredEntity,"?>"].join(""), "g")
: /<([A-Z]+?)>(.*?)<\/[A-Z]+?>/g
while((matches = re.exec(taggedText)) !== null) {
if (entities[matches[1]]) {
// if tagName is present, then pushing in the tagValue Array
entities[matches[1]].push(matches[2])
}
else {
// otherwise adding the tagName with a new tagValue Array
entities[matches[1]] = [matches[2]]
}
socketNER.prototype.parser = function (taggedText, reqEntity) {
var matches, entities = {} // return value of parser function
reqEntity = reqEntity ? reqEntity.toUpperCase() : ""
var re = reqEntity ? new RegExp(["<(",reqEntity,"?)>(.*?)<\/",reqEntity,"?>"].join(""), "g")
: /<([A-Z]+?)>(.*?)<\/[A-Z]+?>/g
while((matches = re.exec(taggedText)) !== null) {
if (entities[matches[1]]) {
// if tagName is present, then pushing in the tagValue Array
entities[matches[1]].push(matches[2])
}
return entities
else {
// otherwise adding the tagName with a new tagValue Array
entities[matches[1]] = [matches[2]]
}
}
return entities
}
module.exports = SocketNER
module.exports = function (port, classifierFileName, pathToNER) {
return new socketNER(port, classifierFileName, pathToNER)
}

@@ -1,103 +0,38 @@

// Terminal command to run the test file :- node test.js
var rawText = [
'Steven Paul Jobs (/\ˈdʒɒbz/; February 24, 1955 – October 5,',
'2011) was an American technology entrepreneur, visionary and ',
'inventor. He was the co-founder, chairman, and chief executive',
' officer (CEO) of Apple Inc.; CEO and largest shareholder of Pixar',
' Animation Studios;[3] a member of The Walt Disney Company\'s board',
' of directors following its acquisition of Pixar; and founder,',
' chairman, and CEO of NeXT Inc. Jobs is widely recognized as',
' a pioneer of the microcomputer revolution of the 1970s, along',
' with Apple co-founder Steve Wozniak. Shortly after his death,',
' Jobs\'s official biographer, Walter Isaacson, described him as the',
' "creative entrepreneur whose passion for perfection and ferocious',
' drive revolutionized six industries: personal computers, animated',
' movies, music, phones, tablet computing, and digital',
' publishing."[2]',
'Adopted at birth in San Francisco, and raised in the San Francisco',
' Bay Area during the 1960s, Jobs\'s countercultural lifestyle was a',
' product of his time. As a senior at Homestead High School, in',
' Cupertino, California, his two closest friends were the older',
' engineering student (and Homestead High alumnus) Wozniak and his',
' countercultural girlfriend, the artistically inclined Homestead',
' High junior Chrisann Brennan. Jobs briefly attended Reed College',
' in 1972 before dropping out, deciding to travel through India in',
' 1974 and study Buddhism.',
].join(" ")
var socketNER = require("./SocketNER.js")
var NER = socketNER(1234, null , "./StanfordNER/")
var nerServer = require("./SocketNER.js")
nerServer(8080, null , "./StanfordNER/", function (ner) {
var jsonEntities = ner.getEntities(rawText, "")
console.log("Test1 All Entities:-")
console.log(jsonEntities)
//starting server
NER.init()
var persons = ner.getEntities(rawText, "PERSON")
console.log("\n\nTest2 Only Persons:-")
console.log(persons)
var rawText = [
'Steven Paul Jobs (/\ˈdʒɒbz/; February 24, 1955 – October 5,',
'2011) was an American technology entrepreneur, visionary and ',
'inventor. He was the co-founder, chairman, and chief executive',
' officer (CEO) of Apple Inc.; CEO and largest shareholder of Pixar',
' Animation Studios;[3] a member of The Walt Disney Company\'s board',
' of directors following its acquisition of Pixar; and founder,',
' chairman, and CEO of NeXT Inc. Jobs is widely recognized as',
' a pioneer of the microcomputer revolution of the 1970s, along',
' with Apple co-founder Steve Wozniak. Shortly after his death,',
' Jobs\'s official biographer, Walter Isaacson, described him as the',
' "creative entrepreneur whose passion for perfection and ferocious',
' drive revolutionized six industries: personal computers, animated',
' movies, music, phones, tablet computing, and digital',
' publishing."[2]',
'Adopted at birth in San Francisco, and raised in the San Francisco',
' Bay Area during the 1960s, Jobs\'s countercultural lifestyle was a',
' product of his time. As a senior at Homestead High School, in',
' Cupertino, California, his two closest friends were the older',
' engineering student (and Homestead High alumnus) Wozniak and his',
' countercultural girlfriend, the artistically inclined Homestead',
' High junior Chrisann Brennan. Jobs briefly attended Reed College',
' in 1972 before dropping out, deciding to travel through India in',
' 1974 and study Buddhism.',
]
var organizations = ner.getEntities(rawText, "ORGANIZATION")
console.log("\n\nTest3 Only Organizations:-")
console.log(organizations)
var locations = ner.getEntities(rawText, "LOCATION")
console.log("\n\nTest4 Only Locations:-")
console.log(locations)
ner.close()
var startTime = process.uptime()
rawText.forEach(function (line, i) {
console.log(NER.getEntities(line, ""))
})
//Output
// Test1 All Entities:-
// { PERSON:
// [ 'Steven Paul Jobs',
// 'Steve Wozniak',
// 'Walter Isaacson',
// 'Wozniak',
// 'Chrisann Brennan' ],
// ORGANIZATION:
// [ 'Apple Inc.',
// 'Pixar Animation Studios',
// 'The Walt Disney Company',
// 'Pixar',
// 'NeXT Inc',
// 'Apple',
// 'Homestead High School',
// 'Homestead High',
// 'Reed College' ],
// LOCATION:
// [ 'San Francisco',
// 'San Francisco Bay Area',
// 'Cupertino',
// 'California',
// 'India' ] }
// Test2 Only Persons:-
// { PERSON:
// [ 'Steven Paul Jobs',
// 'Steve Wozniak',
// 'Walter Isaacson',
// 'Wozniak',
// 'Chrisann Brennan' ] }
// Test3 Only Organizations:-
// { ORGANIZATION:
// [ 'Apple Inc.',
// 'Pixar Animation Studios',
// 'The Walt Disney Company',
// 'Pixar',
// 'NeXT Inc',
// 'Apple',
// 'Homestead High School',
// 'Homestead High',
// 'Reed College' ] }
// Test4 Only Locations:-
// { LOCATION:
// [ 'San Francisco',
// 'San Francisco Bay Area',
// 'Cupertino',
// 'California',
// 'India' ] }
// process.uptime() reports seconds; scale the elapsed time so the value
// matches the "ms" label printed next to it.
console.log((process.uptime() - startTime) * 1000, "ms for 23 lines")
NER.close()
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc