calais-entity-extractor
Advanced tools
Comparing version 1.1.1 to 1.2.0
@@ -5,3 +5,3 @@ | ||
var Calais = require('calais-entity-extractor').Calais; | ||
var Calais = require('./lib/calais-entity-extractor.js').Calais; | ||
@@ -18,2 +18,22 @@ //You can enter options as the second parameter. | ||
// Example entity lookup: resolve a single identifier and print what comes back.
// Callbacks in this library receive (result, err), in that order.
calais.lookup("1-4295861160", function(result, err) {
    if (err) {
        console.log("Error: " + err);
    } else {
        console.log("Result: " + util.inspect(result, false, null));
    }
});

// Example entity searching: free-text search, printed the same way.
calais.search("IBM", function(result, err) {
    if (err) {
        console.log("Error: " + err);
    } else {
        console.log("Result: " + util.inspect(result, false, null));
    }
});
/* | ||
//Example text entity tagging functionality. | ||
calais.extractFromText(function(result, err) { //perform the request | ||
@@ -52,2 +72,3 @@ if (err) { | ||
}); | ||
}); | ||
}); | ||
*/ |
@@ -21,2 +21,5 @@ var request = require('request'); | ||
this.entitySearchUrl = "https://api.thomsonreuters.com:443/permid/search"; | ||
this.entityLookupUrl = "https://permid.org:443/"; | ||
this._setDefaultOptions(options); | ||
@@ -44,2 +47,4 @@ }, | ||
var tags = [ ]; | ||
var industries = [ ]; | ||
for(var i in result) { | ||
@@ -86,2 +91,11 @@ var p = result[i]; | ||
} | ||
} else if (p._typeGroup == 'industry') { | ||
var industry = { | ||
'name' : p['name'], | ||
'rcscode' : p['rcscode'], | ||
'trbccode' : p['trbccode'], | ||
'permid' : p['permid'], | ||
'relevance' : p['relevance'] | ||
}; | ||
industries.push(industry); | ||
} | ||
@@ -92,5 +106,167 @@ } | ||
return {'entities' : entities, 'tags' : tags }; | ||
return {'entities' : entities, 'tags' : tags, 'industries' : industries }; | ||
}, | ||
_parseCalaisSearchResult: function(data) { | ||
var organizations = []; | ||
var instruments = []; | ||
var quotes = []; | ||
if (!data.hasOwnProperty('result')) | ||
return { 'organizations' : organizations, 'instruments' : instruments, 'quotes' : quotes }; | ||
if (data.result.hasOwnProperty('organizations') && data.result.organizations.hasOwnProperty('entities')) { | ||
var orgs = data.result.organizations.entities; | ||
for (var i = 0, len = orgs.length; i < len; i++) { | ||
var entity = { }; | ||
if (orgs[i]["@id"]) { | ||
entity['id'] = orgs[i]["@id"]; | ||
if(entity['id'].indexOf("/") != -1) | ||
entity['id'] = entity['id'].substring(entity['id'].lastIndexOf("/")+1); | ||
} | ||
if (orgs[i].hasOwnProperty("organizationName")) | ||
entity['name'] = orgs[i].organizationName; | ||
if (orgs[i].hasOwnProperty("primaryTicker")) | ||
entity['ticker'] = orgs[i].primaryTicker; | ||
if (orgs[i].hasOwnProperty("orgSubtype")) | ||
entity['type'] = orgs[i].orgSubtype; | ||
if (orgs[i].hasOwnProperty("hasHoldingClassification")) | ||
entity['public'] = true; | ||
else | ||
entity['public'] = false; | ||
if (orgs[i].hasOwnProperty("hasURL")) | ||
entity['url'] = orgs[i].hasURL; | ||
organizations.push(entity); | ||
} | ||
} | ||
if (data.result.hasOwnProperty('instruments') && data.result.instruments.hasOwnProperty('entities')) { | ||
var ins = data.result.instruments.entities; | ||
for (var i = 0, len = ins.length; i < len; i++) { | ||
var entity = { }; | ||
if (ins[i].hasOwnProperty("@id")) { | ||
entity['id'] = ins[i]["@id"]; | ||
if(entity['id'].indexOf("/") != -1) | ||
entity['id'] = entity['id'].substring(entity['id'].lastIndexOf("/")+1); | ||
} | ||
if (ins[i].hasOwnProperty("hasName")) | ||
entity['name'] = ins[i].hasName; | ||
if (ins[i].hasOwnProperty("assetClass")) | ||
entity['assetClass'] = ins[i].assetClass; | ||
if (ins[i].hasOwnProperty("isIssuedByName")) | ||
entity['issuerName'] = ins[i].isIssuedByName; | ||
if (ins[i].hasOwnProperty("isIssuedBy")) { | ||
entity['issuerId'] = ins[i].isIssuedBy; | ||
if(entity.issuerId.indexOf("/") != -1) | ||
entity.issuerId = entity.issuerId.substring(entity.issuerId.lastIndexOf("/")+1); | ||
} | ||
if (ins[i].hasPrimaryQuote) { | ||
entity['primaryQuoteId'] = ins[i].hasPrimaryQuote; | ||
if (entity.primaryQuoteId.indexOf("/") != -1) | ||
entity.primaryQuoteId = entity.primaryQuoteId.substring(entity.primaryQuoteId.lastIndexOf("/")+1); | ||
} | ||
if (ins[i].hasOwnProperty("primaryTicker")) | ||
entity['primaryTicker'] = ins[i].primaryTicker; | ||
instruments.push(entity); | ||
} | ||
} | ||
if (data.result.hasOwnProperty('quotes') && data.result.quotes.hasOwnProperty('entities')) { | ||
var quo = data.result.quotes.entities; | ||
for (var i = 0, len = quo.length; i < len; i++) { | ||
var entity = { }; | ||
if (quo[i]["@id"]) { | ||
entity['id'] = ins[i]["@id"]; | ||
if(entity['id'].indexOf("/") != -1) | ||
entity['id'] = entity['id'].substring(entity['id'].lastIndexOf("/")+1); | ||
} | ||
if (quo[i].hasName) | ||
entity['name'] = quo[i].hasName; | ||
if (quo[i].assetClass) | ||
entity['class'] = quo[i].assetClass; | ||
if (quo[i].isQuoteOfInstrumentName) | ||
entity['instrumentName'] = quo[i].isQuoteOfInstrumentName; | ||
if (quo[i].isIssuedByName) | ||
entity['issuer'] = quo[i].isIssuedByName; | ||
if (quo[i].hasRIC) | ||
entity["ric"] = quo[i].hasRIC; | ||
if (quo[i].hasMic) | ||
entity["mic"] = quo[i].hasMic; | ||
if (quo[i].hasExchangeTicker) | ||
entity["ticker"] = quo[i].hasExchangeTicker; | ||
if (quo[i].isQuoteOf) { | ||
entity["quoteId"] = quo[i].isQuoteOf; | ||
if (entity.quoteId.indexOf("/") != -1) | ||
entity.quoteId = entity.quoteId.substring(entity.quoteId.lastIndexOf("/")+1); | ||
} | ||
quotes.push(entity); | ||
} | ||
} | ||
return { 'organizations' : organizations, 'instruments' : instruments, 'quotes' : quotes }; | ||
}, | ||
_parseCalaisLookupResult: function(data) { | ||
var result = { }; | ||
if (data.hasOwnProperty("@id")) { | ||
result['id'] = data["@id"]; | ||
if(result.id.indexOf("/") != -1) | ||
result.id = result.id.substring(result.id.lastIndexOf("/")+1); | ||
} | ||
if (data.hasOwnProperty("@type")) { | ||
result['type'] = data["@type"]; | ||
if(result.type.indexOf(":") != -1) | ||
result.type = result.type.substring(result.type.lastIndexOf(":")+1); | ||
} | ||
if (data.hasOwnProperty("mdaas:HeadquartersAddress")) | ||
result.headquarters = data["mdaas:HeadquartersAddress"]; | ||
if (data.hasOwnProperty("mdaas:RegisteredAddress")) | ||
result.address = data["mdaas:RegisteredAddress"]; | ||
if (data.hasOwnProperty("hasOrganizationPrimaryQuote")) { | ||
result.primaryQuoteId = data["hasOrganizationPrimaryQuote"]; | ||
if(result.primaryQuoteId.indexOf("/") != -1) | ||
result.primaryQuoteId = result.primaryQuoteId.substring(result.primaryQuoteId.lastIndexOf("/")+1); | ||
} | ||
if (data.hasOwnProperty("hasPrimaryInstrument")) { | ||
result.primaryInstrumentId = data["hasPrimaryInstrument"]; | ||
if(result.primaryInstrumentId.indexOf("/") != -1) | ||
result.primaryInstrumentId = result.primaryInstrumentId.substring(result.primaryInstrumentId.lastIndexOf("/")+1); | ||
} | ||
if (data.hasOwnProperty("hasActivityStatus")) { | ||
result.status = data["hasActivityStatus"]; | ||
if(result.status.indexOf("status") != -1) | ||
result.status = result.status.substring(result.status.lastIndexOf("status")+6).toLowerCase(); | ||
} | ||
if (data.hasOwnProperty("tr-org:hasHeadquartersPhoneNumber")) | ||
result.headquartersPhoneNumber = data["tr-org:hasHeadquartersPhoneNumber"]; | ||
if (data.hasOwnProperty("hasHoldingClassification")) | ||
result.public = true; | ||
if (data.hasOwnProperty("hasIPODate")) | ||
result.ipoDate = data["hasIPODate"]; | ||
if (data.hasOwnProperty("hasLatestOrganizationFoundedDate")) | ||
result.founded = data["hasLatestOrganizationFoundedDate"]; | ||
if (data.hasOwnProperty("tr-org:hasRegisteredPhoneNumber")) | ||
result.registeredPhoneNumber = data["tr-org:hasRegisteredPhoneNumber"]; | ||
if (data.hasOwnProperty("isIncorporatedIn")) | ||
result.incorporatedLocation = data["isIncorporatedIn"]; | ||
if (data.hasOwnProperty("isDomiciledIn")) | ||
result.domiciledLocation = data["isDomiciledIn"]; | ||
if (data.hasOwnProperty("hasURL")) | ||
result.url = data["hasURL"]; | ||
if (data.hasOwnProperty("vcard:organization-name")) | ||
result.name = data["vcard:organization-name"]; | ||
return result; | ||
}, | ||
set: function (key, value) { | ||
@@ -240,2 +416,95 @@ this.options[key] = value; | ||
}, | ||
search: function(query, cb) { | ||
var calais = this; | ||
if (!calais.validateOptions()) | ||
return cb({}, 'Bad options'); | ||
//Make sure we were given a URL. | ||
if (this._undefinedOrNull(query) || typeof query != 'string' || query.length == 0) | ||
return cb({}, 'Invalid query given.'); | ||
var compositeUrl = calais.entitySearchUrl + "?q=" + query; | ||
//We can upload the html directly to Calais if we set the contentType as text/html | ||
var params = { | ||
'Host' : calais.options.apiHost, | ||
'x-ag-access-token' : calais.apiKey, | ||
'x-calais-language' : calais.options.language, | ||
'Accept' : 'application/json', | ||
'OutputFormat' : 'application/json' | ||
}; | ||
var options = { | ||
uri : compositeUrl, | ||
method : 'GET', | ||
headers: params | ||
}; | ||
request(options, function(error, response, calaisData) { | ||
if (error) | ||
return cb({}, error); | ||
if (response === undefined) { | ||
return cb({}, 'Undefined Calais response'); | ||
} else if (response.statusCode === 200) { | ||
// parse to a Javascript object if requested | ||
var result = JSON.parse(calaisData); | ||
result = (typeof result === 'string') ? JSON.parse(result) : result; | ||
var parsedResult = calais._parseCalaisSearchResult(result); | ||
return cb(parsedResult, calais.errors); | ||
} else | ||
return cb({}, 'Request error: ' + (typeof response === 'string' ? response : JSON.stringify(response))); | ||
}); | ||
}, | ||
lookup: function(identifier, cb) { | ||
var calais = this; | ||
if (!calais.validateOptions()) | ||
return cb({}, 'Bad options'); | ||
//Make sure we were given a URL. | ||
if (this._undefinedOrNull(identifier) || typeof identifier != 'string' || identifier.length == 0) | ||
return cb({}, 'Invalid identifier given.'); | ||
var compositeUrl = calais.entityLookupUrl + identifier + "?format=json-ld&access-token=" + calais.apiKey; | ||
var options = { | ||
uri : compositeUrl, | ||
method : 'GET' | ||
}; | ||
request(options, function(error, response, calaisData) { | ||
if (error) | ||
return cb({}, error); | ||
if (response === undefined) { | ||
return cb({}, 'Undefined Calais response'); | ||
} else if (response.statusCode === 200) { | ||
// parse to a Javascript object if requested | ||
var result = JSON.parse(calaisData); | ||
result = (typeof result === 'string') ? JSON.parse(result) : result; | ||
var parsedResult = calais._parseCalaisLookupResult(result); | ||
return cb(parsedResult, calais.errors); | ||
} else | ||
return cb({}, 'Request error: ' + (typeof response === 'string' ? response : JSON.stringify(response))); | ||
}); | ||
} | ||
@@ -242,0 +511,0 @@ }; |
{ | ||
"name": "calais-entity-extractor", | ||
"version": "1.1.1", | ||
"version": "1.2.0", | ||
"description": "Extract entities from text using Open Calais.", | ||
@@ -5,0 +5,0 @@ "scripts": { |
32392
473