Socket
Socket
Sign in · Demo · Install

node-normalizer

Package Overview
Dependencies
Maintainers
1
Versions
21
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

node-normalizer - npm Package Compare versions

Comparing version 0.0.11 to 0.1.0

2

data/substitutes.txt

@@ -1259,3 +1259,3 @@ a.i. artificial+intelligence

quite
raelly
raelly really
rather_then rather+than

@@ -1262,0 +1262,0 @@ real_actual actual

@@ -7,8 +7,21 @@ var fs = require('fs');

var async = require("async");
var RE2 = require("re2");
var debug = require('debug')("Normalizer");
var re1 = new RE2(/\+/g);
var re2 = new RE2(/\t/g);
var re3 = new RE2(/\s+/g);
var re4 = new RE2(/(’|‘)/g);
var re5 = new RE2(/(“|”)/g);
var re6 = new RE2(/(–|—)/g);
var re7 = new RE2(/[^\x00-\x7F]/g);
var re8 = new RE2(/[\+]{1}/g);
var re9 = new RE2(/<plus>/g);
var re10 = new RE2(/\d,\d/g);
var re11 = new RE2(/_/g);
// TODO, fix the paths
var tasks = [
{'key':'_sys','file':'systemessentials.txt'},
{'key':'_extra','file':'substitutes.txt'},
{'key':'_sys','file':'systemessentials.txt'},
{'key':'_extra','file':'substitutes.txt'},
{'key':'_contractions','file':'contractions.txt'},

@@ -25,3 +38,3 @@ {'key':'_interjections','file':'interjections.txt'},

var p = path.join(__dirname, "../data/", file)
var p = path.join(__dirname, "../data/", file);
var data = fs.readFileSync(p,'utf8').split("\r\n");

@@ -39,4 +52,4 @@

if (parts[1] == undefined) {
lineHandle(parts[0], "");
if (parts[1] === undefined) {
lineHandle(parts[0], "");
} else {

@@ -47,3 +60,3 @@

} else if (pos > 0) {
} else if (pos > 0) {
nline = nline.left(pos);

@@ -56,3 +69,3 @@ var parts = nline.s.split(" ");

closeHandle();
}
};

@@ -84,7 +97,6 @@ exports.loadData = function(cb){

endM = true;
lookup = lookup.substring(0, lookup.length - 1);
lookup = lookup.substring(0, lookup.length - 1);
}
lookup = lookup.replace(/_/g," ");
var qm = quotemeta(lookup);
var qm = quotemeta(re11.replace(lookup, " "));

@@ -110,3 +122,3 @@ if (startM && endM) {

}
}
};

@@ -120,3 +132,3 @@ readSubstitutes(item.file, lineHandle, function() {

});
}
};

@@ -131,17 +143,15 @@ async.map(tasks, itor, function(){

msg = msg.replace(/\+/g, "<plus>");
msg = msg.replace(new RegExp("\t", "g"), " ");
msg = msg.replace(/\s+/g, " ");
msg = msg.replace(/(’|‘)/g, "'");
msg = msg.replace(/(“|”)/g, '"');
msg = msg.replace(/(–|—)/g, "—");
msg = msg.replace(/[^\x00-\x7F]/g, "");
msg = re1.replace(msg, "<plus>");
msg = re2.replace(msg, " ");
msg = re3.replace(msg, " ");
msg = re4.replace(msg, "'");
msg = re5.replace(msg, '"');
msg = re6.replace(msg, "—");
msg = re7.replace(msg, "");
var fileItor = function(item1, next1) {
var itemItor = function(item2, next2) {
var reArray = reSet[item1][item2];
var reItor = function(item3, next3) {
var pm = msg;
// msg = item3.re.replace(msg, item3.r);
msg = msg.replace(item3.re, item3.r);

@@ -154,17 +164,17 @@ next3(null);

});
}
};
async.each(Object.keys(reSet[item1]), itemItor, function(){
next1(null)
next1(null);
});
}
};
async.mapSeries(Object.keys(reSet), fileItor, function() {
msg = msg.replace(new RegExp("[\+]{1}", "g"), " ");
msg = msg.replace(new RegExp("<plus>", "g"), "+");
msg = msg.replace(/\d,\d/g, function(v) { return v.replace(",",""); });
msg = re8.replace(msg, " ");
msg = re9.replace(msg, "+");
msg = re10.replace(msg, function(v) { return v.replace(",",""); });
});
return msg.trim();
}
return msg.trim();
};

@@ -177,2 +187,2 @@ var quotemeta = function (string) {

return string;
};
};
{
"name": "node-normalizer",
"version": "0.0.11",
"version": "0.1.0",
"description": "Normalize and clean text",

@@ -12,5 +12,6 @@ "main": "index.js",

"dependencies": {
"string": "~1.8.0",
"async": "~0.7.0",
"debug": "~0.8.0",
"async": "~0.7.0"
"re2": "^1.3.0",
"string": "~1.8.0"
},

@@ -17,0 +18,0 @@ "devDependencies": {

@@ -1,3 +0,3 @@

var mocha = require("mocha");
var mocha = require("mocha");
var should = require("should");

@@ -8,66 +8,66 @@ var norm = require("../index");

before(function(done){
norm.loadData(function(){
done();
});
});
before(function(done){
norm.loadData(function(){
done();
});
});
describe('Should clean input', function() {
describe('Should clean input', function() {
it("should replace subsitutes", function() {
norm.clean("Nov 1st I weighed 90 kgs. total").should.eql("November 1st I weighed 90 kilograms total");
norm.clean("I shared it on FB w/ friends, ie: you").should.eql("I shared it on Facebook with friends, for example : you");
});
it("should replace subsitutes", function() {
norm.clean("Nov 1st I weighed 90 kgs. total").should.eql("November 1st I weighed 90 kilograms total");
norm.clean("I shared it on FB w/ friends, ie: you").should.eql("I shared it on Facebook with friends, for example : you");
});
it("should expand contractions", function() {
norm.clean("I'm on the yelow zebra").should.eql("I am on the yellow zebra");
norm.clean("I'll listen to y'all").should.eql("I will listen to you all");
norm.clean("do n't make it right").should.eql("do not make it right");
norm.clean("it's all good").should.eql("it is all good");
});
it("should expand contractions", function() {
norm.clean("I'm on the yelow zebra").should.eql("I am on the yellow zebra");
norm.clean("I'll listen to y'all").should.eql("I will listen to you all");
norm.clean("do n't make it right").should.eql("do not make it right");
norm.clean("it's all good").should.eql("it is all good");
});
it("should swap british / canadian words", function() {
norm.clean("armour axe coloured gold").should.eql("armor ax colored gold");
});
it("should swap british / canadian words", function() {
norm.clean("armour axe coloured gold").should.eql("armor ax colored gold");
});
it("should fix spelling", function() {
norm.clean("are we sceduled thrsday for teh restraunt").should.eql("are we scheduled Thursday for the restaurant");
});
it("should fix spelling", function() {
norm.clean("are we sceduled thrsday for teh restraunt").should.eql("are we scheduled Thursday for the restaurant");
});
it("should expand txt speak", function() {
norm.clean("n").should.eql("~no");
norm.clean("lol").should.eql("~emolaugh");
norm.clean("haha").should.eql("~emolaugh");
norm.clean(":)").should.eql("~emohappy");
});
it("should expand txt speak", function() {
norm.clean("n").should.eql("~no");
norm.clean("lol").should.eql("~emolaugh");
norm.clean("haha").should.eql("~emolaugh");
norm.clean(":)").should.eql("~emohappy");
});
it("should clean this", function() {
norm.clean("Well , I could not help it, could I").should.eql("I could not help it, could I")
});
it("should clean this", function() {
norm.clean("Well , I could not help it, could I").should.eql("I could not help it, could I")
});
it("should not remove +", function() {
norm.clean("3+4=7").should.eql("3+4=7");
});
it("should not remove +", function() {
norm.clean("3+4=7").should.eql("3+4=7");
});
it("should remove extra spaces", function() {
norm.clean("this is spaced out").should.eql("this is spaced out");
});
it("should remove extra spaces", function() {
norm.clean("this is spaced out").should.eql("this is spaced out");
});
it("should remove punct", function() {
norm.clean("why do i care?").should.eql("why do I care");
});
it("should remove punct", function() {
norm.clean("why do i care?").should.eql("why do I care");
});
it("Fix numbers", function() {
norm.clean("how much is 1,000.00").should.eql("how much is 1000.00");
});
it("Fix ASCII characters", function() {
norm.clean("What’s up").should.eql("what is up");
norm.clean("What's up").should.eql("what is up");
norm.clean("I said “shut up”").should.eql('I said "shut up"');
norm.clean("œ").should.eql('');
});
it("Fix numbers", function() {
norm.clean("how much is 1,000.00").should.eql("how much is 1000.00");
});
it("Fix ASCII characters", function() {
norm.clean("What’s up").should.eql("what is up");
norm.clean("What's up").should.eql("what is up");
norm.clean("I said “shut up”").should.eql('I said "shut up"');
norm.clean("œ").should.eql('');
});
});
});
});
Socket · Socket · SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc