Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

tokenizer

Package Overview
Dependencies
Maintainers
1
Versions
11
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

tokenizer - npm Package Compare versions

Comparing version 1.0.1 to 1.1.0

.travis.yml

95

lib/Tokenizer.js

@@ -5,2 +5,3 @@ var EventEmitter = require('events').EventEmitter;

var Transform = require('stream').Transform;
var disect = require('disect');

@@ -29,4 +30,8 @@ function noop(){}

process.nextTick(function () {
self._tokenize(chunk);
callback();
try {
self._tokenize(chunk);
callback();
} catch(e) {
callback(e);
}
})

@@ -36,53 +41,39 @@ };

Tokenizer.prototype._tokenize = function _tokenize(data, nobuffer) {
var regexes = this._regexes;
// in case we buffered data on previous writes
data = this._buffered + data;
// if we couldn't tokenize it last time, no need to retry
var i = this._buffered.length;
// the index at which unparsed data begins
var last_tokenized = 0;
var matching; // array of matching rules of the previous iteration
this._buffered = '';
if(!data.length) {
return;
}
while(i <= data.length) {
// we take a little bit of the data and try to match it
var buf = data.substring(last_tokenized, i);
if(!buf.length) { ++i; continue; } // ignore ""
// create a list of the rules matching this bit
var m = this._regexes.filter(function(e) {
return e.regex.test(buf);
});
// if no match now...
if(!m.length) {
// ... and no match during the last iteration
if(!matching || !matching.length) {
// something went wrong
this.emit('error', new SyntaxError('could not parse '+JSON.stringify(buf)));
this._tokenize = noop;
return;
}
// if something was matching for the previous bit
// this is our token
else {
var token = buf.substr(0, buf.length-1);
this._gotToken(token, matching[0]);
last_tokenized = --i; // adjust these values
matching = null; // start matching something else
}
}
// we got some matches
// let's see if it still matches on the next iteration
else {
matching = m;
}
++i;
var maxIndex = disect(0, data.length, function (index) {
var buf = data.substr(0, index);
var matching = regexes.filter(function (e) {
return e.regex.test(buf);
});
return matching.length === 0;
});
if(maxIndex > 0) {
if(maxIndex === data.length) {
var str = data.substr(0, maxIndex);
}
else {
var str = data.substr(0, maxIndex - 1);
}
if(!nobuffer && (maxIndex === data.length)) {
this._buffered = data;
return;
}
var matching = regexes.filter(function (e) {
return e.regex.test(str);
});
if(!matching.length) {
throw new Error('wut ?');
}
this._gotToken(str, matching[0]);
this._tokenize(data.substr(maxIndex), nobuffer);
}
// no other data is coming, we can emit what we have
if(nobuffer) {
// when no actual data was tokenized, matching is undefined
if(matching) {
this._gotToken(data.substr(last_tokenized), matching[0]);
}
}
// buffer data for the next write
else {
this._buffered = data.substring(last_tokenized);
throw new SyntaxError('could not parse '+JSON.stringify(data));
}

@@ -94,4 +85,8 @@ };

process.nextTick(function () {
self._tokenize('', true);
callback();
try {
self._tokenize('', true);
callback();
} catch(e) {
callback(e);
}
});

@@ -98,0 +93,0 @@ };

{
"name": "tokenizer",
"description": "A wide purpose tokenizer for node.js which looks like a stream",
"version": "1.0.1",
"version": "1.1.0",
"homepage": "http://github.com/floby/node-tokenizer",

@@ -20,3 +20,9 @@ "repository": {

"node": "0.10.x"
},
"devDependencies": {
"nodeunit": "~0.8.1"
},
"dependencies": {
"disect": "~1.1.0"
}
}

@@ -0,1 +1,3 @@

[![Build Status](https://travis-ci.org/Floby/node-tokenizer.png)](https://travis-ci.org/Floby/node-tokenizer)
# Synopsis

@@ -2,0 +4,0 @@ A wide purpose tokenizer for JavaScript. The interface follows more or less

@@ -114,1 +114,18 @@ var tokenizer = require('../');

}.withDomain();
// Feeds the input "Hello World" to the tokenizer in two pieces ('Hell' then
// 'o World') and checks that the emitted tokens are still the whole words.
exports['words in two chunks'] = function(test) {
  // Values the 'data' handler should receive, in emission order.
  var expected = ["Hello", "World"];
  var stream = tokenizer();
  stream.addRule('word');
  stream.addRule('whitespace');
  // NOTE(review): presumably keeps whitespace tokens from reaching 'data' — confirm against ignore().
  stream.ignore('whitespace');
  // Two assertions (type and value) for each expected token.
  test.expect(expected.length * 2);
  stream.on('data', function(tok) {
    console.log('got token', tok);
    test.equal('word', tok.type);
    // Consume the next expected value before comparing it with the token.
    var want = expected.shift();
    test.equal(tok , want, "We should get the values we input");
  });
  stream.on('end', test.done.bind(test));
  // Split the first word across the write boundary.
  stream.write('Hell');
  stream.end('o World');
}.withDomain();
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc