Comparing version 1.0.1 to 1.1.0
@@ -5,2 +5,3 @@ var EventEmitter = require('events').EventEmitter;
var Transform = require('stream').Transform;
var disect = require('disect');
@@ -29,4 +30,8 @@ function noop(){}
process.nextTick(function () {
self._tokenize(chunk);
callback();
try {
self._tokenize(chunk);
callback();
} catch(e) {
callback(e);
}
})
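The change in this hunk wraps tokenization in try/catch so that a failure is passed to the _transform callback instead of being thrown from inside process.nextTick, where the stream's consumer could not catch it. The sketch below shows how a consumer would now observe such a failure; it is a minimal example assuming the factory and rule names used in the package's own tests, not output taken from this diff:

    var tokenizer = require('tokenizer');
    var t = tokenizer();
    t.addRule('word');
    t.addRule('whitespace');
    t.ignore('whitespace');
    t.on('data', function (token) {
        console.log(token.type, String(token));
    });
    // as of 1.1.0 a tokenize failure reaches the _transform/_flush callback,
    // so Transform turns it into an 'error' event on the stream
    t.on('error', function (err) {
        console.error('could not tokenize:', err.message);
    });
    t.write('Hello ');
    t.end('World');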
@@ -36,53 +41,39 @@ };
Tokenizer.prototype._tokenize = function _tokenize(data, nobuffer) {
var regexes = this._regexes;
// in case we buffered data on previous writes
data = this._buffered + data;
// if we couldn't tokenize it last time, no need to retry
var i = this._buffered.length;
// the index at which unparsed data begins
var last_tokenized = 0;
var matching; // array of matching rules of the previous iteration
this._buffered = '';
if(!data.length) {
return;
}
while(i <= data.length) {
// we take a little bit of the data an try to match it
var buf = data.substring(last_tokenized, i);
if(!buf.length) { ++i; continue; } // ignore ""
// create a list of the rules matching this bit
var m = this._regexes.filter(function(e) {
return e.regex.test(buf);
});
// if no match now...
if(!m.length) {
// ... and no match during the last iteration
if(!matching || !matching.length) {
// something went wrong
this.emit('error', new SyntaxError('could not parse '+JSON.stringify(buf)));
this._tokenize = noop;
return;
}
// if something was matching for the previous bit
// this is our token
else {
var token = buf.substr(0, buf.length-1);
this._gotToken(token, matching[0]);
last_tokenized = --i; // adjust these values
matching = null; // start matching something else
}
}
// we got some matches
// let's see if it still matches on the next iteration
else {
matching = m;
}
++i;
var maxIndex = disect(0, data.length, function (index) {
var buf = data.substr(0, index);
var matching = regexes.filter(function (e) {
return e.regex.test(buf);
});
return matching.length === 0;
});
if(maxIndex > 0) {
if(maxIndex === data.length) {
var str = data.substr(0, maxIndex);
}
else {
var str = data.substr(0, maxIndex - 1);
}
if(!nobuffer && (maxIndex === data.length)) {
this._buffered = data;
return;
}
var matching = regexes.filter(function (e) {
return e.regex.test(str);
});
if(!matching.length) {
throw new Error('wut ?');
}
this._gotToken(str, matching[0]);
this._tokenize(data.substr(maxIndex), nobuffer);
}
// no other data is coming, we can emit what we have
if(nobuffer) {
// when no actual data was tokenized, matching is undefined
if(matching) {
this._gotToken(data.substr(last_tokenized), matching[0]);
}
}
// buffer data for the next write
else {
this._buffered = data.substring(last_tokenized);
throw new SyntaxError('could not parse '+JSON.stringify(data));
}
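The rewritten _tokenize above drops the character-by-character scan in favour of disect, which is used here as a dichotomic (binary) search over a boolean predicate: it asks for the smallest prefix length at which no rule matches, so the token boundary is located in O(log n) predicate evaluations, each of which runs the rule regexes once. The helper below is an illustrative sketch of the contract disect appears to be used under here (first index in the range for which the predicate is true); it is not the module's actual implementation:

    // Assumed contract: predicate(i) is false for small i and true from some
    // threshold onwards; return the smallest index in [min, max] where it is true.
    function bisectFirstTrue(min, max, predicate) {
        while (min < max) {
            var mid = Math.floor((min + max) / 2);
            if (predicate(mid)) {
                max = mid;        // the threshold is at mid or before it
            } else {
                min = mid + 1;    // the threshold is after mid
            }
        }
        return min;
    }

_tokenize calls it with predicate(i) = "no rule matches data.substr(0, i)", so maxIndex lands one past the longest prefix that still matches some rule, which is why the emitted token is data.substr(0, maxIndex - 1) in the general case.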
@@ -94,4 +85,8 @@ };
process.nextTick(function () {
self._tokenize('', true);
callback();
try {
self._tokenize('', true);
callback();
} catch(e) {
callback(e);
}
});
@@ -98,0 +93,0 @@ };
{
"name": "tokenizer",
"description": "A wide purpose tokenizer for node.js which looks like a stream",
"version": "1.0.1",
"version": "1.1.0",
"homepage": "http://github.com/floby/node-tokenizer",
@@ -20,3 +20,9 @@ "repository": {
"node": "0.10.x"
},
"devDependencies": {
"nodeunit": "~0.8.1"
},
"dependencies": {
"disect": "~1.1.0"
}
}
@@ -0,1 +1,3 @@
[![Build Status](https://travis-ci.org/Floby/node-tokenizer.png)](https://travis-ci.org/Floby/node-tokenizer)
# Synopsis
@@ -2,0 +4,0 @@ A wide purpose tokenizer for JavaScript. The interface follows more or less
@@ -114,1 +114,18 @@ var tokenizer = require('../');
}.withDomain();
exports['words in two chunks'] = function(test) {
var strings = ["Hello", "World"];
var t = tokenizer();
t.addRule('word');
t.addRule('whitespace');
t.ignore('whitespace');
test.expect(2 * 2);
t.on('data', function(token) {
console.log('got token', token)
test.equal('word', token.type);
test.equal(token , strings.shift(), "We should get the values we input");
});
t.on('end', test.done.bind(test));
t.write('Hell');
t.end('o World');
}.withDomain();
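The added test covers the buffering path of the new _tokenize: when a write ends in the middle of a token, the partial match ('Hell') is kept in this._buffered and completed by the next chunk, so the stream still emits whole tokens. In plain usage (same assumed API as in the sketch above) that behaviour looks like this:

    var tokenizer = require('tokenizer');
    var t = tokenizer();
    t.addRule('word');
    t.addRule('whitespace');
    t.ignore('whitespace');
    t.on('data', function (token) {
        // expected: 'Hello' then 'World', as the test asserts
        console.log(token.type, String(token));
    });
    t.write('Hell');    // no complete token yet: kept in the internal buffer
    t.end('o World');   // completes 'Hello', then flushes 'World' on end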
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
+ Added disect@~1.1.0
+ Added disect@1.1.1 (transitive)