@@ -5,2 +5,3 @@ var EventEmitter = require('events').EventEmitter;
		var Transform = require('stream').Transform;
		var disect = require('disect');

		@@ -29,4 +30,8 @@ function noop(){}
		process.nextTick(function () {
		self._tokenize(chunk);
		callback();
		try {
		self._tokenize(chunk);
		callback();
		} catch(e) {
		callback(e);
		}
		})
		@@ -36,53 +41,39 @@ };
		Tokenizer.prototype._tokenize = function _tokenize(data, nobuffer) {
		var regexes = this._regexes;
		// in case we buffered data on previous writes
		data = this._buffered + data;
		// if we couldn't tokenize it last time, no need to retry
		var i = this._buffered.length;
		// the index at which unparsed data begins
		var last_tokenized = 0;
		var matching; // array of matching rules of the previous iteration
		this._buffered = '';
		if(!data.length) {
		return;
		}

		while(i <= data.length) {
		// we take a little bit of the data and try to match it
		var buf = data.substring(last_tokenized, i);
		if(!buf.length) { ++i; continue; } // ignore ""
		// create a list of the rules matching this bit
		var m = this._regexes.filter(function(e) {
		return e.regex.test(buf);
		});
		// if no match now...
		if(!m.length) {
		// ... and no match during the last iteration
		if(!matching \|\| !matching.length) {
		// something went wrong
		this.emit('error', new SyntaxError('could not parse '+JSON.stringify(buf)));
		this._tokenize = noop;
		return;
		}
		// if something was matching for the previous bit
		// this is our token
		else {
		var token = buf.substr(0, buf.length-1);
		this._gotToken(token, matching[0]);
		last_tokenized = --i; // adjust these values
		matching = null; // start matching something else
		}
		}
		// we got some matches
		// let's see if it still matches on the next iteration
		else {
		matching = m;
		}
		++i;
		var maxIndex = disect(0, data.length, function (index) {
		var buf = data.substr(0, index);
		var matching = regexes.filter(function (e) {
		return e.regex.test(buf);
		});
		return matching.length === 0;
		});
		if(maxIndex > 0) {
		if(maxIndex === data.length) {
		var str = data.substr(0, maxIndex);
		}
		else {
		var str = data.substr(0, maxIndex - 1);
		}
		if(!nobuffer && (maxIndex === data.length)) {
		this._buffered = data;
		return;
		}
		var matching = regexes.filter(function (e) {
		return e.regex.test(str);
		});
		if(!matching.length) {
		throw new Error('wut ?');
		}
		this._gotToken(str, matching[0]);
		this._tokenize(data.substr(maxIndex), nobuffer);
		}
		// no other data is coming, we can emit what we have
		if(nobuffer) {
		// when no actual data was tokenized, matching is undefined
		if(matching) {
		this._gotToken(data.substr(last_tokenized), matching[0]);
		}
		}
		// buffer data for the next write
		else {
		this._buffered = data.substring(last_tokenized);
		throw new SyntaxError('could not parse '+JSON.stringify(data));
		}
		@@ -94,4 +85,8 @@ };
		process.nextTick(function () {
		self._tokenize('', true);
		callback();
		try {
		self._tokenize('', true);
		callback();
		} catch(e) {
		callback(e);
		}
		});
		@@ -98,0 +93,0 @@ };

package.json

		{
		"name": "tokenizer",
		"description": "A wide purpose tokenizer for node.js which looks like a stream",
		"version": "1.0.1",
		"version": "1.1.0",
		"homepage": "http://github.com/floby/node-tokenizer",
		@@ -20,3 +20,9 @@ "repository": {
		"node": "0.10.x"
		},
		"devDependencies": {
		"nodeunit": "~0.8.1"
		},
		"dependencies": {
		"disect": "~1.1.0"
		}
		}

README.md

		@@ -0,1 +1,3 @@
		[![Build Status](https://travis-ci.org/Floby/node-tokenizer.png)](https://travis-ci.org/Floby/node-tokenizer)

		# Synopsis
		@@ -2,0 +4,0 @@ A wide purpose tokenizer for JavaScript. The interface follows more or less

test/test-tokenizer.js

		@@ -114,1 +114,18 @@ var tokenizer = require('../');
		}.withDomain();

		// Feeds the input "Hello World" in two writes ('Hell' then 'o World') and
		// expects exactly two 'word' tokens, "Hello" and "World", i.e. the first
		// word must not be cut at the chunk boundary.
		exports['words in two chunks'] = function(test) {
			var expectedWords = ["Hello", "World"];
			var stream = tokenizer();

			// Register both rules; whitespace is then marked as ignored.
			stream.addRule('word');
			stream.addRule('whitespace');
			stream.ignore('whitespace');

			// Two assertions (type + value) per expected word.
			test.expect(expectedWords.length * 2);

			stream.on('data', function onToken(token) {
				console.log('got token', token)
				test.equal('word', token.type);
				// Tokens must arrive in input order, so consume the expectations front to back.
				var nextExpected = expectedWords.shift();
				test.equal(token , nextExpected, "We should get the values we input");
			});

			var finish = test.done.bind(test);
			stream.on('end', finish);

			// Split the first word across the write/end boundary.
			stream.write('Hell');
			stream.end('o World');
		}.withDomain();

tokenizer - npm Package Compare versions

Improved metrics

Worsened metrics

Dependency changes