Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

lexing

Package Overview
Dependencies
Maintainers
1
Versions
28
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

lexing - npm Package Compare versions

Comparing version 0.1.4 to 0.2.0

test/simple.js

388

index.js

@@ -1,27 +0,56 @@

var __extends = this.__extends || function (d, b) {
for (var p in b) if (b.hasOwnProperty(p)) d[p] = b[p];
function __() { this.constructor = d; }
__.prototype = b.prototype;
d.prototype = new __();
};
/// <reference path="type_declarations/DefinitelyTyped/node/node.d.ts" />
var fs = require('fs');
var BufferedBufferReader = (function () {
function BufferedBufferReader(buffer) {
this.buffer = buffer;
// #############################################################################
// BASIC BUFFER READER
/**
Wraps a Buffer as a stateful iterable.
*/
var BufferIterator = (function () {
function BufferIterator(_buffer, _position) {
if (_position === void 0) { _position = 0; }
this._buffer = _buffer;
this._position = _position;
}
BufferedBufferReader.prototype.peekByte = function () {
return this.buffer[0];
BufferIterator.fromString = function (str, encoding) {
var buffer = new Buffer(str, encoding);
return new BufferIterator(buffer);
};
BufferedBufferReader.prototype.peekBuffer = function (length) {
return this.buffer.slice(0, length);
Object.defineProperty(BufferIterator.prototype, "position", {
/**
Return the current position within the underlying Buffer.
*/
get: function () {
return this._position;
},
enumerable: true,
configurable: true
});
Object.defineProperty(BufferIterator.prototype, "size", {
/**
Return the total length of the underlying Buffer.
*/
get: function () {
return this._buffer.length;
},
enumerable: true,
configurable: true
});
/**
Read the next `length` bytes from the underlying Buffer, or fewer iff we reach
EOF, without advancing our position within the Buffer. Returns a Buffer slice.
*/
BufferIterator.prototype.peek = function (length) {
return this._buffer.slice(this._position, this._position + length);
};
BufferedBufferReader.prototype.readByte = function () {
var byte = this.peekByte();
this.buffer = this.buffer.slice(1);
return byte;
};
BufferedBufferReader.prototype.readBuffer = function (length) {
var buffer = this.peekBuffer(length);
this.buffer = this.buffer.slice(length);
/**
Read the next `length` bytes from the underlying Buffer, or fewer iff we reach
EOF, and advance our position within the Buffer. Returns a Buffer slice.
Buffer#slice never returns entries beyond the end of the buffer:
`new Buffer([1, 2, 3, 4]).slice(2, 10)` produces `<Buffer 03 04>`
*/
BufferIterator.prototype.next = function (length) {
var buffer = this._buffer.slice(this._position, this._position + length);
this._position += buffer.length;
return buffer;

@@ -32,50 +61,83 @@ };

characters (which may be < `length` iff EOF has been reached).
We do not allow skipping beyond the end of the buffer.
*/
BufferedBufferReader.prototype.skip = function (length) {
// we cannot skip more than `this.buffer.length` bytes
var bytesSkipped = Math.min(length, this.buffer.length);
this.buffer = this.buffer.slice(length);
BufferIterator.prototype.skip = function (length) {
var bytesSkipped = Math.min(length, this._buffer.length - this._position);
this._position += bytesSkipped;
return bytesSkipped;
};
BufferedBufferReader.prototype.toString = function () {
return this.buffer.toString();
return BufferIterator;
})();
exports.BufferIterator = BufferIterator;
/**
Wrap an Array as an iterable.
*/
var ArrayIterator = (function () {
function ArrayIterator(_array, position) {
if (position === void 0) { position = 0; }
this._array = _array;
this.position = position;
}
Object.defineProperty(ArrayIterator.prototype, "size", {
get: function () {
return this._array.length;
},
enumerable: true,
configurable: true
});
ArrayIterator.prototype.next = function () {
return this._array[this.position++];
};
return BufferedBufferReader;
ArrayIterator.prototype.peek = function () {
return this._array[this.position + 1];
};
ArrayIterator.prototype.skip = function () {
if (this.position < this._array.length) {
this.position++;
return true;
}
return false;
};
return ArrayIterator;
})();
exports.BufferedBufferReader = BufferedBufferReader;
var BufferedStringReader = (function (_super) {
__extends(BufferedStringReader, _super);
function BufferedStringReader(input, encoding) {
_super.call(this, new Buffer(input, encoding));
}
return BufferedStringReader;
})(BufferedBufferReader);
exports.BufferedStringReader = BufferedStringReader;
exports.ArrayIterator = ArrayIterator;
// #############################################################################
// FILE READERS
// SYNCHRONOUS FILE READER
/**
Provide buffered (and Buffer-friendly) access to a file.
Provide iterative access to a file.
It is buffered, which means you can call `peek(same_number)` repeatedly without
triggering a `read(2)` system call on the underlying file each time. Likewise,
calling `read(small_number)` repeatedly will issue a `read(2)` system call only
when the buffer doesn't have enough data.
When calling `read()` on the underlying file, it will read batches of
`_block_size` (default: 1024) bytes.
*/
var BufferedFileReader = (function () {
function BufferedFileReader(fd, file_position) {
if (file_position === void 0) { file_position = 0; }
this.fd = fd;
this.file_position = file_position;
this.buffer = new Buffer(0);
var FileIterator = (function () {
// when reading more data, pull in chunks of `_block_size` bytes.
function FileIterator(_fd, _position, _block_size) {
if (_position === void 0) { _position = 0; }
if (_block_size === void 0) { _block_size = 1024; }
this._fd = _fd;
this._position = _position;
this._block_size = _block_size;
this._buffer = new Buffer(0);
}
BufferedFileReader.open = function (filepath) {
FileIterator.open = function (filepath) {
var fd = fs.openSync(filepath, 'r');
return new BufferedFileReader(fd);
return new FileIterator(fd);
};
BufferedFileReader.prototype.close = function () {
fs.closeSync(this.fd);
FileIterator.prototype.close = function () {
fs.closeSync(this._fd);
};
Object.defineProperty(BufferedFileReader.prototype, "position", {
Object.defineProperty(FileIterator.prototype, "position", {
/**
Return the position in the file that would be read from if we called
readBuffer(...). This is different from the internally-held position, which
read(...). This is different from the internally-held position, which
points to the end of the currently held buffer.
*/
get: function () {
return this.file_position - this.buffer.length;
return this._position - this._buffer.length;
},

@@ -85,25 +147,24 @@ enumerable: true,

});
Object.defineProperty(FileIterator.prototype, "size", {
/**
Return the total size (in bytes) of the underlying file.
*/
get: function () {
return fs.fstatSync(this._fd).size;
},
enumerable: true,
configurable: true
});
/**
Calls fs.readSync on the underlying file descriptor with pretty much the same
argument signature.
Returns `bytesRead`, the number of bytes that were read into the given Buffer.
Node.js documentation for fs.read() / fs.readSync():
> position is an integer specifying where to begin reading from in the file.
> If position is null, data will be read from the current file position.
*/
BufferedFileReader.prototype.read = function (buffer, offset, length, position) {
return fs.readSync(this.fd, buffer, offset, length, position);
};
/**
Ensure that the available buffer is at least `length` bytes long.
This may return without the condition being met of this.buffer.length >= length,
This may return without the condition being met (this.buffer.length >= length),
if the end of the underlying file has been reached.
TODO: pull _fillBuffer into the loop, with the Buffer declaration outside.
*/
BufferedFileReader.prototype.ensureLength = function (length) {
while (length > this.buffer.length) {
FileIterator.prototype._ensureLength = function (length) {
while (length > this._buffer.length) {
// all the action happens only if we need more bytes than are in the buffer
var EOF = this.fillBuffer(BufferedFileReader.BLOCK_SIZE);
var EOF = this._fillBuffer(this._block_size);
if (EOF) {

@@ -118,30 +179,22 @@ break;

Returns false iff EOF has been reached, otherwise returns true. */
BufferedFileReader.prototype.fillBuffer = function (length) {
FileIterator.prototype._fillBuffer = function (length) {
var buffer = new Buffer(length);
// always read from the reader's current position
var bytesRead = this.read(buffer, 0, length, this.file_position);
// always read from the current position
var bytesRead = fs.readSync(this._fd, buffer, 0, length, this._position);
// and update it accordingly
this.file_position += bytesRead;
this._position += bytesRead;
// use the Buffer.concat totalLength argument to slice the fresh buffer if needed
this.buffer = Buffer.concat([this.buffer, buffer], this.buffer.length + bytesRead);
this._buffer = Buffer.concat([this._buffer, buffer], this._buffer.length + bytesRead);
return bytesRead < length;
};
BufferedFileReader.prototype.peekByte = function () {
this.ensureLength(1);
return this.buffer[0];
FileIterator.prototype.next = function (length) {
this._ensureLength(length);
var buffer = this._buffer.slice(0, length);
this._buffer = this._buffer.slice(length);
return buffer;
};
BufferedFileReader.prototype.peekBuffer = function (length) {
this.ensureLength(length);
return this.buffer.slice(0, length);
FileIterator.prototype.peek = function (length) {
this._ensureLength(length);
return this._buffer.slice(0, length);
};
BufferedFileReader.prototype.readByte = function () {
var byte = this.peekByte();
this.buffer = this.buffer.slice(1);
return byte;
};
BufferedFileReader.prototype.readBuffer = function (length) {
var buffer = this.peekBuffer(length);
this.buffer = this.buffer.slice(length);
return buffer;
};
/**

@@ -151,37 +204,61 @@ Skip over the next `length` characters, returning the number of skipped

*/
BufferedFileReader.prototype.skip = function (length) {
this.ensureLength(length);
FileIterator.prototype.skip = function (length) {
this._ensureLength(length);
// we cannot skip more than `this.buffer.length` bytes
var bytesSkipped = Math.min(length, this.buffer.length);
this.buffer = this.buffer.slice(length);
var bytesSkipped = Math.min(length, this._buffer.length);
this._buffer = this._buffer.slice(length);
return bytesSkipped;
};
// when reading more data, pull in chunks of `BLOCK_SIZE` bytes.
BufferedFileReader.BLOCK_SIZE = 1024;
return BufferedFileReader;
return FileIterator;
})();
exports.BufferedFileReader = BufferedFileReader;
var BufferedLexer = (function () {
function BufferedLexer(default_rules, state_rules) {
exports.FileIterator = FileIterator;
function Token(name, value) {
if (value === void 0) { value = null; }
return { name: name, value: value };
}
exports.Token = Token;
/**
The type T is the type of each token value, usually `any` (the token name is
always a string).
BufferIterable
*/
var Tokenizer = (function () {
function Tokenizer(default_rules, state_rules) {
if (state_rules === void 0) { state_rules = {}; }
this.default_rules = default_rules;
this.state_rules = state_rules;
this.reset();
}
Tokenizer.prototype.getRules = function (state_name) {
return (state_name === undefined) ? this.default_rules : this.state_rules[state_name];
};
/**
Reset the Lexer back to its initial state.
Create a closure around the iterable.
Unfortunately, it seems that TypeScript doesn't like inline functions, so we
use a helper class (TokenizerIterator).
*/
BufferedLexer.prototype.reset = function () {
this.states = [];
Tokenizer.prototype.map = function (iterable, states) {
if (states === void 0) { states = []; }
return new TokenizerIterator(this, iterable, states);
};
return Tokenizer;
})();
exports.Tokenizer = Tokenizer;
var TokenizerIterator = (function () {
function TokenizerIterator(tokenizer, iterable, states) {
this.tokenizer = tokenizer;
this.iterable = iterable;
this.states = states;
}
/**
Returns the next available pair from the input reader (usually [token, data]).
Returns the next available Token from the input reader.
If the matching rule's action returns null, this will return null.
If the matching rule's action returns null, this will return null.
TODO: optimize string conversion; abstract out the peek + toString, back into the reader?
*/
BufferedLexer.prototype.read = function () {
// TODO: abstract out the peekBuffer + toString, back into the reader?
// optimize string conversion
var input = this.reader.peekBuffer(256).toString('utf8');
TokenizerIterator.prototype._next = function () {
var state = this.states[this.states.length - 1];
var rules = state ? this.state_rules[state] : this.default_rules;
var rules = this.tokenizer.getRules(state);
var input = this.iterable.peek(256).toString('utf8');
for (var i = 0, rule; (rule = rules[i]); i++) {

@@ -191,3 +268,3 @@ var match = input.match(rule[0]);

var byteLength = Buffer.byteLength(match[0], 'utf8');
this.reader.skip(byteLength);
this.iterable.skip(byteLength);
return rule[1].call(this, match);

@@ -202,14 +279,83 @@ }

This will never return null.
This will never return null, but may return undefined if one of the rules
returns undefined, which the rule should not do! It will never a Token with
a null name.
*/
BufferedLexer.prototype.next = function () {
var result;
do {
result = this.read();
} while (result === null);
return result;
TokenizerIterator.prototype.next = function () {
while (1) {
var token = this._next();
if (token !== null && token.name !== null) {
return token;
}
}
};
return BufferedLexer;
return TokenizerIterator;
})();
exports.BufferedLexer = BufferedLexer;
/**
Recombine a stream of tokens using a stack of lists, e.g.,
WORD:BT START:STRING CHAR:A CHAR:b CHAR:c END:STRING WORD:ET
becomes:
WORD:BT STRING:Abc WORD:ET
*/
var Combiner = (function () {
function Combiner(rules) {
this.rules = rules;
}
Combiner.prototype.findRule = function (name) {
for (var i = 0, rule; (rule = this.rules[i]); i++) {
if (rule[0] === name) {
return rule;
}
}
throw new Error("No combiner rule found with the name: " + name);
};
Combiner.prototype.map = function (iterable, stack) {
if (stack === void 0) { stack = []; }
return new CombinerIterator(this, iterable, stack);
};
return Combiner;
})();
exports.Combiner = Combiner;
var CombinerIterator = (function () {
function CombinerIterator(combiner, iterable, stack) {
this.combiner = combiner;
this.iterable = iterable;
this.stack = stack;
}
/**
Returns the next available pair from the input reader (usually [token, data]).
If the matching rule's action returns null, this will return null.
*/
CombinerIterator.prototype.next = function () {
var token = this.iterable.next();
if (token.name == 'END') {
// TODO: check that the preceding START token has the same value
var tokens = this.stack.pop();
// type hack with <any>
var rule = this.combiner.findRule(token.value);
// reduce into combined token
token = rule[1](tokens);
}
if (token.name == 'START') {
// TODO: store the START token's value somewhere so that we can verify the END token's value matches
this.stack.push([]);
return this.next();
}
else if (this.stack.length > 0) {
// push it onto the list at the top of the stack
this.stack[this.stack.length - 1].push(token);
return this.next();
}
else {
// tokens at root level pass through transparently
return token;
}
};
return CombinerIterator;
})();
//// }

@@ -7,41 +7,80 @@ /// <reference path="type_declarations/DefinitelyTyped/node/node.d.ts" />

// #############################################################################
// READERS
// ITERABLES
export interface Reader {
/**
Reads a single byte.
*/
readByte(): number;
/**
Reads a series of bytes.
*/
readBuffer(length: number): Buffer;
/**
Any sort of sequence can implement Iterable<T>. It's a lot like Array<T>, but
read-only, and without random access.
*/
export interface Iterable<T> {
next(): T;
}
export interface StatefulIterable<T> extends Iterable<T> {
position: number;
size: number;
peek(): T;
skip(): boolean;
}
export interface BufferedReader extends Reader {
peekByte(): number;
peekBuffer(length: number): Buffer;
/**
In some cases, it makes more sense to iterate in batches, or chunks, through an
iterable of a particular type. The T in ChunkedIterable<T> should itself be a
sequence type, like string[] or Buffer.
*/
export interface ChunkedIterable<T> {
next(length: number): T;
}
export interface StatefulChunkedIterable<T> extends ChunkedIterable<T> {
position: number;
size: number;
peek(length: number): T;
skip(length: number): number;
}
export class BufferedBufferReader implements BufferedReader {
constructor(private buffer: Buffer) { }
// #############################################################################
// BASIC BUFFER READER
peekByte(): number {
return this.buffer[0];
/**
Wraps a Buffer as a stateful iterable.
*/
export class BufferIterator implements StatefulChunkedIterable<Buffer> {
constructor(private _buffer: Buffer, private _position = 0) { }
static fromString(str: string, encoding?: string): BufferIterator {
var buffer = new Buffer(str, encoding);
return new BufferIterator(buffer);
}
peekBuffer(length: number): Buffer {
return this.buffer.slice(0, length);
/**
Return the current position within the underlying Buffer.
*/
get position(): number {
return this._position;
}
readByte(): number {
var byte = this.peekByte();
this.buffer = this.buffer.slice(1);
return byte;
/**
Return the total length of the underlying Buffer.
*/
get size(): number {
return this._buffer.length;
}
readBuffer(length: number): Buffer {
var buffer = this.peekBuffer(length);
this.buffer = this.buffer.slice(length);
/**
Read the next `length` bytes from the underlying Buffer, or fewer iff we reach
EOF, without advancing our position within the Buffer. Returns a Buffer slice.
*/
peek(length: number): Buffer {
return this._buffer.slice(this._position, this._position + length);
}
/**
Read the next `length` bytes from the underlying Buffer, or fewer iff we reach
EOF, and advance our position within the Buffer. Returns a Buffer slice.
Buffer#slice never returns entries beyond the end of the buffer:
`new Buffer([1, 2, 3, 4]).slice(2, 10)` produces `<Buffer 03 04>`
*/
next(length: number): Buffer {
var buffer = this._buffer.slice(this._position, this._position + length);
this._position += buffer.length;
return buffer;

@@ -53,41 +92,66 @@ }

characters (which may be < `length` iff EOF has been reached).
We do not allow skipping beyond the end of the buffer.
*/
skip(length: number): number {
// we cannot skip more than `this.buffer.length` bytes
var bytesSkipped = Math.min(length, this.buffer.length);
this.buffer = this.buffer.slice(length);
var bytesSkipped = Math.min(length, this._buffer.length - this._position);
this._position += bytesSkipped;
return bytesSkipped;
}
}
toString(): string {
return this.buffer.toString();
/**
Wrap an Array as an iterable.
*/
export class ArrayIterator<T> implements StatefulIterable<T> {
constructor(private _array: Array<T>, public position = 0) { }
get size(): number {
return this._array.length;
}
}
export class BufferedStringReader extends BufferedBufferReader {
constructor(input: string, encoding?: string) {
super(new Buffer(input, encoding))
next(): T {
return this._array[this.position++];
}
peek(): T {
return this._array[this.position + 1];
}
skip(): boolean {
if (this.position < this._array.length) {
this.position++;
return true;
}
return false;
}
}
// #############################################################################
// FILE READERS
// SYNCHRONOUS FILE READER
/**
Provide buffered (and Buffer-friendly) access to a file.
Provide iterative access to a file.
It is buffered, which means you can call `peek(same_number)` repeatedly without
triggering a `read(2)` system call on the underlying file each time. Likewise,
calling `read(small_number)` repeatedly will issue a `read(2)` system call only
when the buffer doesn't have enough data.
When calling `read()` on the underlying file, it will read batches of
`_block_size` (default: 1024) bytes.
*/
export class BufferedFileReader implements BufferedReader {
// when reading more data, pull in chunks of `BLOCK_SIZE` bytes.
static BLOCK_SIZE = 1024;
private buffer: Buffer = new Buffer(0);
export class FileIterator implements StatefulChunkedIterable<Buffer> {
private _buffer: Buffer = new Buffer(0);
constructor(private fd: number, private file_position: number = 0) { }
// when reading more data, pull in chunks of `_block_size` bytes.
constructor(private _fd: number, private _position = 0, private _block_size = 1024) { }
static open(filepath: string): BufferedFileReader {
static open(filepath: string): FileIterator {
var fd = fs.openSync(filepath, 'r');
return new BufferedFileReader(fd);
return new FileIterator(fd);
}
close(): void {
fs.closeSync(this.fd);
fs.closeSync(this._fd);
}

@@ -97,21 +161,14 @@

Return the position in the file that would be read from if we called
readBuffer(...). This is different from the internally-held position, which
read(...). This is different from the internally-held position, which
points to the end of the currently held buffer.
*/
get position(): number {
return this.file_position - this.buffer.length;
return this._position - this._buffer.length;
}
/**
Calls fs.readSync on the underlying file descriptor with pretty much the same
argument signature.
Returns `bytesRead`, the number of bytes that were read into the given Buffer.
Node.js documentation for fs.read() / fs.readSync():
> position is an integer specifying where to begin reading from in the file.
> If position is null, data will be read from the current file position.
Return the total size (in bytes) of the underlying file.
*/
read(buffer: Buffer, offset: number, length: number, position: number): number {
return fs.readSync(this.fd, buffer, offset, length, position);
get size(): number {
return fs.fstatSync(this._fd).size;
}

@@ -122,9 +179,11 @@

This may return without the condition being met of this.buffer.length >= length,
This may return without the condition being met (this.buffer.length >= length),
if the end of the underlying file has been reached.
TODO: pull _fillBuffer into the loop, with the Buffer declaration outside.
*/
private ensureLength(length: number): void {
while (length > this.buffer.length) {
private _ensureLength(length: number): void {
while (length > this._buffer.length) {
// all the action happens only if we need more bytes than are in the buffer
var EOF = this.fillBuffer(BufferedFileReader.BLOCK_SIZE);
var EOF = this._fillBuffer(this._block_size);
if (EOF) {

@@ -141,35 +200,25 @@ // exit regardless

Returns false iff EOF has been reached, otherwise returns true. */
private fillBuffer(length: number): boolean {
private _fillBuffer(length: number): boolean {
var buffer = new Buffer(length);
// always read from the reader's current position
var bytesRead = this.read(buffer, 0, length, this.file_position);
// always read from the current position
var bytesRead = fs.readSync(this._fd, buffer, 0, length, this._position);
// and update it accordingly
this.file_position += bytesRead;
this._position += bytesRead;
// use the Buffer.concat totalLength argument to slice the fresh buffer if needed
this.buffer = Buffer.concat([this.buffer, buffer], this.buffer.length + bytesRead);
this._buffer = Buffer.concat([this._buffer, buffer], this._buffer.length + bytesRead);
return bytesRead < length;
}
peekByte(): number {
this.ensureLength(1);
return this.buffer[0];
next(length: number): Buffer {
this._ensureLength(length);
var buffer = this._buffer.slice(0, length);
this._buffer = this._buffer.slice(length);
return buffer;
}
peekBuffer(length: number): Buffer {
this.ensureLength(length);
return this.buffer.slice(0, length);
peek(length: number): Buffer {
this._ensureLength(length);
return this._buffer.slice(0, length);
}
readByte(): number {
var byte = this.peekByte();
this.buffer = this.buffer.slice(1);
return byte;
}
readBuffer(length: number): Buffer {
var buffer = this.peekBuffer(length);
this.buffer = this.buffer.slice(length);
return buffer;
}
/**

@@ -180,6 +229,6 @@ Skip over the next `length` characters, returning the number of skipped

skip(length: number): number {
this.ensureLength(length);
this._ensureLength(length);
// we cannot skip more than `this.buffer.length` bytes
var bytesSkipped = Math.min(length, this.buffer.length);
this.buffer = this.buffer.slice(length);
var bytesSkipped = Math.min(length, this._buffer.length);
this._buffer = this._buffer.slice(length);
return bytesSkipped;

@@ -192,33 +241,77 @@ }

// LEXERS
// (the good stuff)
export interface RuleAction<T> { (match: RegExpMatchArray): [string, T]; }
export interface Rule<T> extends Array<RegExp | RuleAction<T>> { 0: RegExp; 1: RuleAction<T>; }
/**
Commonly used special case.
*/
export interface BufferIterable extends StatefulChunkedIterable<Buffer> { }
export class BufferedLexer<T> {
reader: BufferedReader;
states: string[];
/**
Tokenizer#map() and Combiner#map() both return Token iterators.
constructor(private default_rules: Rule<T>[], private state_rules: {[index: string]: Rule<T>[]}) {
this.reset();
Tokens with a null name and null Tokens should be treated the same way (as
insignificant / ignorable objects that should be skipped).
Tokens with an undefined name and undefined Tokens are always errors.
*/
export interface Token<T> {
name: string;
value: T;
}
/**
Another generic but frequently used alias.
*/
export interface TokenIterable<T> extends Iterable<Token<T>> { }
export function Token<T>(name: string, value: T = null): Token<T> {
return {name: name, value: value};
}
// -----------------------------------------------------------------------------
// TOKENIZER
export interface RegexAction<T> { (match: RegExpMatchArray): Token<T>; }
export interface RegexRule<T> extends Array<RegExp | RegexAction<T>> { 0: RegExp; 1: RegexAction<T>; }
/**
The type T is the type of each token value, usually `any` (the token name is
always a string).
BufferIterable
*/
export class Tokenizer<T> {
constructor(private default_rules: RegexRule<T>[],
private state_rules: {[index: string]: RegexRule<T>[]} = {}) { }
getRules(state_name: string): RegexRule<T>[] {
return (state_name === undefined) ? this.default_rules : this.state_rules[state_name];
}
/**
Reset the Lexer back to its initial state.
Create a closure around the iterable.
Unfortunately, it seems that TypeScript doesn't like inline functions, so we
use a helper class (TokenizerIterator).
*/
reset(): void {
this.states = [];
map(iterable: BufferIterable, states: string[] = []): TokenIterable<T> {
return new TokenizerIterator(this, iterable, states);
}
}
class TokenizerIterator<T> implements TokenIterable<T> {
constructor(private tokenizer: Tokenizer<T>,
public iterable: BufferIterable,
public states: string[]) { }
/**
Returns the next available pair from the input reader (usually [token, data]).
Returns the next available Token from the input reader.
If the matching rule's action returns null, this will return null.
If the matching rule's action returns null, this will return null.
TODO: optimize string conversion; abstract out the peek + toString, back into the reader?
*/
read(): T {
// TODO: abstract out the peekBuffer + toString, back into the reader?
// optimize string conversion
var input = this.reader.peekBuffer(256).toString('utf8');
private _next(): Token<T> {
var state = this.states[this.states.length - 1];
var rules = state ? this.state_rules[state] : this.default_rules;
var rules = this.tokenizer.getRules(state);
var input = this.iterable.peek(256).toString('utf8');
for (var i = 0, rule; (rule = rules[i]); i++) {

@@ -228,3 +321,3 @@ var match = input.match(rule[0]);

var byteLength = Buffer.byteLength(match[0], 'utf8');
this.reader.skip(byteLength);
this.iterable.skip(byteLength);
return rule[1].call(this, match);

@@ -241,13 +334,88 @@ }

This will never return null.
This will never return null, but may return undefined if one of the rules
returns undefined, which the rule should not do! It will never a Token with
a null name.
*/
next(): T {
var result;
do {
result = this.read();
} while (result === null);
return result;
public next(): Token<T> {
while (1) {
var token = this._next();
if (token !== null && token.name !== null) {
return token;
}
}
}
}
// -----------------------------------------------------------------------------
// COMBINER
export interface CombinerAction<T, U> { (tokens: Token<T>[]): Token<U>; }
export interface CombinerRule<T, U> extends Array<string | CombinerAction<T, U>> { 0: string; 1: CombinerAction<T, U>; }
/**
Recombine a stream of tokens using a stack of lists, e.g.,
WORD:BT START:STRING CHAR:A CHAR:b CHAR:c END:STRING WORD:ET
becomes:
WORD:BT STRING:Abc WORD:ET
*/
export class Combiner<T> {
constructor(private rules: CombinerRule<T, T>[]) { }
findRule(name: string): CombinerRule<T, T> {
for (var i = 0, rule; (rule = this.rules[i]); i++) {
if (rule[0] === name) {
return rule;
}
}
throw new Error(`No combiner rule found with the name: ${name}`);
}
map(iterable: TokenIterable<T>, stack: Array<Array<Token<T>>> = []): TokenIterable<T> {
return new CombinerIterator(this, iterable, stack);
}
}
class CombinerIterator<T> implements TokenIterable<T> {
constructor(private combiner: Combiner<T>,
public iterable: TokenIterable<T>,
public stack: Array<Array<Token<T>>>) { }
/**
Returns the next available pair from the input reader (usually [token, data]).
If the matching rule's action returns null, this will return null.
*/
next(): Token<T> {
var token = this.iterable.next();
if (token.name == 'END') {
// TODO: check that the preceding START token has the same value
var tokens = this.stack.pop();
// type hack with <any>
var rule = this.combiner.findRule(<any>token.value);
// reduce into combined token
token = rule[1](tokens);
}
if (token.name == 'START') {
// TODO: store the START token's value somewhere so that we can verify the END token's value matches
this.stack.push([]);
return this.next();
}
else if (this.stack.length > 0) {
// push it onto the list at the top of the stack
this.stack[this.stack.length - 1].push(token);
return this.next();
}
else {
// tokens at root level pass through transparently
return token;
}
}
}
//// }
/// <reference path="../../type_declarations/DefinitelyTyped/node/node.d.ts" />
declare module "lexing" {
interface Reader {
/**
Any sort of sequence can implement Iterable<T>. It's a lot like Array<T>, but
read-only, and without random access.
*/
interface Iterable<T> {
next(): T;
}
interface StatefulIterable<T> extends Iterable<T> {
position: number;
size: number;
peek(): T;
skip(): boolean;
}
/**
In some cases, it makes more sense to iterate in batches, or chunks, through an
iterable of a particular type. The T in ChunkedIterable<T> should itself be a
sequence type, like string[] or Buffer.
*/
interface ChunkedIterable<T> {
next(length: number): T;
}
interface StatefulChunkedIterable<T> extends ChunkedIterable<T> {
position: number;
size: number;
peek(length: number): T;
skip(length: number): number;
}
/**
Wraps a Buffer as a stateful iterable.
*/
class BufferIterator implements StatefulChunkedIterable<Buffer> {
private _buffer;
private _position;
constructor(_buffer: Buffer, _position?: number);
static fromString(str: string, encoding?: string): BufferIterator;
/**
Reads a single byte.
Return the current position within the underlying Buffer.
*/
readByte(): number;
position: number;
/**
Reads a series of bytes.
Return the total length of the underlying Buffer.
*/
readBuffer(length: number): Buffer;
}
interface BufferedReader extends Reader {
peekByte(): number;
peekBuffer(length: number): Buffer;
skip(length: number): number;
}
class BufferedBufferReader implements BufferedReader {
private buffer;
constructor(buffer: Buffer);
peekByte(): number;
peekBuffer(length: number): Buffer;
readByte(): number;
readBuffer(length: number): Buffer;
size: number;
/**
Read the next `length` bytes from the underlying Buffer, or fewer iff we reach
EOF, without advancing our position within the Buffer. Returns a Buffer slice.
*/
peek(length: number): Buffer;
/**
Read the next `length` bytes from the underlying Buffer, or fewer iff we reach
EOF, and advance our position within the Buffer. Returns a Buffer slice.
Buffer#slice never returns entries beyond the end of the buffer:
`new Buffer([1, 2, 3, 4]).slice(2, 10)` produces `<Buffer 03 04>`
*/
next(length: number): Buffer;
/**
Skip over the next `length` characters, returning the number of skipped
characters (which may be < `length` iff EOF has been reached).
We do not allow skipping beyond the end of the buffer.
*/
skip(length: number): number;
toString(): string;
}
class BufferedStringReader extends BufferedBufferReader {
constructor(input: string, encoding?: string);
/**
Wrap an Array as an iterable.
*/
class ArrayIterator<T> implements StatefulIterable<T> {
private _array;
position: number;
constructor(_array: Array<T>, position?: number);
size: number;
next(): T;
peek(): T;
skip(): boolean;
}
/**
Provide buffered (and Buffer-friendly) access to a file.
Provide iterative access to a file.
It is buffered, which means you can call `peek(same_number)` repeatedly without
triggering a `read(2)` system call on the underlying file each time. Likewise,
calling `read(small_number)` repeatedly will issue a `read(2)` system call only
when the buffer doesn't have enough data.
When calling `read()` on the underlying file, it will read batches of
`_block_size` (default: 1024) bytes.
*/
class BufferedFileReader implements BufferedReader {
private fd;
private file_position;
static BLOCK_SIZE: number;
private buffer;
constructor(fd: number, file_position?: number);
static open(filepath: string): BufferedFileReader;
class FileIterator implements StatefulChunkedIterable<Buffer> {
private _fd;
private _position;
private _block_size;
private _buffer;
constructor(_fd: number, _position?: number, _block_size?: number);
static open(filepath: string): FileIterator;
close(): void;
/**
Return the position in the file that would be read from if we called
readBuffer(...). This is different from the internally-held position, which
read(...). This is different from the internally-held position, which
points to the end of the currently held buffer.

@@ -53,19 +106,14 @@ */

/**
Calls fs.readSync on the underlying file descriptor with pretty much the same
argument signature.
Returns `bytesRead`, the number of bytes that were read into the given Buffer.
Node.js documentation for fs.read() / fs.readSync():
> position is an integer specifying where to begin reading from in the file.
> If position is null, data will be read from the current file position.
Return the total size (in bytes) of the underlying file.
*/
read(buffer: Buffer, offset: number, length: number, position: number): number;
size: number;
/**
Ensure that the available buffer is at least `length` bytes long.
This may return without the condition being met of this.buffer.length >= length,
This may return without the condition being met (this.buffer.length >= length),
if the end of the underlying file has been reached.
TODO: pull _fillBuffer into the loop, with the Buffer declaration outside.
*/
private ensureLength(length);
private _ensureLength(length);
/**

@@ -75,7 +123,5 @@ Read data from the underlying file and append it to the buffer.

Returns false iff EOF has been reached, otherwise returns true. */
private fillBuffer(length);
peekByte(): number;
peekBuffer(length: number): Buffer;
readByte(): number;
readBuffer(length: number): Buffer;
private _fillBuffer(length);
next(length: number): Buffer;
peek(length: number): Buffer;
/**

@@ -87,35 +133,76 @@ Skip over the next `length` characters, returning the number of skipped

}
interface RuleAction<T> {
(match: RegExpMatchArray): [string, T];
/**
Commonly used special case.
*/
interface BufferIterable extends StatefulChunkedIterable<Buffer> {
}
interface Rule<T> extends Array<RegExp | RuleAction<T>> {
/**
Tokenizer#map() and Combiner#map() both return Token iterators.
Tokens with a null name and null Tokens should be treated the same way (as
insignificant / ignorable objects that should be skipped).
Tokens with an undefined name and undefined Tokens are always errors.
*/
interface Token<T> {
name: string;
value: T;
}
/**
Another generic but frequently used alias.
*/
interface TokenIterable<T> extends Iterable<Token<T>> {
}
function Token<T>(name: string, value?: T): Token<T>;
interface RegexAction<T> {
(match: RegExpMatchArray): Token<T>;
}
interface RegexRule<T> extends Array<RegExp | RegexAction<T>> {
0: RegExp;
1: RuleAction<T>;
1: RegexAction<T>;
}
class BufferedLexer<T> {
/**
The type T is the type of each token value, usually `any` (the token name is
always a string).
BufferIterable
*/
class Tokenizer<T> {
private default_rules;
private state_rules;
reader: BufferedReader;
states: string[];
constructor(default_rules: Rule<T>[], state_rules: {
[index: string]: Rule<T>[];
constructor(default_rules: RegexRule<T>[], state_rules?: {
[index: string]: RegexRule<T>[];
});
getRules(state_name: string): RegexRule<T>[];
/**
Reset the Lexer back to its initial state.
*/
reset(): void;
/**
Returns the next available pair from the input reader (usually [token, data]).
Create a closure around the iterable.
If the matching rule's action returns null, this will return null.
Unfortunately, it seems that TypeScript doesn't like inline functions, so we
use a helper class (TokenizerIterator).
*/
read(): T;
/**
Returns the next available non-null token / symbol output from the input
reader (usually a token_data: [string, any] tuple).
This will never return null.
*/
next(): T;
map(iterable: BufferIterable, states?: string[]): TokenIterable<T>;
}
interface CombinerAction<T, U> {
(tokens: Token<T>[]): Token<U>;
}
interface CombinerRule<T, U> extends Array<string | CombinerAction<T, U>> {
0: string;
1: CombinerAction<T, U>;
}
/**
Recombine a stream of tokens using a stack of lists, e.g.,
WORD:BT START:STRING CHAR:A CHAR:b CHAR:c END:STRING WORD:ET
becomes:
WORD:BT STRING:Abc WORD:ET
*/
class Combiner<T> {
private rules;
constructor(rules: CombinerRule<T, T>[]);
findRule(name: string): CombinerRule<T, T>;
map(iterable: TokenIterable<T>, stack?: Array<Array<Token<T>>>): TokenIterable<T>;
}
}
{
"name": "lexing",
"version": "0.1.4",
"version": "0.2.0",
"description": "Regex-based lexer",
"keywords": [],
"keywords": [
"lexer",
"parser"
],
"homepage": "https://github.com/chbrown/lexing",

@@ -11,4 +14,8 @@ "repository": "git://github.com/chbrown/lexing.git",

"devDependencies": {
"mocha": "*",
"typescript": "*"
},
"scripts": {
"test": "make test"
}
}

@@ -26,6 +26,27 @@ # lexing

The `new lexing.BufferedLexer(default_rules [, state_rules])` implementation provided in this module represents state as a stack of things (hopefully just strings), but this could be abused. The lexer constructor takes an optional second argument: an object mapping state names to lists of rules that apply only in those states. These operate like exclusive conditional states in `flex`, except there are no exceptions to the exclusivity, i.e., there is no `<*>` condition specifier. The current state is the last (top) state in the state stack. The `default_rules` rules apply only when the state stack is empty (the default).
The lexer has one main function, `lexer.read()`. This reads an input_string from the `lexer.reader` instance, and iterates over the rules that apply in the current state.
## Implementation
The `new lexing.Tokenizer(default_rules [, state_rules])` implementation provided in this module is the most basic lexer provided, representing state as a stack of strings. The `lexing.Tokenizer` constructor takes an optional second argument: an object mapping state names to lists of rules that apply only in those states. These operate like exclusive conditional states in `flex`, except there are no exceptions to the exclusivity, i.e., there is no `<*>` condition specifier. The current state is the last (top) state in the state stack. The `default_rules` rules apply only when the state stack is empty (the default).
The tokenizer has one main function, `tokenizer.map(buffer_iterable)`, which returns a `TokenIterable`. `buffer_iterable` should implement the `BufferIterable` interface, i.e.:
interface BufferIterable {
position: number;
size: number;
next(length: number): Buffer;
peek(length: number): Buffer;
skip(length: number): number;
}
The following readers defined in `lexing` all return instances implementing the `BufferIterable` interface:
* `new lexing.BufferIterator(buffer)`
* `lexing.BufferIterator.fromString(str, encoding)`
* `new lexing.FileIterator(file_descriptor)`
* `lexing.FileIterator.open(file_path)`
The `TokenIterable` instance returned by `tokenizer.map(...)` has one method: `next()`, which returns a non-null `Token`.
Every `Token` has a non-null `name` field (a string) and a `value` field (of any type; potentially null or undefined).
Each rule is a `[RegExp, Function]` tuple. When a rule's regular expression matches the input, the following happens:

@@ -35,9 +56,7 @@

2. The input cursor is advanced over the length of the full match (`match[0]`).
3. The lexer returns the result of calling `input_string.match(rule[0])`, with the lexer bound as `this` inside the function.
3. The tokenizer returns the result of calling `input_string.match(rule[0])`, with the tokenizer bound as `this` inside the rule's function.
If no rules in the current state match the current input, the lexer will throw an "Invalid language" error.
If no rules in the current state match the current input, the tokenizer will throw an "Invalid language" error.
The lexer has another function: `lexer.next()`, which calls `lexer.read()` in a loop until it returns a non-null result, and returns that result.
## Quickstart

@@ -47,3 +66,3 @@

npm install lexing --save
npm install lexing --save

@@ -58,3 +77,3 @@ In your code:

[/^$/, function(match) {
return ['EOF', null];
return lexing.Token('EOF', null);
}],

@@ -64,18 +83,18 @@ [/^\s+/, function(match) {

}],
[/^[^!"#$%&'()*+,\-./:;<=>?@[\\\]\^_`{|}~]+/, function(match) {
return ['TOKEN', match[0]];
[/^[^!"#$%&'()*+,\-./:;<=>?@[\\\]\^_`{|}~\s]+/, function(match) {
return lexing.Token('WORD', match[0]);
}],
[/^./, function(match) {
return ['PUNCTUATION', match[0]];
return lexing.Token('PUNCTUATION', match[0]);
}],
];
var lexer = new lexing.BufferedLexer(rules);
var tokenizer = new lexing.Tokenizer(rules);
var input = lexing.BufferIterator.fromString("'It wasn't at all my fault', I cried.");
var output = tokenizer.map(input);
lexer.reader = new lexing.BufferedStringReader("'It wasn't at all my fault', I cried.");
do {
var token_value = lexer.next();
console.log('token=%s => %j', token_value[0], token_value[1]);
} while (token_value[0] !== 'EOF');
var token = output.next();
console.log('token=%s => %j', token.name, token.value);
} while (token.name !== 'EOF');

@@ -82,0 +101,0 @@

Sorry, the diff of this file is not supported yet

Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc