chevrotain

chevrotain - npm Package Compare versions

Comparing version 0.1.0 to 0.2.0


bin/chevrotain.d.ts

@@ -20,10 +20,27 @@ /// <reference path="../libs/lodash.d.ts" />

declare module chevrotain.tokens {
function getTokName(tokType: Function): string;
function tokenName(clazz: Function): string;
type TokenClass = Function;
class Token {
image: string;
offset: number;
startLine: number;
startColumn: number;
image: string;
endLine: number;
endColumn: number;
isInsertedInRecovery: boolean;
constructor(startLine: number, startColumn: number, image: string);
/**
* @param {string} image the textual representation of the Token as it appeared in the text
* @param {number} offset offset of the first character of the Token
* @param {number} startLine line of the first character of the Token
* @param {number} startColumn column of the first character of the Token
* @param {number} endLine line of the last character of the Token
* @param {number} endColumn column of the last character of the Token
*
* Things to note:
* * "do" {startColumn : 1, endColumn: 2} --> the range is inclusive to exclusive 1...2 (2 chars long).
* * "\n" {startLine : 1, endLine: 1} --> a lineTerminator as the last character does not effect the Token's line numbering.
* * "'hello\tworld\uBBBB'" {image: "'hello\tworld\uBBBB'"} --> a Token's image is the "literal" text
* (unicode escaping is untouched).
*/
constructor(image: string, offset: number, startLine: number, startColumn: number, endLine?: number, endColumn?: number);
}
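For illustration only, a minimal sketch of the new Token constructor signature in use (the `Plus` subclass and the sample positions are hypothetical, not part of this diff):

```JavaScript
var Token = require("chevrotain").Token

// hypothetical token class; instances are built with the new signature:
// Token(image, offset, startLine, startColumn, endLine?, endColumn?)
class Plus extends Token {}

// a "+" found at offset 4, i.e. line 1, column 5
var plusTok = new Plus("+", 4, 1, 5, 1, 5)
// plusTok.image       --> "+"
// plusTok.startColumn --> 5
```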

@@ -34,17 +51,7 @@ type VirtualTokenClass = Function;

}
function INVALID_LINE(): number;
function INVALID_COLUMN(): number;
class NoneToken extends Token {
private static _instance;
constructor();
static getInstance(): any;
}
function NONE_TOKEN(): Token;
}
declare module chevrotain.lexer {
import tok = chevrotain.tokens;
var NA: RegExp;
interface ILexingResult {
tokens: tok.Token[];
ignored: tok.Token[];
errors: ILexingError[];

@@ -62,10 +69,14 @@ }

* concerns such as performance/extendability/modularity are ignored in this implementation.
*
*/
class SimpleLexer {
protected tokenClasses: TokenConstructor[];
protected matchPatterns: RegExp[];
protected ignorePatterns: RegExp[];
static SKIPPED: {
description: string;
};
static NA: RegExp;
protected allPatterns: RegExp[];
protected patternToClass: {};
protected patternIdxToClass: Function[];
protected patternIdxToSkipped: boolean[];
protected patternIdxToLongerAltIdx: number[];
protected patternIdxToCanLineTerminator: boolean[];
/**

@@ -93,6 +104,31 @@ * @param {Function[]} tokenClasses constructor functions for the Tokens types this scanner will support

*
* The Lexer will try to locate the longest match each time. If two patterns both match with the same match length,
* the pattern defined first will "win". For example: if an Identifier's pattern is /\w+/ and we also have keywords such
* as /while/ /for/ ... the Identifier constructor must appear AFTER all keyword constructors in the 'tokenClasses' arg.
* The Lexer will identify the first pattern that matches, therefore the order of Token constructors passed
* to the SimpleLexer's constructor is meaningful. If two patterns may match the same string, the longer one
* should be before the shorter one.
*
* Note that there are situations in which we may wish to place the longer pattern after the shorter one.
* For example: keywords vs Identifiers.
* 'do' (/do/) and 'done' (/\w+/)
*
* * If the Identifier pattern appears before the 'do' pattern, both 'do' and 'done'
* will be lexed as an Identifier.
*
* * If the 'do' pattern appears before the Identifier pattern, 'do' will be lexed correctly as a keyword.
* However, 'done' will be lexed as TWO tokens: the keyword 'do' followed by the identifier 'ne'.
*
* To resolve this problem, add a static property named LONGER_ALT on the keyword Token's constructor
* example:
*
* export class Identifier extends tok.Token { static PATTERN = /[_a-zA-Z][_a-zA-Z0-9]*/ }
* export class Keyword extends tok.Token {
* static PATTERN = lex.NA
* static LONGER_ALT = Identifier
* }
* export class Do extends Keyword { static PATTERN = /do/ }
* export class While extends Keyword { static PATTERN = /while/ }
* export class Return extends Keyword { static PATTERN = /return/ }
*
* The lexer will then also attempt to match a (longer) Identifier each time a keyword is matched
*
*
*/
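To make the LONGER_ALT resolution described above concrete, a minimal sketch (token classes as in the comment above; the sample input is made up, and the lexer is assumed to be reachable as the SimpleLexer declared in this .d.ts, exposed as Lexer in the public API):

```JavaScript
// keywords are listed before Identifier, and each Keyword subclass
// carries LONGER_ALT = Identifier, as described in the comment above
var lexer = new SimpleLexer([Do, While, Return, Identifier])

var result = lexer.tokenize("done")
// with LONGER_ALT:    result.tokens holds a single Identifier token ("done")
// without LONGER_ALT: "done" would be split into keyword 'do' + identifier 'ne'
```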

@@ -106,19 +142,12 @@ constructor(tokenClasses: TokenConstructor[]);

* @param {string} text the string to lex
* @returns {{tokens: {Token}[], ignored: {Token}[], errors: string[]}}
* @returns {{tokens: {Token}[], errors: string[]}}
*/
tokenize(text: string): ILexingResult;
}
interface IConsumeResult {
token: tok.Token;
remainingInput: string;
offset: number;
}
function NOTHING_CONSUMED(): IConsumeResult;
function tokenizeOne(input: string, offset: number, patterns: RegExp[], patternsToConstructor: any): IConsumeResult;
interface IAnalyzeResult {
matchPatterns: RegExp[];
ignorePatterns: RegExp[];
patternToClass: {
[pattern: string]: RegExp;
};
allPatterns: RegExp[];
patternIdxToClass: Function[];
patternIdxToSkipped: boolean[];
patternIdxToLongerAltIdx: number[];
patternIdxToCanLineTerminator: boolean[];
}

@@ -133,8 +162,3 @@ function analyzeTokenClasses(tokenClasses: TokenConstructor[]): IAnalyzeResult;

function addStartOfInput(pattern: RegExp): RegExp;
interface ILineColumn {
line: number;
column: number;
}
type OffsetToLineColumn = ILineColumn[];
function buildOffsetToLineColumnDict(text: string): OffsetToLineColumn;
function countLineTerminators(text: string): number;
}

@@ -390,10 +414,16 @@ declare module chevrotain.tree {

var CLASS_TO_GRAMMAR_PRODUCTIONS: lang.HashTable<lang.HashTable<gast.TOP_LEVEL>>;
function getProductionsForClass(classInstance: any): lang.HashTable<gast.TOP_LEVEL>;
function getProductionsForClass(className: string): lang.HashTable<gast.TOP_LEVEL>;
var CLASS_TO_RESYNC_FOLLOW_SETS: lang.HashTable<lang.HashTable<Function[]>>;
function getResyncFollowsForClass(classInstance: any): lang.HashTable<Function[]>;
function setResyncFollowsForClass(classInstance: any, followSet: lang.HashTable<Function[]>): void;
function getResyncFollowsForClass(className: string): lang.HashTable<Function[]>;
function setResyncFollowsForClass(className: string, followSet: lang.HashTable<Function[]>): void;
var CLASS_TO_LOOKAHEAD_FUNCS: lang.HashTable<lang.HashTable<Function>>;
function getLookaheadFuncsForClass(classInstance: any): lang.HashTable<Function>;
function getLookaheadFuncsForClass(className: string): lang.HashTable<Function>;
var CLASS_TO_FIRST_AFTER_REPETITION: lang.HashTable<lang.HashTable<interpreter.IFirstAfterRepetition>>;
function getFirstAfterRepForClass(classInstance: any): lang.HashTable<interpreter.IFirstAfterRepetition>;
function getFirstAfterRepForClass(className: string): lang.HashTable<interpreter.IFirstAfterRepetition>;
var CLASS_TO_OR_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var CLASS_TO_MANY_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var CLASS_TO_AT_LEAST_ONE_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var CLASS_TO_OPTION_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var MAX_OCCURRENCE_INDEX: number;
function initLookAheadKeyCache(className: any): void;
}

@@ -458,2 +488,3 @@ declare module chevrotain.lookahead {

import gast = chevrotain.gast;
import interp = chevrotain.interpreter;
import lang = chevrotain.lang;

@@ -495,3 +526,2 @@ import gastBuilder = chevrotain.gastBuilder;

RULE_STACK: string[];
FOLLOW_STACK: Function[][];
}

@@ -512,2 +542,3 @@ type LookAheadFunc = () => boolean;

protected isBackTrackingStack: any[];
protected className: string;
constructor(input?: tok.Token[]);

@@ -539,5 +570,10 @@ input: tok.Token[];

function InRuleRecoveryException(message: string): void;
interface IFollowKey {
ruleName: string;
idxInCallingRule: number;
inRule: string;
}
/**
* A Recognizer capable of self analysis to determine its grammar structure.
* This is used for more advanced features requiring this information.
* This is used for more advanced features requiring such information.
* for example: Error Recovery, Automatic lookahead calculation

@@ -549,5 +585,10 @@ */

protected RULE_OCCURRENCE_STACK: number[];
protected FOLLOW_STACK: Function[][];
protected tokensMap: gastBuilder.ITerminalNameToConstructor;
constructor(input: tok.Token[], tokensMap: gastBuilder.ITerminalNameToConstructor);
protected firstAfterRepMap: lang.HashTable<interp.IFirstAfterRepetition>;
protected classLAFuncs: lang.HashTable<Function>;
protected orLookaheadKeys: lang.HashTable<string>[];
protected manyLookaheadKeys: lang.HashTable<string>[];
protected atLeastOneLookaheadKeys: lang.HashTable<string>[];
protected optionLookaheadKeys: lang.HashTable<string>[];
constructor(input: tok.Token[], tokensMapOrArr: gastBuilder.ITerminalNameToConstructor | Function[]);
reset(): void;

@@ -841,2 +882,4 @@ /**

protected RULE<T>(ruleName: string, impl: (...implArgs: any[]) => T, invalidRet?: () => T, doReSync?: boolean): (idxInCallingRule?: number, ...args: any[]) => T;
protected ruleInvocationStateUpdate(ruleName: string, idxInCallingRule: number): void;
protected ruleFinallyStateUpdate(): void;
private defaultInvalidReturn();

@@ -861,4 +904,8 @@ protected ruleNamePattern: RegExp;

protected findReSyncTokenType(): Function;
protected getCurrFollowKey(): IFollowKey;
protected buildFullFollowKeyStack(): IFollowKey[];
protected flattenFollowSet(): Function[];
protected getFollowSetFromFollowKey(followKey: IFollowKey): Function[];
protected reSyncTo(tokClass: Function): void;
private attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, prodName, prodOccurrence, nextToksWalker);
private attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, prodName, prodOccurrence, nextToksWalker, prodKeys);
private atLeastOneInternal(prodFunc, prodName, prodOccurrence, lookAheadFunc, action, errMsg?);

@@ -880,2 +927,3 @@ private manyInternal(prodFunc, prodName, prodOccurrence, lookAheadFunc, action?);

protected consumeInternal(tokClass: Function, idx: number): tok.Token;
protected getKeyForAutomaticLookahead(prodName: string, prodKeys: lang.HashTable<string>[], occurrence: number): string;
protected getLookaheadFuncForOption(occurence: number): () => boolean;

@@ -886,3 +934,3 @@ protected getLookaheadFuncForOr(occurence: number, ignoreErrors: boolean): () => number;

protected isNextRule<T>(ruleName: string): boolean;
protected getLookaheadFuncFor<T>(prodType: string, occurrence: number, laFuncBuilder: (number, any) => () => T, extraArgs?: any[]): () => T;
protected getLookaheadFuncFor<T>(key: string, occurrence: number, laFuncBuilder: (number, any) => () => T, extraArgs?: any[]): () => T;
protected saveRecogState(): IErrorRecoveryRecogState;

@@ -893,1 +941,9 @@ protected reloadRecogState(newState: IErrorRecoveryRecogState): void;

}
/**
* Defines the public API of Chevrotain.
* Changes here may require a major version change (semVer).
*/
declare var CHEV_TEST_MODE: any;
declare var global: any;
declare var testMode: any;
declare var API: any;
{
"name": "chevrotain",
"version": "0.1.0",
"description": "Javascript/Typescript parsing framework",
"version": "0.2.0",
"description": "Chevrotain is a high performance fault Tolerant Javascript parsing DSL for building recursive decent parsers",
"keywords": [
"parser",
"syntax",
"lexical",
"analysis",
"grammar",
"lexer",
"tokenizer"
"tokenizer",
"generator",
"compiler",
"fault",
"tolerant"
],

@@ -12,0 +18,0 @@ "bugs": {

[![Build Status](https://travis-ci.org/SAP/chevrotain.svg?branch=master)](https://travis-ci.org/SAP/chevrotain)
[![Coverage Status](https://coveralls.io/repos/SAP/chevrotain/badge.svg?branch=master)](https://coveralls.io/r/SAP/chevrotain?branch=master)
[![NPM](https://nodei.co/npm/chevrotain.png?mini=true)](https://npmjs.org/package/chevrotain)
#Chevrotain
# Chevrotain
Chevrotain is a Javascript/Typescript parsing framework which aims to make it easier to write "hand built" recursive descent parsers.
Chevrotain is a high performance fault tolerant Javascript parsing DSL for building recursive descent parsers.
Chevrotain is **NOT** a parser generator. It solves the same kind of problems as a parser generator, just without
the code generation phase.
###Features
* **DSL** for creating the parsing rules.
* Automatic lookahead calculation for LL(1) grammars
* For other grammars custom lookahead functions can be provided.
## Features
* **Lexer engine** based on RegExps.
* Supports Token location tracking.
* Supports Token skipping (whitespace/comments/...)
* Allows prioritising shorter matches (Keywords vs Identifiers).
* **No code generation** - the Lexer does not require any code generation phase.
* **Parsing DSL** for creating the parsing rules.
* **No code generation** - the DSL is just Javascript, not a new external language. What is written is what will be run; this speeds up development,
makes debugging trivial and provides great flexibility for inserting custom actions into the grammar.
* Strong **Error Recovery** capabilities based on Antlr3's algorithms.
* Automatic lookahead calculation for LL(1) grammars.
* In addition custom lookahead logic can be provided explicitly.
* Backtracking support.
* Strong **Error Recovery** capabilities based on Antlr3's algorithms.
* **Grammar Introspection** - the grammar's structure is known and **exposed**; this can be used to implement features such
as automatically generated syntax diagrams or syntactic error recovery.
* **No generated code** - what you write is what will be run; this makes debugging easier and provides great flexibility. For example, this could be used to implement grammar composition.
* Well tested with **100% code coverage**
* **High performance** see: [performance comparison](http://jsperf.com/json-parsers-comparison/6)
* **Grammar Introspection** - the grammar's structure is known and **exposed**; this can be used to implement features such as automatically generated syntax diagrams or syntactic error recovery.
* Well tested with **~100% code coverage**
###At a Glance, simple json parsing rules
* using ES6 fat arrow '=>'
## Installation
* ```npm install chevrotain```
* or download the javascript source directly from github releases:
* [0.2.0 zip](https://github.com/SAP/chevrotain/releases/download/v0.2.0/chevrotain-binaries-0.2.0.zip)
* [0.2.0 tar.gz](https://github.com/SAP/chevrotain/releases/download/v0.2.0/chevrotain-binaries-0.2.0.tar.gz)
## Usage example JSON Parser:
* The following example uses several features of ES6 (fat arrow/classes).
These are not mandatory for using Chevrotain; they just make the example clearer.
The example is also provided in [ES5 syntax](https://github.com/Chevrotain/examples_nodejs)
#### step 1: define your Tokens:
```JavaScript
var Token = require("chevrotain").Token
class Keyword extends Token { static PATTERN = NA } // NA: the Lexer's "not applicable" pattern (declared in the .d.ts above)
class True extends Keyword { static PATTERN = /true/ }
class False extends Keyword { static PATTERN = /false/ }
class Null extends Keyword { static PATTERN = /null/ }
class LCurly extends Token { static PATTERN = /{/ }
class RCurly extends Token { static PATTERN = /}/ }
class LSquare extends Token { static PATTERN = /\[/ }
class RSquare extends Token { static PATTERN = /]/ }
class Comma extends Token { static PATTERN = /,/ }
class Colon extends Token { static PATTERN = /:/ }
class StringLiteral extends Token { static PATTERN = /"(?:[^\\"]+|\\(?:[bfnrtv"\\/]|u[0-9a-fA-F]{4}))*"/ }
class NumberLiteral extends Token { static PATTERN = /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ }
class WhiteSpace extends Token {
    static PATTERN = /\s+/
    static GROUP = SKIPPED // SKIPPED: the Lexer's skip group (declared in the .d.ts above), so whitespace tokens are discarded
}
```
#### step 2: create a lexer from the Token definitions:
```JavaScript
var Lexer = require("chevrotain").Lexer
var JsonLexer = new Lexer([WhiteSpace, NumberLiteral, StringLiteral,
    RCurly, LCurly, LSquare, RSquare, Comma, Colon, True, False, Null])
```
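A short usage sketch for the lexer created above (the sample input is made up); per the ILexingResult declaration in the .d.ts diff, `tokenize` returns the matched tokens together with any lexing errors:

```JavaScript
var lexResult = JsonLexer.tokenize('{ "answer" : 42 }')
// lexResult.tokens --> instances of LCurly, StringLiteral, Colon, NumberLiteral, RCurly
//                      (WhiteSpace is skipped via GROUP = SKIPPED)
// lexResult.errors --> [] when the whole input was matched
```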
#### step 3: define the parsing rules:
```JavaScript
var object = this.RULE("object", () => {
this.CONSUME(LCurlyTok)
this.OPTION(() => {
this.SUBRULE(this.objectItem)
this.MANY(() => {
this.CONSUME(CommaTok)
this.SUBRULE1(this.objectItem)
})
var Parser = require("chevrotain").Parser
class JsonParser extends Parser {
constructor(input) {
Parser.performSelfAnalysis(this)
}
public object = this.RULE("object", () => {
this.CONSUME(LCurly)
this.OPTION(() => {
this.SUBRULE(this.objectItem)
this.MANY(() => {
this.CONSUME(Comma)
this.SUBRULE2(this.objectItem)
})
this.CONSUME(RCurlyTok)
})
var objectItem = this.RULE("objectItem", () => {
this.CONSUME(StringTok)
this.CONSUME(ColonTok)
this.CONSUME(RCurly)
})
public objectItem = this.RULE("objectItem", () => {
this.CONSUME(StringLiteral)
this.CONSUME(Colon)
this.SUBRULE(this.value)
})
public array = this.RULE("array", () => {
this.CONSUME(LSquare)
this.OPTION(() => {
this.SUBRULE(this.value)
})
var array = this.RULE("array", () => {
this.CONSUME(LSquareTok)
this.OPTION(() => {
this.SUBRULE(this.value)
this.MANY(() => {
this.CONSUME(CommaTok)
this.SUBRULE2(this.value)
})
this.MANY(() => {
this.CONSUME(Comma)
this.SUBRULE2(this.value)
})
this.CONSUME(RSquareTok)
})
var value = this.RULE("value", () => {
this.OR([
{ALT: () => {this.CONSUME(StringTok)}},
{ALT: () => {this.CONSUME(NumberTok)}},
{ALT: () => {this.SUBRULE(this.object)}},
{ALT: () => {this.SUBRULE(this.array)}},
{ALT: () => {this.CONSUME(TrueTok)}},
{ALT: () => {this.CONSUME(FalseTok)}},
{ALT: () => {this.CONSUME(NullTok)}}
], "a value")
})
this.CONSUME(RSquare)
})
public value = this.RULE("value", () => {
this.OR([
{ALT: () => {this.CONSUME(StringLiteral)}},
{ALT: () => {this.CONSUME(NumberLiteral)}},
{ALT: () => {this.SUBRULE(this.object)}},
{ALT: () => {this.SUBRULE(this.array)}},
{ALT: () => {this.CONSUME(True)}},
{ALT: () => {this.CONSUME(False)}},
{ALT: () => {this.CONSUME(Null)}}
], "a value")
})
}
```
#### step 4: add custom actions to the grammar defined in step 3
* this shows the modification for just two grammar rules.
```JavaScript
public object = this.RULE("object", () => {
    var items = []
    this.CONSUME(LCurly)
    this.OPTION(() => {
        items.push(this.SUBRULE(this.objectItem)) // .push to collect the objectItems
        this.MANY(() => {
            this.CONSUME(Comma)
            items.push(this.SUBRULE2(this.objectItem)) // .push to collect the objectItems
        })
    })
    this.CONSUME(RCurly)

    // merge all the objectItems
    var obj = {}
    items.forEach((item) => {
        obj[item.itemName] = item.itemValue
    })
    return obj
})

public objectItem = this.RULE("objectItem", () => {
    var nameToken = this.CONSUME(StringLiteral)
    this.CONSUME(Colon)
    var value = this.SUBRULE(this.value) // assumes SUBRULE(this.value) returns the JS value (null/number/string/...)
    var itemNameString = nameToken.image // nameToken.image to get the literalString the lexer consumed
    var itemName = itemNameString.substr(1, itemNameString.length - 2) // chop off the string quotes
    return {itemName: itemName, itemValue: value}
})
...
```
#### step 5: wrap it all together
```JavaScript
function lexAndParse(text) {
    var lexResult = JsonLexer.tokenize(text);
    var parser = new JsonParser(lexResult.tokens);
    return parser.object();
}
```
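A possible invocation of the helper above, assuming the custom actions from step 4 are in place (the sample JSON string is made up):

```JavaScript
// lex + parse a small JSON document and use the returned plain JS object
var parsed = lexAndParse('{ "name" : "chevrotain", "major" : 0 }')
// parsed --> { name: "chevrotain", major: 0 }
//            (each objectItem contributes an itemName/itemValue pair, merged in the "object" rule)
```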
## Getting Started
The best place to start is the [examples folder](https://github.com/SAP/chevrotain/tree/master/examples):
* The most basic one is: [Json Parser](https://github.com/SAP/chevrotain/blob/master/examples/json/json_parser.ts)
* A more complex one is: [ECMAScript5 Parser](https://github.com/SAP/chevrotain/blob/master/examples/examples/ecmascript5_parser.ts)
* Plain Javascript example in node.js: [Json Parser Plain.js](https://github.com/Chevrotain/examples_nodejs)
* Using jison-lex for the lexing phase: [Plain.js with jison-lex](https://github.com/Chevrotain/examples_jison-lex)
Note that the examples in the examples folder are written in Typescript.
To see the generated (readable) Javascript code:

@@ -96,13 +221,8 @@

Installation
------------
**TODO**
Dependencies
-------------
## Dependencies
Only a single dependency: [lodash](https://lodash.com/).
Compatibility
-------------
The Generated artifact(chevrotain.js) should run any modern Javascript ES5 runtime.
## Compatibility
The Generated artifact(chevrotain.js) should run on any modern Javascript ES5 runtime.
* The CI build runs the tests under Node.js.

@@ -114,4 +234,3 @@ * additionally local testing is done on latest versions of Chrome/Firefox/IE.

Development
-----------
Chevrotain was originally developed and is maintained by Shahar Soel
## Development
Chevrotain was originally developed and is maintained by Shahar Soel

Sorry, the diff of this file is too big to display
