chevrotain

chevrotain - npm Package Compare versions

Comparing version 0.1.0 to 0.2.0


bin/chevrotain.d.ts

@@ -20,10 +20,27 @@ /// <reference path="../libs/lodash.d.ts" />

declare module chevrotain.tokens {
function getTokName(tokType: Function): string;
function tokenName(clazz: Function): string;
type TokenClass = Function;
class Token {
image: string;
offset: number;
startLine: number;
startColumn: number;
image: string;
endLine: number;
endColumn: number;
isInsertedInRecovery: boolean;
constructor(startLine: number, startColumn: number, image: string);
/**
* @param {string} image the textual representation of the Token as it appeared in the text
* @param {number} offset offset of the first character of the Token
* @param {number} startLine line of the first character of the Token
* @param {number} startColumn column of the first character of the Token
* @param {number} endLine line of the last character of the Token
* @param {number} endColumn column of the last character of the Token
*
* Things to note:
* * "do" {startColumn : 1, endColumn: 2} --> the range is inclusive to exclusive 1...2 (2 chars long).
* * "\n" {startLine : 1, endLine: 1} --> a lineTerminator as the last character does not effect the Token's line numbering.
* * "'hello\tworld\uBBBB'" {image: "'hello\tworld\uBBBB'"} --> a Token's image is the "literal" text
* (unicode escaping is untouched).
*/
constructor(image: string, offset: number, startLine: number, startColumn: number, endLine?: number, endColumn?: number);
}
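For illustration only, a minimal sketch of the new Token constructor signature in use (the `Plus` subclass and the sample positions are hypothetical, not part of this diff):

```JavaScript
var Token = require("chevrotain").Token

// hypothetical token class; instances are built with the new signature:
// Token(image, offset, startLine, startColumn, endLine?, endColumn?)
class Plus extends Token {}

// a "+" found at offset 4, i.e. line 1, column 5
var plusTok = new Plus("+", 4, 1, 5, 1, 5)
// plusTok.image       --> "+"
// plusTok.startColumn --> 5
```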

@@ -34,17 +51,7 @@ type VirtualTokenClass = Function;

}
function INVALID_LINE(): number;
function INVALID_COLUMN(): number;
class NoneToken extends Token {
private static _instance;
constructor();
static getInstance(): any;
}
function NONE_TOKEN(): Token;
}
declare module chevrotain.lexer {
import tok = chevrotain.tokens;
var NA: RegExp;
interface ILexingResult {
tokens: tok.Token[];
ignored: tok.Token[];
errors: ILexingError[];

@@ -62,10 +69,14 @@ }

* concerns such as performance/extendability/modularity are ignored in this implementation.
*
*/
class SimpleLexer {
protected tokenClasses: TokenConstructor[];
protected matchPatterns: RegExp[];
protected ignorePatterns: RegExp[];
static SKIPPED: {
description: string;
};
static NA: RegExp;
protected allPatterns: RegExp[];
protected patternToClass: {};
protected patternIdxToClass: Function[];
protected patternIdxToSkipped: boolean[];
protected patternIdxToLongerAltIdx: number[];
protected patternIdxToCanLineTerminator: boolean[];
/**

@@ -93,6 +104,31 @@ * @param {Function[]} tokenClasses constructor functions for the Tokens types this scanner will support

*
* The Lexer will try to locate the longest match each time. If two patterns both match with the same match length,
* the pattern defined first will "win". For example: if an Identifier's pattern is /\w+/ and we also have keywords such
* as /while/ /for/ ... the Identifier constructor must appear AFTER all keyword constructors in the 'tokenClasses' arg.
* The Lexer will identify the first pattern that matches, therefore the order of Token constructors passed
* to the SimpleLexer's constructor is meaningful. If two patterns may match the same string, the longer one
* should be before the shorter one.
*
* Note that there are situations in which we may wish to place the longer pattern after the shorter one.
* For example: keywords vs Identifiers.
* 'do' (/do/) and 'done' (/\w+/)
*
* * If the Identifier pattern appears before the 'do' pattern, both 'do' and 'done'
* will be lexed as an Identifier.
*
* * If the 'do' pattern appears before the Identifier pattern, 'do' will be lexed correctly as a keyword.
* However, 'done' will be lexed as TWO tokens: the keyword 'do' followed by the identifier 'ne'.
*
* To resolve this problem, add a static property named LONGER_ALT on the keyword Token's constructor
* example:
*
* export class Identifier extends tok.Token { static PATTERN = /[_a-zA-Z][_a-zA-Z0-9]*/ }
* export class Keyword extends tok.Token {
* static PATTERN = lex.NA
* static LONGER_ALT = Identifier
* }
* export class Do extends Keyword { static PATTERN = /do/ }
* export class While extends Keyword { static PATTERN = /while/ }
* export class Return extends Keyword { static PATTERN = /return/ }
*
* The lexer will then also attempt to match a (longer) Identifier each time a keyword is matched
*
*
*/
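To make the LONGER_ALT resolution described above concrete, a minimal sketch (token classes as in the comment above; the sample input is made up, and the lexer is assumed to be reachable as the SimpleLexer declared in this .d.ts, exposed as Lexer in the public API):

```JavaScript
// keywords are listed before Identifier, and each Keyword subclass
// carries LONGER_ALT = Identifier, as described in the comment above
var lexer = new SimpleLexer([Do, While, Return, Identifier])

var result = lexer.tokenize("done")
// with LONGER_ALT:    result.tokens holds a single Identifier token ("done")
// without LONGER_ALT: "done" would be split into keyword 'do' + identifier 'ne'
```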

@@ -106,19 +142,12 @@ constructor(tokenClasses: TokenConstructor[]);

* @param {string} text the string to lex
* @returns {{tokens: {Token}[], ignored: {Token}[], errors: string[]}}
* @returns {{tokens: {Token}[], errors: string[]}}
*/
tokenize(text: string): ILexingResult;
}
interface IConsumeResult {
token: tok.Token;
remainingInput: string;
offset: number;
}
function NOTHING_CONSUMED(): IConsumeResult;
function tokenizeOne(input: string, offset: number, patterns: RegExp[], patternsToConstructor: any): IConsumeResult;
interface IAnalyzeResult {
matchPatterns: RegExp[];
ignorePatterns: RegExp[];
patternToClass: {
[pattern: string]: RegExp;
};
allPatterns: RegExp[];
patternIdxToClass: Function[];
patternIdxToSkipped: boolean[];
patternIdxToLongerAltIdx: number[];
patternIdxToCanLineTerminator: boolean[];
}

@@ -133,8 +162,3 @@ function analyzeTokenClasses(tokenClasses: TokenConstructor[]): IAnalyzeResult;

function addStartOfInput(pattern: RegExp): RegExp;
interface ILineColumn {
line: number;
column: number;
}
type OffsetToLineColumn = ILineColumn[];
function buildOffsetToLineColumnDict(text: string): OffsetToLineColumn;
function countLineTerminators(text: string): number;
}

@@ -390,10 +414,16 @@ declare module chevrotain.tree {

var CLASS_TO_GRAMMAR_PRODUCTIONS: lang.HashTable<lang.HashTable<gast.TOP_LEVEL>>;
function getProductionsForClass(classInstance: any): lang.HashTable<gast.TOP_LEVEL>;
function getProductionsForClass(className: string): lang.HashTable<gast.TOP_LEVEL>;
var CLASS_TO_RESYNC_FOLLOW_SETS: lang.HashTable<lang.HashTable<Function[]>>;
function getResyncFollowsForClass(classInstance: any): lang.HashTable<Function[]>;
function setResyncFollowsForClass(classInstance: any, followSet: lang.HashTable<Function[]>): void;
function getResyncFollowsForClass(className: string): lang.HashTable<Function[]>;
function setResyncFollowsForClass(className: string, followSet: lang.HashTable<Function[]>): void;
var CLASS_TO_LOOKAHEAD_FUNCS: lang.HashTable<lang.HashTable<Function>>;
function getLookaheadFuncsForClass(classInstance: any): lang.HashTable<Function>;
function getLookaheadFuncsForClass(className: string): lang.HashTable<Function>;
var CLASS_TO_FIRST_AFTER_REPETITION: lang.HashTable<lang.HashTable<interpreter.IFirstAfterRepetition>>;
function getFirstAfterRepForClass(classInstance: any): lang.HashTable<interpreter.IFirstAfterRepetition>;
function getFirstAfterRepForClass(className: string): lang.HashTable<interpreter.IFirstAfterRepetition>;
var CLASS_TO_OR_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var CLASS_TO_MANY_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var CLASS_TO_AT_LEAST_ONE_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var CLASS_TO_OPTION_LA_CACHE: lang.HashTable<lang.HashTable<string>[]>;
var MAX_OCCURRENCE_INDEX: number;
function initLookAheadKeyCache(className: any): void;
}

@@ -458,2 +488,3 @@ declare module chevrotain.lookahead {

import gast = chevrotain.gast;
import interp = chevrotain.interpreter;
import lang = chevrotain.lang;

@@ -495,3 +526,2 @@ import gastBuilder = chevrotain.gastBuilder;

RULE_STACK: string[];
FOLLOW_STACK: Function[][];
}

@@ -512,2 +542,3 @@ type LookAheadFunc = () => boolean;

protected isBackTrackingStack: any[];
protected className: string;
constructor(input?: tok.Token[]);

@@ -539,5 +570,10 @@ input: tok.Token[];

function InRuleRecoveryException(message: string): void;
interface IFollowKey {
ruleName: string;
idxInCallingRule: number;
inRule: string;
}
/**
* A Recognizer capable of self analysis to determine its grammar structure.
* This is used for more advanced features requiring this information.
* This is used for more advanced features requiring such information.
* for example: Error Recovery, Automatic lookahead calculation

@@ -549,5 +585,10 @@ */

protected RULE_OCCURRENCE_STACK: number[];
protected FOLLOW_STACK: Function[][];
protected tokensMap: gastBuilder.ITerminalNameToConstructor;
constructor(input: tok.Token[], tokensMap: gastBuilder.ITerminalNameToConstructor);
protected firstAfterRepMap: lang.HashTable<interp.IFirstAfterRepetition>;
protected classLAFuncs: lang.HashTable<Function>;
protected orLookaheadKeys: lang.HashTable<string>[];
protected manyLookaheadKeys: lang.HashTable<string>[];
protected atLeastOneLookaheadKeys: lang.HashTable<string>[];
protected optionLookaheadKeys: lang.HashTable<string>[];
constructor(input: tok.Token[], tokensMapOrArr: gastBuilder.ITerminalNameToConstructor | Function[]);
reset(): void;

@@ -841,2 +882,4 @@ /**

protected RULE<T>(ruleName: string, impl: (...implArgs: any[]) => T, invalidRet?: () => T, doReSync?: boolean): (idxInCallingRule?: number, ...args: any[]) => T;
protected ruleInvocationStateUpdate(ruleName: string, idxInCallingRule: number): void;
protected ruleFinallyStateUpdate(): void;
private defaultInvalidReturn();

@@ -861,4 +904,8 @@ protected ruleNamePattern: RegExp;

protected findReSyncTokenType(): Function;
protected getCurrFollowKey(): IFollowKey;
protected buildFullFollowKeyStack(): IFollowKey[];
protected flattenFollowSet(): Function[];
protected getFollowSetFromFollowKey(followKey: IFollowKey): Function[];
protected reSyncTo(tokClass: Function): void;
private attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, prodName, prodOccurrence, nextToksWalker);
private attemptInRepetitionRecovery(prodFunc, args, lookaheadFunc, prodName, prodOccurrence, nextToksWalker, prodKeys);
private atLeastOneInternal(prodFunc, prodName, prodOccurrence, lookAheadFunc, action, errMsg?);

@@ -880,2 +927,3 @@ private manyInternal(prodFunc, prodName, prodOccurrence, lookAheadFunc, action?);

protected consumeInternal(tokClass: Function, idx: number): tok.Token;
protected getKeyForAutomaticLookahead(prodName: string, prodKeys: lang.HashTable<string>[], occurrence: number): string;
protected getLookaheadFuncForOption(occurence: number): () => boolean;

@@ -886,3 +934,3 @@ protected getLookaheadFuncForOr(occurence: number, ignoreErrors: boolean): () => number;

protected isNextRule<T>(ruleName: string): boolean;
protected getLookaheadFuncFor<T>(prodType: string, occurrence: number, laFuncBuilder: (number, any) => () => T, extraArgs?: any[]): () => T;
protected getLookaheadFuncFor<T>(key: string, occurrence: number, laFuncBuilder: (number, any) => () => T, extraArgs?: any[]): () => T;
protected saveRecogState(): IErrorRecoveryRecogState;

@@ -893,1 +941,9 @@ protected reloadRecogState(newState: IErrorRecoveryRecogState): void;

}
/**
* Defines the public API of Chevrotain.
* Changes here may require a major version change (semVer).
*/
declare var CHEV_TEST_MODE: any;
declare var global: any;
declare var testMode: any;
declare var API: any;
{
"name": "chevrotain",
"version": "0.1.0",
"description": "Javascript/Typescript parsing framework",
"version": "0.2.0",
"description": "Chevrotain is a high performance fault Tolerant Javascript parsing DSL for building recursive decent parsers",
"keywords": [
"parser",
"syntax",
"lexical",
"analysis",
"grammar",
"lexer",
"tokenizer"
"tokenizer",
"generator",
"compiler",
"fault",
"tolerant"
],

@@ -12,0 +18,0 @@ "bugs": {

[![Build Status](https://travis-ci.org/SAP/chevrotain.svg?branch=master)](https://travis-ci.org/SAP/chevrotain)
[![Coverage Status](https://coveralls.io/repos/SAP/chevrotain/badge.svg?branch=master)](https://coveralls.io/r/SAP/chevrotain?branch=master)
[![NPM](https://nodei.co/npm/chevrotain.png?mini=true)](https://npmjs.org/package/chevrotain)
#Chevrotain
# Chevrotain
Chevrotain is a Javascript/Typescript parsing framework which aims to make it easier to write "hand built" recursive descent parsers.
Chevrotain is a high performance fault tolerant Javascript parsing DSL for building recursive descent parsers.
Chevrotain is **NOT** a parser generator. It solves the same kind of problems as a parser generator, just without
the code generation phase.
###Features
* **DSL** for creating the parsing rules.
* Automatic lookahead calculation for LL(1) grammars
* For other grammars custom lookahead functions can be provided.
## Features
* **Lexer engine** based on RegExps.
* Supports Token location tracking.
* Supports Token skipping (whitespace/comments/...)
* Allows prioritising shorter matches (Keywords vs Identifiers).
* **No code generation** - the Lexer does not require any code generation phase.
* **Parsing DSL** for creating the parsing rules.
* **No code generation** - the DSL is just Javascript, not a new external language. What is written is what will be run; this speeds up development,
makes debugging trivial and provides great flexibility for inserting custom actions into the grammar.
* Strong **Error Recovery** capabilities based on Antlr3's algorithms.
* Automatic lookahead calculation for LL(1) grammars.
* In addition custom lookahead logic can be provided explicitly.
* Backtracking support.
* Strong **Error Recovery** capabilities based on Antlr3's algorithms.
* **Grammar Introspection** - the grammar's structure is known and **exposed**; this can be used to implement features such
as automatically generated syntax diagrams or syntactic error recovery.
* **No generated code** - what you write is what will be run; this makes debugging easier and provides great flexibility. For example, this could be used to implement grammar composition.
* Well tested with **100% code coverage**
* **High performance** see: [performance comparison](http://jsperf.com/json-parsers-comparison/6)
* **Grammar Introspection** - the grammar's structure is known and **exposed**; this can be used to implement features such as automatically generated syntax diagrams or syntactic error recovery.
* Well tested with **~100% code coverage**
###At a Glance, simple json parsing rules
* using ES6 fat arrow '=>'
## Installation
* ```npm install chevrotain```
* or download the javascript source directly from github releases:
* [0.2.0 zip](https://github.com/SAP/chevrotain/releases/download/v0.2.0/chevrotain-binaries-0.2.0.zip)
* [0.2.0 tar.gz](https://github.com/SAP/chevrotain/releases/download/v0.2.0/chevrotain-binaries-0.2.0.tar.gz)
## Usage example JSON Parser:
* The following example uses several features of ES6 (fat arrow/classes).
These are not mandatory for using Chevrotain; they just make the example clearer.
The example is also provided in [ES5 syntax](https://github.com/Chevrotain/examples_nodejs)
#### step 1: define your Tokens:
```JavaScript
var Token = require("chevrotain").Token
class Keyword extends Token { static PATTERN = NA } // NA: the Lexer's "not applicable" pattern (declared in the .d.ts above)
class True extends Keyword { static PATTERN = /true/ }
class False extends Keyword { static PATTERN = /false/ }
class Null extends Keyword { static PATTERN = /null/ }
class LCurly extends Token { static PATTERN = /{/ }
class RCurly extends Token { static PATTERN = /}/ }
class LSquare extends Token { static PATTERN = /\[/ }
class RSquare extends Token { static PATTERN = /]/ }
class Comma extends Token { static PATTERN = /,/ }
class Colon extends Token { static PATTERN = /:/ }
class StringLiteral extends Token { static PATTERN = /"(?:[^\\"]+|\\(?:[bfnrtv"\\/]|u[0-9a-fA-F]{4}))*"/ }
class NumberLiteral extends Token { static PATTERN = /-?(0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?/ }
class WhiteSpace extends Token {
    static PATTERN = /\s+/
    static GROUP = SKIPPED // SKIPPED: the Lexer's skip group (declared in the .d.ts above), so whitespace tokens are discarded
}
```
#### step 2: create a lexer from the Token definitions:
```JavaScript
var Lexer = require("chevrotain").Lexer
var JsonLexer = new Lexer([WhiteSpace, NumberLiteral, StringLiteral,
    RCurly, LCurly, LSquare, RSquare, Comma, Colon, True, False, Null])
```
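A short usage sketch for the lexer created above (the sample input is made up); per the ILexingResult declaration in the .d.ts diff, `tokenize` returns the matched tokens together with any lexing errors:

```JavaScript
var lexResult = JsonLexer.tokenize('{ "answer" : 42 }')
// lexResult.tokens --> instances of LCurly, StringLiteral, Colon, NumberLiteral, RCurly
//                      (WhiteSpace is skipped via GROUP = SKIPPED)
// lexResult.errors --> [] when the whole input was matched
```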
#### step 3: define the parsing rules:
```JavaScript
var object = this.RULE("object", () => {
this.CONSUME(LCurlyTok)
this.OPTION(() => {
this.SUBRULE(this.objectItem)
this.MANY(() => {
this.CONSUME(CommaTok)
this.SUBRULE1(this.objectItem)
})
var Parser = require("chevrotain").Parser
class JsonParser extends Parser {
constructor(input) {
Parser.performSelfAnalysis(this)
}
public object = this.RULE("object", () => {
this.CONSUME(LCurly)
this.OPTION(() => {
this.SUBRULE(this.objectItem)
this.MANY(() => {
this.CONSUME(Comma)
this.SUBRULE2(this.objectItem)
})
this.CONSUME(RCurlyTok)
})
var objectItem = this.RULE("objectItem", () => {
this.CONSUME(StringTok)
this.CONSUME(ColonTok)
this.CONSUME(RCurly)
})
public objectItem = this.RULE("objectItem", () => {
this.CONSUME(StringLiteral)
this.CONSUME(Colon)
this.SUBRULE(this.value)
})
public array = this.RULE("array", () => {
this.CONSUME(LSquare)
this.OPTION(() => {
this.SUBRULE(this.value)
})
var array = this.RULE("array", () => {
this.CONSUME(LSquareTok)
this.OPTION(() => {
this.SUBRULE(this.value)
this.MANY(() => {
this.CONSUME(CommaTok)
this.SUBRULE2(this.value)
})
this.MANY(() => {
this.CONSUME(Comma)
this.SUBRULE2(this.value)
})
this.CONSUME(RSquareTok)
})
var value = this.RULE("value", () => {
this.OR([
{ALT: () => {this.CONSUME(StringTok)}},
{ALT: () => {this.CONSUME(NumberTok)}},
{ALT: () => {this.SUBRULE(this.object)}},
{ALT: () => {this.SUBRULE(this.array)}},
{ALT: () => {this.CONSUME(TrueTok)}},
{ALT: () => {this.CONSUME(FalseTok)}},
{ALT: () => {this.CONSUME(NullTok)}}
], "a value")
})
this.CONSUME(RSquare)
})
public value = this.RULE("value", () => {
this.OR([
{ALT: () => {this.CONSUME(StringLiteral)}},
{ALT: () => {this.CONSUME(NumberLiteral)}},
{ALT: () => {this.SUBRULE(this.object)}},
{ALT: () => {this.SUBRULE(this.array)}},
{ALT: () => {this.CONSUME(True)}},
{ALT: () => {this.CONSUME(False)}},
{ALT: () => {this.CONSUME(Null)}}
], "a value")
})
}
```
#### step 4: add custom actions to the grammar defined in step 3
* this shows the modification for just two grammar rules.
```JavaScript
public object = this.RULE("object", () => {
    var items = []
    this.CONSUME(LCurly)
    this.OPTION(() => {
        items.push(this.SUBRULE(this.objectItem)) // .push to collect the objectItems
        this.MANY(() => {
            this.CONSUME(Comma)
            items.push(this.SUBRULE2(this.objectItem)) // .push to collect the objectItems
        })
    })
    this.CONSUME(RCurly)

    // merge all the objectItems
    var obj = {}
    items.forEach((item) => {
        obj[item.itemName] = item.itemValue
    })
    return obj
})

public objectItem = this.RULE("objectItem", () => {
    var nameToken = this.CONSUME(StringLiteral)
    this.CONSUME(Colon)
    var value = this.SUBRULE(this.value) // assumes SUBRULE(this.value) returns the JS value (null/number/string/...)
    var itemNameString = nameToken.image // nameToken.image to get the literalString the lexer consumed
    var itemName = itemNameString.substr(1, itemNameString.length - 2) // chop off the string quotes
    return {itemName: itemName, itemValue: value}
})
...
```
#### step 5: wrap it all together
```JavaScript
function lexAndParse(text) {
    var lexResult = JsonLexer.tokenize(text);
    var parser = new JsonParser(lexResult.tokens);
    return parser.object();
}
```
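A possible invocation of the helper above, assuming the custom actions from step 4 are in place (the sample JSON string is made up):

```JavaScript
// lex + parse a small JSON document and use the returned plain JS object
var parsed = lexAndParse('{ "name" : "chevrotain", "major" : 0 }')
// parsed --> { name: "chevrotain", major: 0 }
//            (each objectItem contributes an itemName/itemValue pair, merged in the "object" rule)
```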
## Getting Started
The best place to start is the [examples folder](https://github.com/SAP/chevrotain/tree/master/examples):
* The most basic one is: [Json Parser](https://github.com/SAP/chevrotain/blob/master/examples/json/json_parser.ts)
* A more complex one is: [ECMAScript5 Parser](https://github.com/SAP/chevrotain/blob/master/examples/examples/ecmascript5_parser.ts)
* Plain Javascript example in node.js: [Json Parser Plain.js](https://github.com/Chevrotain/examples_nodejs)
* Using jison-lex for the lexing phase: [Plain.js with jison-lex](https://github.com/Chevrotain/examples_jison-lex)
Note that the examples in the examples folder are written in Typescript.
To see the generated (readable) Javascript code:

@@ -96,13 +221,8 @@

Installation
------------
**TODO**
Dependencies
-------------
## Dependencies
Only a single dependency: [lodash](https://lodash.com/).
Compatibility
-------------
The Generated artifact(chevrotain.js) should run any modern Javascript ES5 runtime.
## Compatibility
The Generated artifact(chevrotain.js) should run on any modern Javascript ES5 runtime.
* The CI build runs the tests under Node.js.

@@ -114,4 +234,3 @@ * additionally local testing is done on latest versions of Chrome/Firefox/IE.

Development
-----------
Chevrotain was originally developed and is maintained by Shahar Soel
## Development
Chevrotain was originally developed and is maintained by Shahar Soel

Sorry, the diff of this file is too big to display
