@@ -13,25 +13,3 @@ (function(root, factory) {
		var hasOwnProperty = Object.prototype.hasOwnProperty

		// Prefer the native Object.assign(); otherwise fall back to a small ES5
		// implementation so that IE9+ keeps working.
		// https://tc39.github.io/ecma262/#sec-object.assign
		var assign = typeof Object.assign === 'function' ? Object.assign : function(target, sources) {
			// `sources` is never read directly: every argument after the
			// target is picked up through `arguments` below.
			if (target == null) {
				throw new TypeError('Target cannot be null or undefined');
			}

			var result = Object(target)
			var count = arguments.length
			var index = 1
			while (index < count) {
				var from = arguments[index++]
				// null / undefined sources are skipped
				if (from == null) continue

				for (var prop in from) {
					// copy own properties only, never inherited ones
					if (!hasOwnProperty.call(from, prop)) continue
					result[prop] = from[prop]
				}
			}
			return result
		}

		var toString = Object.prototype.toString
		// the `sticky` property is a boolean only when the engine knows the /y flag
		var hasSticky = typeof new RegExp().sticky === 'boolean'
		@@ -41,4 +19,4 @@

		// Type predicates used when normalising rule definitions.
		//
		// Each of these was declared twice; the later declarations were the ones in
		// effect, so the shadowed `o.constructor === RegExp` variants are dropped.
		// Behaviour is unchanged.

		// Truthy when `o` is a RegExp, judged by its Object.prototype.toString tag
		// (via the file-level `toString` alias), not by its `constructor` property.
		// Falsy inputs are returned as-is.
		function isRegExp(o) { return o && toString.call(o) === '[object RegExp]' }

		// Truthy when `o` is a non-null object that is neither a RegExp nor an Array.
		// Falsy inputs are returned as-is.
		function isObject(o) { return o && typeof o === 'object' && !isRegExp(o) && !Array.isArray(o) }

		@@ -56,2 +34,3 @@ function reEscape(s) {
		function reUnion(regexps) {
		if (!regexps.length) return '(?!)'
		var source = regexps.map(function(s) {
		@@ -69,10 +48,11 @@ return "(?:" + s + ")"
		// TODO: consider /u support
		if (obj.ignoreCase) { throw new Error('RegExp /i flag not allowed') }
		if (obj.global) { throw new Error('RegExp /g flag is implied') }
		if (obj.sticky) { throw new Error('RegExp /y flag is implied') }
		if (obj.multiline) { throw new Error('RegExp /m flag is implied') }
		if (obj.ignoreCase) throw new Error('RegExp /i flag not allowed')
		if (obj.global) throw new Error('RegExp /g flag is implied')
		if (obj.sticky) throw new Error('RegExp /y flag is implied')
		if (obj.multiline) throw new Error('RegExp /m flag is implied')
		if (obj.unicode) throw new Error('RegExp /u flag is not allowed')
		return obj.source

		} else {
		throw new Error('not a pattern: ' + obj)
		throw new Error('Not a pattern: ' + obj)
		}
		@@ -84,6 +64,12 @@ }
		var result = []
		for (var i=0; i<keys.length; i++) {
		for (var i = 0; i < keys.length; i++) {
		var key = keys[i]
		var thing = object[key]
		var rules = Array.isArray(thing) ? thing : [thing]
		var rules = [].concat(thing)
		if (key === 'include') {
		for (var j = 0; j < rules.length; j++) {
		result.push({include: rules[j]})
		}
		continue
		}
		var match = []
		@@ -106,8 +92,15 @@ rules.forEach(function(rule) {
		var result = []
		for (var i=0; i<array.length; i++) {
		for (var i = 0; i < array.length; i++) {
		var obj = array[i]
		if (!obj.name) {
		throw new Error('Rule has no name: ' + JSON.stringify(obj))
		if (obj.include) {
		var include = [].concat(obj.include)
		for (var j = 0; j < include.length; j++) {
		result.push({include: include[j]})
		}
		continue
		}
		result.push(ruleOptions(obj.name, obj))
		if (!obj.type) {
		throw new Error('Rule has no type: ' + JSON.stringify(obj))
		}
		result.push(ruleOptions(obj.type, obj))
		}
		@@ -117,11 +110,14 @@ return result

		function ruleOptions(name, obj) {
		if (typeof obj !== 'object' \|\| Array.isArray(obj) \|\| isRegExp(obj)) {
		function ruleOptions(type, obj) {
		if (!isObject(obj)) {
		obj = { match: obj }
		}
		if (obj.include) {
		throw new Error('Matching rules cannot also include states')
		}

		// nb. error implies lineBreaks
		var options = assign({
		tokenType: name,
		lineBreaks: !!obj.error,
		// nb. error and fallback imply lineBreaks
		var options = {
		defaultType: type,
		lineBreaks: !!obj.error \|\| !!obj.fallback,
		pop: false,
		@@ -131,6 +127,20 @@ next: null,
		error: false,
		fallback: false,
		value: null,
		getType: null,
		}, obj)
		type: null,
		shouldThrow: false,
		}

		// Avoid Object.assign(), so we support IE9+
		for (var key in obj) {
		if (hasOwnProperty.call(obj, key)) {
		options[key] = obj[key]
		}
		}

		// type transform cannot be a string
		if (typeof options.type === 'string' && type !== options.type) {
		throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')")
		}

		// convert to array
		@@ -143,20 +153,40 @@ var match = options.match
		})
		if (options.keywords) {
		options.getType = keywordTransform(options.keywords)
		}
		return options
		}

		// Normalise a rule spec into a flat list of rules: array specs are handed
		// to arrayToRules(), everything else to objectToRules().
		function toRules(spec) {
			if (Array.isArray(spec)) {
				return arrayToRules(spec)
			}
			return objectToRules(spec)
		}

		var defaultErrorRule = ruleOptions('error', {lineBreaks: true, shouldThrow: true})
		function compileRules(rules, hasStates) {
		rules = Array.isArray(rules) ? arrayToRules(rules) : objectToRules(rules)

		var errorRule = null
		var fast = Object.create(null)
		var fastAllowed = true
		var groups = []
		var parts = []
		for (var i=0; i<rules.length; i++) {

		// If there is a fallback rule, then disable fast matching
		for (var i = 0; i < rules.length; i++) {
		if (rules[i].fallback) {
		fastAllowed = false
		}
		}

		for (var i = 0; i < rules.length; i++) {
		var options = rules[i]

		if (options.error) {
		if (options.include) {
		// all valid inclusions are removed by states() preprocessor
		throw new Error('Inheritance is not allowed in stateless lexers')
		}

		if (options.error \|\| options.fallback) {
		// errorRule can only be set once
		if (errorRule) {
		throw new Error("Multiple error rules not allowed: (for token '" + options.tokenType + "')")
		if (!options.fallback === !errorRule.fallback) {
		throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')")
		} else {
		throw new Error("fallback and error are mutually exclusive (for token '" + options.defaultType + "')")
		}
		}
		@@ -166,10 +196,30 @@ errorRule = options

		// skip rules with no match
		if (options.match.length === 0) {
		var match = options.match
		if (fastAllowed) {
		while (match.length && typeof match[0] === 'string' && match[0].length === 1) {
		var word = match.shift()
		fast[word.charCodeAt(0)] = options
		}
		}

		// Warn about inappropriate state-switching options
		if (options.pop \|\| options.push \|\| options.next) {
		if (!hasStates) {
		throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')")
		}
		if (options.fallback) {
		throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')")
		}
		}

		// Only rules with a .match are included in the RegExp
		if (match.length === 0) {
		continue
		}
		fastAllowed = false

		groups.push(options)

		// convert to RegExp
		var pat = reUnion(options.match.map(regexpOrLiteral))
		var pat = reUnion(match.map(regexpOrLiteral))

		@@ -185,5 +235,2 @@ // validate
		}
		if (!hasStates && (options.pop \|\| options.push \|\| options.next)) {
		throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.tokenType + "')")
		}

		@@ -199,36 +246,84 @@ // try and detect rules matching newlines

		var suffix = hasSticky ? '' : '\|(?:)'
		var flags = hasSticky ? 'ym' : 'gm'

		// If there's no fallback rule, use the sticky flag so we only look for
		// matches at the current index.
		//
		// If we don't support the sticky flag, then fake it using an irrefutable
		// match (i.e. an empty pattern).
		var fallbackRule = errorRule && errorRule.fallback
		var flags = hasSticky && !fallbackRule ? 'ym' : 'gm'
		var suffix = hasSticky \|\| fallbackRule ? '' : '\|'
		var combined = new RegExp(reUnion(parts) + suffix, flags)

		return {regexp: combined, groups: groups, error: errorRule}
		return {regexp: combined, groups: groups, fast: fast, error: errorRule \|\| defaultErrorRule}
		}

		// Build a single-state lexer: the compiled rules become the only state,
		// registered under the name 'start', which is also passed as the second
		// Lexer argument.
		function compile(rules) {
		// NOTE(review): `result` is assigned twice in a row, so the second assignment
		// overwrites the first and compileRules() runs twice. These look like two
		// revisions of the same line (raw rules vs. rules pre-processed by
		// toRules()) — confirm which one is intended and drop the other.
		var result = compileRules(rules)
		var result = compileRules(toRules(rules))
		return new Lexer({start: result}, 'start')
		}

		/**
		 * Validate the state-switching options of a single rule.
		 *
		 * @param {Object} g - rule options; a falsy value is accepted and skips all checks
		 * @param {string} name - name of the state the rule belongs to (used in error messages)
		 * @param {Object} map - lookup keyed by state name; a truthy entry means the state exists
		 * @throws {Error} when `push`/`next` names a state missing from `map`,
		 *   or when `pop` is set to anything that does not coerce to 1
		 */
		function checkStateGroup(g, name, map) {
			// `push` takes precedence over `next` when both are set
			var state = g && (g.push || g.next)
			if (state && !map[state]) {
				throw new Error("Missing state '" + state + "' (in token '" + g.defaultType + "' of state '" + name + "')")
			}
			// unary plus coerces, so `true` and '1' are accepted alongside 1
			if (g && g.pop && +g.pop !== 1) {
				throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')")
			}
		}
		function compileStates(states, start) {
		var all = states.$all ? toRules(states.$all) : []
		delete states.$all

		var keys = Object.getOwnPropertyNames(states)
		if (!start) start = keys[0]

		var ruleMap = Object.create(null)
		for (var i = 0; i < keys.length; i++) {
		var key = keys[i]
		ruleMap[key] = toRules(states[key]).concat(all)
		}
		for (var i = 0; i < keys.length; i++) {
		var key = keys[i]
		var rules = ruleMap[key]
		var included = Object.create(null)
		for (var j = 0; j < rules.length; j++) {
		var rule = rules[j]
		if (!rule.include) continue
		var splice = [j, 1]
		if (rule.include !== key && !included[rule.include]) {
		included[rule.include] = true
		var newRules = ruleMap[rule.include]
		if (!newRules) {
		throw new Error("Cannot include nonexistent state '" + rule.include + "' (in state '" + key + "')")
		}
		for (var k = 0; k < newRules.length; k++) {
		var newRule = newRules[k]
		if (rules.indexOf(newRule) !== -1) continue
		splice.push(newRule)
		}
		}
		rules.splice.apply(rules, splice)
		j--
		}
		}

		var map = Object.create(null)
		for (var i=0; i<keys.length; i++) {
		for (var i = 0; i < keys.length; i++) {
		var key = keys[i]
		map[key] = compileRules(states[key], true)
		map[key] = compileRules(ruleMap[key], true)
		}

		for (var i=0; i<keys.length; i++) {
		var groups = map[keys[i]].groups
		for (var j=0; j<groups.length; j++) {
		var g = groups[j]
		var state = g && (g.push \|\| g.next)
		if (state && !map[state]) {
		throw new Error("Missing state '" + state + "' (in token '" + g.tokenType + "' of state '" + keys[i] + "')")
		}
		if (g && g.pop && +g.pop !== 1) {
		throw new Error("pop must be 1 (in token '" + g.tokenType + "' of state '" + keys[i] + "')")
		}
		for (var i = 0; i < keys.length; i++) {
		var name = keys[i]
		var state = map[name]
		var groups = state.groups
		for (var j = 0; j < groups.length; j++) {
		checkStateGroup(groups[j], name, map)
		}
		var fastKeys = Object.getOwnPropertyNames(state.fast)
		for (var j = 0; j < fastKeys.length; j++) {
		checkStateGroup(state.fast[fastKeys[j]], name, map)
		}
		}
		@@ -243,3 +338,3 @@
		var types = Object.getOwnPropertyNames(map)
		for (var i=0; i<types.length; i++) {
		for (var i = 0; i < types.length; i++) {
		var tokenType = types[i]
		@@ -261,3 +356,2 @@ var item = map[tokenType]
		var source = ''
		source += '(function(value) {\n'
		source += 'switch (value.length) {\n'
		@@ -275,4 +369,3 @@ for (var length in byLength) {
		source += '}\n'
		source += '})'
		return eval(source) // getType
		return Function('value', source) // type
		}
		@@ -295,3 +388,6 @@
		this.col = info ? info.col : 1
		this.queuedToken = info ? info.queuedToken : null
		this.queuedThrow = info ? info.queuedThrow : null
		this.setState(info ? info.state : this.startState)
		this.stack = info && info.stack ? info.stack.slice() : []
		return this
		@@ -305,2 +401,5 @@ }
		state: this.state,
		stack: this.stack.slice(),
		queuedToken: this.queuedToken,
		queuedThrow: this.queuedThrow,
		}
		@@ -314,4 +413,5 @@ }
		this.groups = info.groups
		this.error = info.error \|\| {lineBreaks: true, shouldThrow: true}
		this.error = info.error
		this.re = info.regexp
		this.fast = info.fast
		}
		@@ -328,6 +428,6 @@

		Lexer.prototype._eat = hasSticky ? function(re) { // assume re is /y
		return re.exec(this.buffer)
		} : function(re) { // assume re is /g
		var match = re.exec(this.buffer)
		var eat = hasSticky ? function(re, buffer) { // assume re is /y
		return re.exec(buffer)
		} : function(re, buffer) { // assume re is /g
		var match = re.exec(buffer)
		// will always match, since we used the \|(?:) trick
		@@ -341,13 +441,9 @@ if (match[0].length === 0) {
		// Map a match result onto the rule group that produced it: entry i + 1 of
		// `match` is checked against entry i of this.groups, and the first entry
		// that is not undefined wins.
		//
		// NOTE(review): this body contains statements from what look like two
		// revisions. As written it returns -1 for a null match, returns the index
		// `i`, and throws 'oops'; the `return this.groups[i]` and the second `throw`
		// are unreachable. Confirm which return shape (index vs. group object) the
		// callers expect and remove the other.
		Lexer.prototype._getGroup = function(match) {
		if (match === null) {
		return -1
		}

		var groupCount = this.groups.length
		for (var i = 0; i < groupCount; i++) {
		if (match[i + 1] !== undefined) {
		return i
		// unreachable: follows an unconditional return
		return this.groups[i]
		}
		}
		// reached only when no checked entry of `match` is defined
		throw new Error('oops')
		// unreachable: follows an unconditional throw
		throw new Error('Cannot find token type for matched text')
		}
		@@ -360,6 +456,13 @@
		Lexer.prototype.next = function() {
		var re = this.re
		var index = this.index

		// If a fallback token matched, we don't need to re-run the RegExp
		if (this.queuedGroup) {
		var token = this._token(this.queuedGroup, this.queuedText, index)
		this.queuedGroup = null
		this.queuedText = ""
		return token
		}

		var buffer = this.buffer

		var index = re.lastIndex = this.index
		if (index === buffer.length) {
		@@ -369,17 +472,34 @@ return // EOF

		var match = this._eat(re)
		var i = this._getGroup(match)
		// Fast matching for single characters
		var group = this.fast[buffer.charCodeAt(index)]
		if (group) {
		return this._token(group, buffer.charAt(index), index)
		}

		var group, text
		if (i === -1) {
		group = this.error
		// Execute RegExp
		var re = this.re
		re.lastIndex = index
		var match = eat(re, buffer)

		// consume rest of buffer
		text = buffer.slice(index)
		// Error tokens match the remaining buffer
		var error = this.error
		if (match == null) {
		return this._token(error, buffer.slice(index, buffer.length), index)
		}

		} else {
		text = match[0]
		group = this.groups[i]
		var group = this._getGroup(match)
		var text = match[0]

		if (error.fallback && match.index !== index) {
		this.queuedGroup = group
		this.queuedText = text

		// Fallback tokens contain the unmatched portion of the buffer
		return this._token(error, buffer.slice(index, match.index), index)
		}

		return this._token(group, text, index)
		}

		Lexer.prototype._token = function(group, text, offset) {
		// count line breaks
		@@ -398,7 +518,7 @@ var lineBreaks = 0
		var token = {
		type: (group.getType && group.getType(text)) \|\| group.tokenType,
		value: group.value ? group.value(text) : text,
		type: (typeof group.type === 'function' && group.type(text)) \|\| group.defaultType,
		value: typeof group.value === 'function' ? group.value(text) : text,
		text: text,
		toString: tokenToString,
		offset: index,
		offset: offset,
		lineBreaks: lineBreaks,
		@@ -418,2 +538,3 @@ line: this.line,
		}

		// throw, if no rule with {error: true}
		@@ -427,2 +548,3 @@ if (group.shouldThrow) {
		else if (group.next) this.setState(group.next)

		return token
		@@ -451,3 +573,3 @@ }
		Lexer.prototype.formatError = function(token, message) {
		var value = token.value
		var value = token.text
		var index = token.offset
		@@ -468,13 +590,3 @@ var eol = token.lineBreaks ? value.indexOf('\n') : value.length
		// Report whether some rule group in any state carries the given token type,
		// either as its `tokenType` or as an own key of its `keywords` map.
		//
		// NOTE(review): the function ends with two consecutive returns. As written
		// the scan runs and `return false` is the fall-through result; the trailing
		// `return true` is unreachable. These look like two revisions (a full scan
		// vs. an unconditional `true`) — confirm which is intended and drop the other.
		Lexer.prototype.has = function(tokenType) {
		for (var s in this.states) {
		var groups = this.states[s].groups
		for (var i=0; i<groups.length; i++) {
		var group = groups[i]
		if (group.tokenType === tokenType) return true
		// keyword rules can also emit the keyword's own type name
		if (group.keywords && hasOwnProperty.call(group.keywords, tokenType)) {
		return true
		}
		}
		}
		return false
		// unreachable: follows an unconditional return
		return true
		}
		@@ -487,4 +599,6 @@
		error: Object.freeze({error: true}),
		fallback: Object.freeze({fallback: true}),
		keywords: keywordTransform,
		}

		}))
		}));

package.json

		{
		"name": "moo",
		"version": "0.4.3",
		"version": "0.5.0",
		"description": "Optimised tokenizer/lexer generator! 🐄 Much performance. Moo!",
		@@ -17,3 +17,2 @@ "main": "moo.js",
		"test": "jest .",
		"lint": "eslint moo.js",
		"benchmark": "benchr test/benchmark.js",
		@@ -25,4 +24,3 @@ "moo": "echo 'Mooooo!'"
		"chevrotain": "^0.27.1",
		"eslint": "^3.17.1",
		"jest": "^19.0.2",
		"jest": "^23.6.0",
		"lex": "^1.7.9",
		@@ -29,0 +27,0 @@ "lexing": "^0.8.0",

README.md

		@@ -86,5 +86,5 @@ ![](cow.png)
		```


		Better:


		```js
		@@ -140,3 +140,4 @@ let lexer = moo.compile({
		* `type`: the name of the group, as passed to compile.
		* `value`: the match contents.
		* `text`: the string that was matched.
		* `value`: the string that was matched, transformed by your `value` function (if any).
		* `offset`: the number of characters (UTF-16 code units, i.e. the string index) from the start of the buffer where the match starts.
		@@ -148,2 +149,19 @@ * `lineBreaks`: the number of line breaks found in the match. (Always zero if this rule has `lineBreaks: false`.)

		### Value vs. Text ###

		The `value` is the same as the `text`, unless you provide a [value transform](#transform).

		```js
		const moo = require('moo')

		const lexer = moo.compile({
		ws: /[ \t]+/,
		string: {match: /"(?:\\["\\]|[^\n"\\])*"/, value: s => s.slice(1, -1)},
		})

		lexer.reset('"test"')
		lexer.next() /* { value: 'test', text: '"test"', ... } */
		```


		### Reset ###
		@@ -181,9 +199,9 @@

		Keywords should be written using the `keywords` attribute.
		Keywords should be written using the `keywords` transform.

		```js
		moo.compile({
		IDEN: {match: /[a-zA-Z]+/, keywords: {
		KW: ['while', 'if', 'else', 'moo', 'cows']),
		}},
		IDEN: {match: /[a-zA-Z]+/, type: moo.keywords({
		KW: ['while', 'if', 'else', 'moo', 'cows'],
		})},
		SPACE: {match: /\s+/, lineBreaks: true},
		@@ -216,7 +234,7 @@ })
		let lexer = moo.compile({
		name: {match: /[a-zA-Z]+/, keywords: {
		name: {match: /[a-zA-Z]+/, type: moo.keywords({
		'kw-class': 'class',
		'kw-def': 'def',
		'kw-if': 'if',
		}},
		})},
		// ...
		@@ -242,4 +260,14 @@ })

		Sometimes you want your lexer to support different states. This is useful for string interpolation, for example: to tokenize `a${{c: d}}e`, you might use:
		Moo allows you to define multiple lexer states. Each state defines its own separate set of token rules. Your lexer will start off in the first state given to `moo.states({})`.

		Rules can be annotated with `next`, `push`, and `pop`, to change the current state after that token is matched. A "stack" of past states is kept, which is used by `push` and `pop`.

		* `next: 'bar'` moves to the state named `bar`. (The stack is not changed.)
		* `push: 'bar'` moves to the state named `bar`, and pushes the old state onto the stack.
		* `pop: 1` removes one state from the top of the stack, and moves to that state. (Only `1` is supported.)

		Only rules from the current state can be matched. You need to copy your rule into all the states you want it to be matched in.

		For example, to tokenize JS-style string interpolation such as `a${{c: d}}e`, you might use:

		```js
		@@ -266,17 +294,11 @@ let lexer = moo.states({

		It's also nice to let states inherit rules from other states and be able to count things, e.g. the interpolated expression state needs a `}` rule that can tell if it's a closing brace or the end of the interpolation, but is otherwise identical to the normal expression state.
		The `rbrace` rule is annotated with `pop`, so it moves from the `main` state into either `lit` or `main`, depending on the stack.

		To support this, Moo allows annotating tokens with `push`, `pop` and `next`:

		* `push` moves the lexer to a new state, and pushes the old state onto the stack.
		* `pop` returns to a previous state, by removing one or more states from the stack.
		* `next` moves to a new state, but does not affect the stack.


		Errors
		------

		If no token matches, Moo will throw an Error.
		If none of your rules match, Moo will throw an Error, since it doesn't know what else to do.

		If you'd rather treat errors as just another kind of token, you can ask Moo to do so.
		If you prefer, you can have Moo return an error token instead of throwing an exception. The error token will contain the whole of the rest of the buffer.

		@@ -288,8 +310,8 @@ ```js
		})


		moo.reset('invalid')
		moo.next() // -> { type: 'myError', value: 'invalid' }
		moo.next() // -> { type: 'myError', value: 'invalid', text: 'invalid', offset: 0, lineBreaks: 0, line: 1, col: 1 }
		moo.next() // -> undefined
		```


		You can have a token type that both matches tokens _and_ contains error values.
		@@ -304,2 +326,4 @@

		### Formatting errors ###

		If you want to throw an error from your parser, you might find `formatError` helpful. Call it with the offending token:
		@@ -311,3 +335,3 @@

		And it returns a string with a pretty error message.
		It returns a string with a pretty error message.

		@@ -314,0 +338,0 @@ ```

moo - npm Package Compare versions

Improved metrics