Socket
Socket
Sign inDemoInstall

moo

Package Overview
Dependencies
Maintainers
2
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

moo - npm Package Compare versions

Comparing version 0.4.3 to 0.5.0

352

moo.js

@@ -13,25 +13,3 @@ (function(root, factory) {

var hasOwnProperty = Object.prototype.hasOwnProperty

// polyfill assign(), so we support IE9+
// https://tc39.github.io/ecma262/#sec-object.assign
var assign = typeof Object.assign === 'function' ? Object.assign :
  function(target, sources) {
    if (target == null) {
      throw new TypeError('Target cannot be null and undefined'.replace(' and ', ' or '));
    }
    var to = Object(target)
    var argCount = arguments.length
    // copy own enumerable properties from each source, left to right;
    // null/undefined sources are skipped, later sources win
    for (var n = 1; n < argCount; n++) {
      var src = arguments[n]
      if (src == null) continue
      for (var prop in src) {
        if (hasOwnProperty.call(src, prop)) {
          to[prop] = src[prop]
        }
      }
    }
    return to
  }

var toString = Object.prototype.toString

// true when this engine supports the RegExp sticky (/y) flag
var hasSticky = typeof new RegExp().sticky === 'boolean'

@@ -41,4 +19,4 @@

function isRegExp(o) { return o && o.constructor === RegExp }
function isObject(o) { return o && typeof o === 'object' && o.constructor !== RegExp && !Array.isArray(o) }
// Reliable RegExp detection using the internal type tag, which works
// even for RegExps created in another realm (unlike `instanceof`).
function isRegExp(o) {
  return o && toString.call(o) === '[object RegExp]'
}

// A plain(ish) object: non-null, typeof 'object', and neither an
// Array nor a RegExp. Falsy inputs are passed through unchanged.
function isObject(o) {
  return o && typeof o === 'object' && !Array.isArray(o) && !isRegExp(o)
}

@@ -56,2 +34,3 @@ function reEscape(s) {

function reUnion(regexps) {
if (!regexps.length) return '(?!)'
var source = regexps.map(function(s) {

@@ -69,10 +48,11 @@ return "(?:" + s + ")"

// TODO: consider /u support
if (obj.ignoreCase) { throw new Error('RegExp /i flag not allowed') }
if (obj.global) { throw new Error('RegExp /g flag is implied') }
if (obj.sticky) { throw new Error('RegExp /y flag is implied') }
if (obj.multiline) { throw new Error('RegExp /m flag is implied') }
if (obj.ignoreCase) throw new Error('RegExp /i flag not allowed')
if (obj.global) throw new Error('RegExp /g flag is implied')
if (obj.sticky) throw new Error('RegExp /y flag is implied')
if (obj.multiline) throw new Error('RegExp /m flag is implied')
if (obj.unicode) throw new Error('RegExp /u flag is not allowed')
return obj.source
} else {
throw new Error('not a pattern: ' + obj)
throw new Error('Not a pattern: ' + obj)
}

@@ -84,6 +64,12 @@ }

var result = []
for (var i=0; i<keys.length; i++) {
for (var i = 0; i < keys.length; i++) {
var key = keys[i]
var thing = object[key]
var rules = Array.isArray(thing) ? thing : [thing]
var rules = [].concat(thing)
if (key === 'include') {
for (var j = 0; j < rules.length; j++) {
result.push({include: rules[j]})
}
continue
}
var match = []

@@ -106,8 +92,15 @@ rules.forEach(function(rule) {

var result = []
for (var i=0; i<array.length; i++) {
for (var i = 0; i < array.length; i++) {
var obj = array[i]
if (!obj.name) {
throw new Error('Rule has no name: ' + JSON.stringify(obj))
if (obj.include) {
var include = [].concat(obj.include)
for (var j = 0; j < include.length; j++) {
result.push({include: include[j]})
}
continue
}
result.push(ruleOptions(obj.name, obj))
if (!obj.type) {
throw new Error('Rule has no type: ' + JSON.stringify(obj))
}
result.push(ruleOptions(obj.type, obj))
}

@@ -117,11 +110,14 @@ return result

function ruleOptions(name, obj) {
if (typeof obj !== 'object' || Array.isArray(obj) || isRegExp(obj)) {
function ruleOptions(type, obj) {
if (!isObject(obj)) {
obj = { match: obj }
}
if (obj.include) {
throw new Error('Matching rules cannot also include states')
}
// nb. error implies lineBreaks
var options = assign({
tokenType: name,
lineBreaks: !!obj.error,
// nb. error and fallback imply lineBreaks
var options = {
defaultType: type,
lineBreaks: !!obj.error || !!obj.fallback,
pop: false,

@@ -131,6 +127,20 @@ next: null,

error: false,
fallback: false,
value: null,
getType: null,
}, obj)
type: null,
shouldThrow: false,
}
// Avoid Object.assign(), so we support IE9+
for (var key in obj) {
if (hasOwnProperty.call(obj, key)) {
options[key] = obj[key]
}
}
// type transform cannot be a string
if (typeof options.type === 'string' && type !== options.type) {
throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')")
}
// convert to array

@@ -143,20 +153,40 @@ var match = options.match

})
if (options.keywords) {
options.getType = keywordTransform(options.keywords)
}
return options
}
// Normalise a rules spec into a flat rule list, dispatching on its
// shape: arrays go through arrayToRules, keyed objects through
// objectToRules.
function toRules(spec) {
  if (Array.isArray(spec)) {
    return arrayToRules(spec)
  }
  return objectToRules(spec)
}
var defaultErrorRule = ruleOptions('error', {lineBreaks: true, shouldThrow: true})
function compileRules(rules, hasStates) {
rules = Array.isArray(rules) ? arrayToRules(rules) : objectToRules(rules)
var errorRule = null
var fast = Object.create(null)
var fastAllowed = true
var groups = []
var parts = []
for (var i=0; i<rules.length; i++) {
// If there is a fallback rule, then disable fast matching
for (var i = 0; i < rules.length; i++) {
if (rules[i].fallback) {
fastAllowed = false
}
}
for (var i = 0; i < rules.length; i++) {
var options = rules[i]
if (options.error) {
if (options.include) {
// all valid inclusions are removed by states() preprocessor
throw new Error('Inheritance is not allowed in stateless lexers')
}
if (options.error || options.fallback) {
// errorRule can only be set once
if (errorRule) {
throw new Error("Multiple error rules not allowed: (for token '" + options.tokenType + "')")
if (!options.fallback === !errorRule.fallback) {
throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')")
} else {
throw new Error("fallback and error are mutually exclusive (for token '" + options.defaultType + "')")
}
}

@@ -166,10 +196,30 @@ errorRule = options

// skip rules with no match
if (options.match.length === 0) {
var match = options.match
if (fastAllowed) {
while (match.length && typeof match[0] === 'string' && match[0].length === 1) {
var word = match.shift()
fast[word.charCodeAt(0)] = options
}
}
// Warn about inappropriate state-switching options
if (options.pop || options.push || options.next) {
if (!hasStates) {
throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')")
}
if (options.fallback) {
throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')")
}
}
// Only rules with a .match are included in the RegExp
if (match.length === 0) {
continue
}
fastAllowed = false
groups.push(options)
// convert to RegExp
var pat = reUnion(options.match.map(regexpOrLiteral))
var pat = reUnion(match.map(regexpOrLiteral))

@@ -185,5 +235,2 @@ // validate

}
if (!hasStates && (options.pop || options.push || options.next)) {
throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.tokenType + "')")
}

@@ -199,36 +246,84 @@ // try and detect rules matching newlines

var suffix = hasSticky ? '' : '|(?:)'
var flags = hasSticky ? 'ym' : 'gm'
// If there's no fallback rule, use the sticky flag so we only look for
// matches at the current index.
//
// If we don't support the sticky flag, then fake it using an irrefutable
// match (i.e. an empty pattern).
var fallbackRule = errorRule && errorRule.fallback
var flags = hasSticky && !fallbackRule ? 'ym' : 'gm'
var suffix = hasSticky || fallbackRule ? '' : '|'
var combined = new RegExp(reUnion(parts) + suffix, flags)
return {regexp: combined, groups: groups, error: errorRule}
return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule}
}
function compile(rules) {
var result = compileRules(rules)
var result = compileRules(toRules(rules))
return new Lexer({start: result}, 'start')
}
// Validate one compiled rule's state-switching options against the
// full state map. `g` may be null/undefined (an empty slot), in which
// case there is nothing to check. Throws on a `push`/`next` target
// that names a nonexistent state, or a `pop` value other than 1.
function checkStateGroup(g, name, map) {
  if (!g) return
  var dest = g.push || g.next
  if (dest && !map[dest]) {
    throw new Error("Missing state '" + dest + "' (in token '" + g.defaultType + "' of state '" + name + "')")
  }
  if (g.pop && +g.pop !== 1) {
    throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')")
  }
}
function compileStates(states, start) {
var all = states.$all ? toRules(states.$all) : []
delete states.$all
var keys = Object.getOwnPropertyNames(states)
if (!start) start = keys[0]
var ruleMap = Object.create(null)
for (var i = 0; i < keys.length; i++) {
var key = keys[i]
ruleMap[key] = toRules(states[key]).concat(all)
}
for (var i = 0; i < keys.length; i++) {
var key = keys[i]
var rules = ruleMap[key]
var included = Object.create(null)
for (var j = 0; j < rules.length; j++) {
var rule = rules[j]
if (!rule.include) continue
var splice = [j, 1]
if (rule.include !== key && !included[rule.include]) {
included[rule.include] = true
var newRules = ruleMap[rule.include]
if (!newRules) {
throw new Error("Cannot include nonexistent state '" + rule.include + "' (in state '" + key + "')")
}
for (var k = 0; k < newRules.length; k++) {
var newRule = newRules[k]
if (rules.indexOf(newRule) !== -1) continue
splice.push(newRule)
}
}
rules.splice.apply(rules, splice)
j--
}
}
var map = Object.create(null)
for (var i=0; i<keys.length; i++) {
for (var i = 0; i < keys.length; i++) {
var key = keys[i]
map[key] = compileRules(states[key], true)
map[key] = compileRules(ruleMap[key], true)
}
for (var i=0; i<keys.length; i++) {
var groups = map[keys[i]].groups
for (var j=0; j<groups.length; j++) {
var g = groups[j]
var state = g && (g.push || g.next)
if (state && !map[state]) {
throw new Error("Missing state '" + state + "' (in token '" + g.tokenType + "' of state '" + keys[i] + "')")
}
if (g && g.pop && +g.pop !== 1) {
throw new Error("pop must be 1 (in token '" + g.tokenType + "' of state '" + keys[i] + "')")
}
for (var i = 0; i < keys.length; i++) {
var name = keys[i]
var state = map[name]
var groups = state.groups
for (var j = 0; j < groups.length; j++) {
checkStateGroup(groups[j], name, map)
}
var fastKeys = Object.getOwnPropertyNames(state.fast)
for (var j = 0; j < fastKeys.length; j++) {
checkStateGroup(state.fast[fastKeys[j]], name, map)
}
}

@@ -243,3 +338,3 @@

var types = Object.getOwnPropertyNames(map)
for (var i=0; i<types.length; i++) {
for (var i = 0; i < types.length; i++) {
var tokenType = types[i]

@@ -261,3 +356,2 @@ var item = map[tokenType]

var source = ''
source += '(function(value) {\n'
source += 'switch (value.length) {\n'

@@ -275,4 +369,3 @@ for (var length in byLength) {

source += '}\n'
source += '})'
return eval(source) // getType
return Function('value', source) // type
}

@@ -295,3 +388,6 @@

this.col = info ? info.col : 1
this.queuedToken = info ? info.queuedToken : null
this.queuedThrow = info ? info.queuedThrow : null
this.setState(info ? info.state : this.startState)
this.stack = info && info.stack ? info.stack.slice() : []
return this

@@ -305,2 +401,5 @@ }

state: this.state,
stack: this.stack.slice(),
queuedToken: this.queuedToken,
queuedThrow: this.queuedThrow,
}

@@ -314,4 +413,5 @@ }

this.groups = info.groups
this.error = info.error || {lineBreaks: true, shouldThrow: true}
this.error = info.error
this.re = info.regexp
this.fast = info.fast
}

@@ -328,6 +428,6 @@

Lexer.prototype._eat = hasSticky ? function(re) { // assume re is /y
return re.exec(this.buffer)
} : function(re) { // assume re is /g
var match = re.exec(this.buffer)
var eat = hasSticky ? function(re, buffer) { // assume re is /y
return re.exec(buffer)
} : function(re, buffer) { // assume re is /g
var match = re.exec(buffer)
// will always match, since we used the |(?:) trick

@@ -341,13 +441,9 @@ if (match[0].length === 0) {

Lexer.prototype._getGroup = function(match) {
if (match === null) {
return -1
}
var groupCount = this.groups.length
for (var i = 0; i < groupCount; i++) {
if (match[i + 1] !== undefined) {
return i
return this.groups[i]
}
}
throw new Error('oops')
throw new Error('Cannot find token type for matched text')
}

@@ -360,6 +456,13 @@

Lexer.prototype.next = function() {
var re = this.re
var index = this.index
// If a fallback token matched, we don't need to re-run the RegExp
if (this.queuedGroup) {
var token = this._token(this.queuedGroup, this.queuedText, index)
this.queuedGroup = null
this.queuedText = ""
return token
}
var buffer = this.buffer
var index = re.lastIndex = this.index
if (index === buffer.length) {

@@ -369,17 +472,34 @@ return // EOF

var match = this._eat(re)
var i = this._getGroup(match)
// Fast matching for single characters
var group = this.fast[buffer.charCodeAt(index)]
if (group) {
return this._token(group, buffer.charAt(index), index)
}
var group, text
if (i === -1) {
group = this.error
// Execute RegExp
var re = this.re
re.lastIndex = index
var match = eat(re, buffer)
// consume rest of buffer
text = buffer.slice(index)
// Error tokens match the remaining buffer
var error = this.error
if (match == null) {
return this._token(error, buffer.slice(index, buffer.length), index)
}
} else {
text = match[0]
group = this.groups[i]
var group = this._getGroup(match)
var text = match[0]
if (error.fallback && match.index !== index) {
this.queuedGroup = group
this.queuedText = text
// Fallback tokens contain the unmatched portion of the buffer
return this._token(error, buffer.slice(index, match.index), index)
}
return this._token(group, text, index)
}
Lexer.prototype._token = function(group, text, offset) {
// count line breaks

@@ -398,7 +518,7 @@ var lineBreaks = 0

var token = {
type: (group.getType && group.getType(text)) || group.tokenType,
value: group.value ? group.value(text) : text,
type: (typeof group.type === 'function' && group.type(text)) || group.defaultType,
value: typeof group.value === 'function' ? group.value(text) : text,
text: text,
toString: tokenToString,
offset: index,
offset: offset,
lineBreaks: lineBreaks,

@@ -418,2 +538,3 @@ line: this.line,

}
// throw, if no rule with {error: true}

@@ -427,2 +548,3 @@ if (group.shouldThrow) {

else if (group.next) this.setState(group.next)
return token

@@ -451,3 +573,3 @@ }

Lexer.prototype.formatError = function(token, message) {
var value = token.value
var value = token.text
var index = token.offset

@@ -468,13 +590,3 @@ var eol = token.lineBreaks ? value.indexOf('\n') : value.length

Lexer.prototype.has = function(tokenType) {
for (var s in this.states) {
var groups = this.states[s].groups
for (var i=0; i<groups.length; i++) {
var group = groups[i]
if (group.tokenType === tokenType) return true
if (group.keywords && hasOwnProperty.call(group.keywords, tokenType)) {
return true
}
}
}
return false
return true
}

@@ -487,4 +599,6 @@

error: Object.freeze({error: true}),
fallback: Object.freeze({fallback: true}),
keywords: keywordTransform,
}
}))
}));
{
"name": "moo",
"version": "0.4.3",
"version": "0.5.0",
"description": "Optimised tokenizer/lexer generator! πŸ„ Much performance. Moo!",

@@ -17,3 +17,2 @@ "main": "moo.js",

"test": "jest .",
"lint": "eslint moo.js",
"benchmark": "benchr test/benchmark.js",

@@ -25,4 +24,3 @@ "moo": "echo 'Mooooo!'"

"chevrotain": "^0.27.1",
"eslint": "^3.17.1",
"jest": "^19.0.2",
"jest": "^23.6.0",
"lex": "^1.7.9",

@@ -29,0 +27,0 @@ "lexing": "^0.8.0",

@@ -86,5 +86,5 @@ ![](cow.png)

```
Better:
```js

@@ -140,3 +140,4 @@ let lexer = moo.compile({

* **`type`**: the name of the group, as passed to compile.
* **`value`**: the match contents.
* **`text`**: the string that was matched.
* **`value`**: the string that was matched, transformed by your `value` function (if any).
* **`offset`**: the number of bytes from the start of the buffer where the match starts.

@@ -148,2 +149,19 @@ * **`lineBreaks`**: the number of line breaks found in the match. (Always zero if this rule has `lineBreaks: false`.)

### Value vs. Text ###
The `value` is the same as the `text`, unless you provide a [value transform](#transform).
```js
const moo = require('moo')
const lexer = moo.compile({
ws: /[ \t]+/,
string: {match: /"(?:\\["\\]|[^\n"\\])*"/, value: s => s.slice(1, -1)},
})
lexer.reset('"test"')
lexer.next() /* { value: 'test', text: '"test"', ... } */
```
### Reset ###

@@ -181,9 +199,9 @@

**Keywords** should be written using the `keywords` attribute.
**Keywords** should be written using the `keywords` transform.
```js
moo.compile({
IDEN: {match: /[a-zA-Z]+/, keywords: {
KW: ['while', 'if', 'else', 'moo', 'cows'],
}},
IDEN: {match: /[a-zA-Z]+/, type: moo.keywords({
KW: ['while', 'if', 'else', 'moo', 'cows'],
})},
SPACE: {match: /\s+/, lineBreaks: true},

@@ -216,7 +234,7 @@ })

let lexer = moo.compile({
name: {match: /[a-zA-Z]+/, keywords: {
name: {match: /[a-zA-Z]+/, type: moo.keywords({
'kw-class': 'class',
'kw-def': 'def',
'kw-if': 'if',
}},
})},
// ...

@@ -242,4 +260,14 @@ })

Sometimes you want your lexer to support different states. This is useful for string interpolation, for example: to tokenize `a${{c: d}}e`, you might use:
Moo allows you to define multiple lexer **states**. Each state defines its own separate set of token rules. Your lexer will start off in the first state given to `moo.states({})`.
Rules can be annotated with `next`, `push`, and `pop`, to change the current state after that token is matched. A "stack" of past states is kept, which is used by `push` and `pop`.
* **`next: 'bar'`** moves to the state named `bar`. (The stack is not changed.)
* **`push: 'bar'`** moves to the state named `bar`, and pushes the old state onto the stack.
* **`pop: 1`** removes one state from the top of the stack, and moves to that state. (Only `1` is supported.)
Only rules from the current state can be matched. You need to copy your rule into all the states you want it to be matched in.
For example, to tokenize JS-style string interpolation such as `a${{c: d}}e`, you might use:
```js

@@ -266,17 +294,11 @@ let lexer = moo.states({

It's also nice to let states inherit rules from other states and be able to count things, e.g. the interpolated expression state needs a `}` rule that can tell if it's a closing brace or the end of the interpolation, but is otherwise identical to the normal expression state.
The `rbrace` rule is annotated with `pop`, so it moves from the `main` state into either `lit` or `main`, depending on the stack.
To support this, Moo allows annotating tokens with `push`, `pop` and `next`:
* **`push`** moves the lexer to a new state, and pushes the old state onto the stack.
* **`pop`** returns to a previous state, by removing one or more states from the stack.
* **`next`** moves to a new state, but does not affect the stack.
Errors
------
If no token matches, Moo will throw an Error.
If none of your rules match, Moo will throw an Error, since it doesn't know what else to do.
If you'd rather treat errors as just another kind of token, you can ask Moo to do so.
If you prefer, you can have moo return an error token instead of throwing an exception. The error token will contain the whole of the rest of the buffer.

@@ -288,8 +310,8 @@ ```js

})
moo.reset('invalid')
moo.next() // -> { type: 'myError', value: 'invalid' }
moo.next() // -> { type: 'myError', value: 'invalid', text: 'invalid', offset: 0, lineBreaks: 0, line: 1, col: 1 }
moo.next() // -> undefined
```
You can have a token type that both matches tokens _and_ contains error values.

@@ -304,2 +326,4 @@

### Formatting errors ###
If you want to throw an error from your parser, you might find `formatError` helpful. Call it with the offending token:

@@ -311,3 +335,3 @@

And it returns a string with a pretty error message.
It returns a string with a pretty error message.

@@ -314,0 +338,0 @@ ```

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚑️ by Socket Inc