Socket
Socket
Sign in · Demo · Install

moo

Package Overview
Dependencies
Maintainers
2
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

moo - npm Package Compare versions

Comparing version 0.3.3 to 0.4.0

LICENSE

253

moo.js

@@ -13,2 +13,4 @@ (function(root, factory) {

var hasOwnProperty = Object.prototype.hasOwnProperty
// polyfill assign(), so we support IE9+
var assign = typeof Object.assign === 'function' ? Object.assign :

@@ -37,5 +39,7 @@ // https://tc39.github.io/ecma262/#sec-object.assign

/***************************************************************************/
// True when `o` is a RegExp instance (constructor identity check, matching
// the rest of the file); falsy inputs are passed straight through unchanged.
function isRegExp(o) {
  if (!o) return o
  return o.constructor === RegExp
}
// True when `o` is a plain-ish object: typeof 'object', but neither a RegExp
// (constructor identity check) nor an Array. Falsy inputs are passed straight
// through unchanged, mirroring the original short-circuit behaviour.
function isObject(o) {
  if (!o) return o
  if (typeof o !== 'object') return false
  if (o.constructor === RegExp) return false
  return !Array.isArray(o)
}
function reEscape(s) {

@@ -58,3 +62,2 @@ return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&')

function compareLength(a, b) {

@@ -86,3 +89,15 @@ return b.length - a.length

var key = keys[i]
result.push(ruleOptions(key, object[key]))
var thing = object[key]
var rules = Array.isArray(thing) ? thing : [thing]
var match = []
rules.forEach(function(rule) {
if (isObject(rule)) {
if (match.length) result.push(ruleOptions(key, match))
result.push(ruleOptions(key, rule))
match = []
} else {
match.push(rule)
}
})
if (match.length) result.push(ruleOptions(key, match))
}

@@ -117,4 +132,5 @@ return result

error: false,
value: null,
getType: null,
}, obj)
options.keywords = null

@@ -124,57 +140,15 @@ // convert to array

options.match = Array.isArray(match) ? match : match ? [match] : []
options.match.sort(function(a, b) {
return isRegExp(a) && isRegExp(b) ? 0
: isRegExp(b) ? -1 : isRegExp(a) ? +1 : b.length - a.length
})
if (options.keywords) {
options.getType = keywordTransform(options.keywords)
}
return options
}
// Normalises each rule's match list so the longest-match principle holds:
// string literals are sorted longest-first and placed ahead of RegExps, and
// every RegExp containing a capture group is split out into its own rule
// (each part of the combined lexer RegExp may hold at most one group).
// Mutates each options.match in place; returns the flattened rule list.
function sortRules(rules) {
  var result = []
  rules.forEach(function(options) {
    var literals = options.match.filter(function(m) { return !isRegExp(m) })
    var regexps = options.match.filter(isRegExp)
    var plain = regexps.filter(function(re) { return reGroups(re.source) === 0 })
    var capturing = regexps.filter(function(re) { return reGroups(re.source) > 0 })
    // longest literal first, so e.g. 'while' wins over 'wh'
    literals.sort(compareLength)
    // non-capturing regexps go after all literals
    options.match = literals.concat(plain)
    result.push(options)
    // each capturing regexp becomes a standalone clone of this rule
    capturing.forEach(function(re) {
      result.push(assign({}, options, { match: [re] }))
    })
  })
  return result
}
// Finds the first rule among `otherRules` whose RegExp patterns match
// `literal` in its entirety (e.g. the identifier rule that would otherwise
// swallow a keyword). String patterns are ignored. Returns the matching
// rule, or undefined when no rule fully matches.
function getIdentifier(literal, otherRules) {
  for (var i = 0; i < otherRules.length; i++) {
    var candidate = otherRules[i]
    var fullMatch = candidate.match.some(function(pat) {
      if (!isRegExp(pat)) return false
      var m = pat.exec(literal)
      return !!(m && m[0] === literal)
    })
    if (fullMatch) return candidate
  }
}
function compileRules(rules, hasStates) {
rules = Array.isArray(rules) ? arrayToRules(rules) : objectToRules(rules)
rules = sortRules(rules)
var errorRule = null

@@ -193,22 +167,2 @@ var groups = []

// look for keywords
var match = options.match
var notKeywords = []
for (var j=0; j<match.length; j++) {
var word = match[j]
if (typeof word === 'string') {
// does it match an existing rule (e.g. identifier?)
var other = getIdentifier(word, rules)
if (other) {
if (!other.keywords) {
other.keywords = Object.create(null)
}
other.keywords[word] = options
continue
}
}
notKeywords.push(word)
}
options.match = notKeywords
// skip rules with no match

@@ -229,4 +183,4 @@ if (options.match.length === 0) {

var groupCount = reGroups(pat)
if (groupCount > 1) {
throw new Error("RegExp has more than one capture group: " + regexp)
if (groupCount > 0) {
throw new Error("RegExp has capture groups: " + regexp + "\nUse (?: … ) instead")
}

@@ -243,5 +197,3 @@ if (!hasStates && (options.pop || options.push || options.next)) {

// store regex
var isCapture = !!groupCount
if (!isCapture) pat = reCapture(pat)
parts.push(pat)
parts.push(reCapture(pat))
}

@@ -274,3 +226,3 @@

for (var j=0; j<groups.length; j++) {
var g = groups[i]
var g = groups[j]
var state = g && (g.push || g.next)

@@ -280,2 +232,6 @@ if (state && !map[state]) {

}
var pop = g && g.pop
if (pop && typeof pop !== 'boolean') {
throw new Error("pop must be true (in token '" + g.tokenType + "' of state '" + keys[i] + "')")
}
}

@@ -287,3 +243,42 @@ }

/**
 * Builds a fast keyword classifier from a {tokenType: keyword | keywords[]}
 * map. Returns a getType(value) function that maps an exact keyword string
 * to its token type, or undefined for non-keywords.
 *
 * The lookup is compiled into a nested switch — first on value.length, then
 * on value — which benchmarks faster than plain object lookups for this
 * access pattern (see https://jsperf.com/string-lookups).
 */
function keywordTransform(map) {
  // keyword -> tokenType; null prototype so keywords like "constructor"
  // or "__proto__" cannot collide with Object.prototype
  var reverseMap = Object.create(null)
  // keyword length -> [keywords], so the generated code can branch on length first
  var byLength = Object.create(null)
  var types = Object.getOwnPropertyNames(map)
  for (var i=0; i<types.length; i++) {
    var tokenType = types[i]
    var item = map[tokenType]
    var keywordList = Array.isArray(item) ? item : [item]
    keywordList.forEach(function(keyword) {
      (byLength[keyword.length] = byLength[keyword.length] || []).push(keyword)
      reverseMap[keyword] = tokenType
    })
  }

  // fast string lookup
  // https://jsperf.com/string-lookups
  function str(x) { return JSON.stringify(x) }
  var source = ''
  source += '(function(value) {\n'
  source += 'switch (value.length) {\n'
  for (var length in byLength) {
    var keywords = byLength[length]
    source += 'case ' + length + ':\n'
    source += 'switch (value) {\n'
    keywords.forEach(function(keyword) {
      var tokenType = reverseMap[keyword]
      if (typeof tokenType !== 'string') {
        // was: `+ name` — `name` is undefined here (ReferenceError in strict
        // mode); report the offending keyword instead
        throw new Error('keyword type must be string: ' + str(keyword))
      }
      source += 'case ' + str(keyword) + ': return ' + str(tokenType) + '\n'
    })
    source += '}\n'
  }
  source += '}\n'
  source += '})'
  // NOTE: deliberate eval() of an internally generated string; every keyword
  // and type is JSON-escaped via str(), and no external input reaches here.
  return eval(source) // getType
}
/***************************************************************************/
var Lexer = function(states, state) {

@@ -297,2 +292,19 @@ this.startState = state

// Primes the lexer with a new input string. When `info` (as produced by
// save()) is given, line/col/state are restored from it; otherwise the
// position resets to line 1, column 1 in the start state. Returns `this`
// so calls can be chained.
Lexer.prototype.reset = function(data, info) {
  this.buffer = data || ''
  this.index = 0
  if (info) {
    this.line = info.line
    this.col = info.col
    this.setState(info.state)
  } else {
    this.line = 1
    this.col = 1
    this.setState(this.startState)
  }
  return this
}
// Snapshots the restartable lexer position (line, col, state) as a plain
// object suitable for passing back to reset(). The buffer itself is not
// captured — the caller re-supplies the text to reset().
Lexer.prototype.save = function() {
  var snapshot = { line: this.line, col: this.col, state: this.state }
  return snapshot
}
Lexer.prototype.setState = function(state) {

@@ -303,3 +315,3 @@ if (!state || this.state === state) return

this.groups = info.groups
this.error = info.error
this.error = info.error || {lineBreaks: true, shouldThrow: true}
this.re = info.regexp

@@ -317,3 +329,3 @@ }

Lexer.prototype.eat = hasSticky ? function(re) { // assume re is /y
Lexer.prototype._eat = hasSticky ? function(re) { // assume re is /y
return re.exec(this.buffer)

@@ -329,4 +341,18 @@ } : function(re) { // assume re is /g

// Maps a RegExp match result to the index of the rule group that produced
// it: each rule occupies one capture group in the combined lexer RegExp,
// so exactly one of match[1..groupCount] should be defined. Returns -1 for
// a failed match (match === null). Throws if no group matched, which would
// indicate an internal inconsistency between this.re and this.groups.
Lexer.prototype._getGroup = function(match) {
  if (match === null) {
    return -1
  }
  var groupCount = this.groups.length
  for (var i = 0; i < groupCount; i++) {
    if (match[i + 1] !== undefined) {
      return i
    }
  }
  // was: 'oops' — name the broken invariant so failures are debuggable
  throw new Error('Cannot find token type for matched text: ' + JSON.stringify(match[0]))
}
function tokenToString() {
return this.value || this.type
return this.value
}

@@ -343,27 +369,15 @@

var match = this.eat(re)
var group, value, text
if (match === null) {
var match = this._eat(re)
var i = this._getGroup(match)
var group, value
if (i === -1) {
group = this.error
// consume rest of buffer
text = value = buffer.slice(index)
value = buffer.slice(index)
} else {
text = match[0]
var groups = this.groups
for (var i = 0; i < groups.length; i++) {
value = match[i + 1]
if (value !== undefined) {
group = groups[i]
// TODO is `buffer` being leaked here?
break
}
}
// assert(i < groupCount)
// check for keywords
if (group.keywords) {
group = group.keywords[text] || group
}
value = match[0] // i+1
group = this.groups[i]
}

@@ -373,16 +387,16 @@

var lineBreaks = 0
if (!group || group.lineBreaks) {
if (group.lineBreaks) {
var matchNL = /\n/g
var nl = 1
if (text === '\n') {
if (value === '\n') {
lineBreaks = 1
} else {
while (matchNL.exec(text)) { lineBreaks++; nl = matchNL.lastIndex }
while (matchNL.exec(value)) { lineBreaks++; nl = matchNL.lastIndex }
}
}
var size = text.length
var size = value.length
var token = {
type: group && group.tokenType,
value: value,
type: (group.getType && group.getType(value)) || group.tokenType,
value: (group.value && group.value(value)) || value,
toString: tokenToString,

@@ -404,3 +418,3 @@ offset: index,

// throw, if no rule with {error: true}
if (!group) {
if (group.shouldThrow) {
throw new Error(this.formatError(token, "invalid syntax"))

@@ -425,2 +439,6 @@ }

// An iterator must itself be iterable (return `this`) so that constructs
// like `for...of` and spread can consume a LexerIterator directly.
LexerIterator.prototype[Symbol.iterator] = function() {
return this
}
Lexer.prototype[Symbol.iterator] = function() {

@@ -443,19 +461,2 @@ return new LexerIterator(this)

// Primes the lexer with new input; `info` (as produced by save()) restores
// line/col/state, otherwise the position resets to line 1, col 1 in the
// start state. Returns `this` for chaining.
Lexer.prototype.reset = function(data, info) {
this.buffer = data || ''
this.index = 0
this.line = info ? info.line : 1
this.col = info ? info.col : 1
this.setState(info ? info.state : this.startState)
return this
}
// Captures the current restartable position (line, col, state) for a
// later reset(); the buffer itself is not included.
Lexer.prototype.save = function() {
return {
line: this.line,
col: this.col,
state: this.state,
}
}
Lexer.prototype.clone = function() {

@@ -462,0 +463,0 @@ return new Lexer(this.states, this.state)

{
"name": "moo",
"version": "0.3.3",
"version": "0.4.0",
"description": "Optimised tokenizer/lexer generator! 🐄 Uses /y for performance. Moo!",

@@ -5,0 +5,0 @@ "main": "moo.js",

@@ -47,4 +47,4 @@ ![](cow.png)

comment: /\/\/.*?$/,
number: /(0|[1-9][0-9]*)/,
string: /"((?:\\["\\]|[^\n"\\])*)"/,
number: /0|[1-9][0-9]*/,
string: /"(?:\\["\\]|[^\n"\\])*"/,
lparen: '(',

@@ -80,3 +80,3 @@ rparen: ')',

let lexer = moo.compile({
string: /"(.*)"/, // greedy quantifier *
string: /".*"/, // greedy quantifier *
// ...

@@ -93,3 +93,3 @@ })

let lexer = moo.compile({
string: /"(.*?)"/, // non-greedy quantifier *?
string: /".*?"/, // non-greedy quantifier *?
// ...

@@ -118,7 +118,5 @@ })

(Note: moo [special-cases keywords](#keywords); in which case order is ignored.)
* Moo uses **multiline RegExps**. This has a few quirks: for example, the **dot `/./` doesn't include newlines**. Use `[^]` instead if you want to match newlines too.
* Since excluding capture groups like `/[^ ]/` (no spaces) _will_ include newlines, you have to be careful not to include them by accident! In particular, the whitespace metacharacter `\s` includes newlines.
* Since an excluding character range like `/[^ ]/` (which matches anything but a space) _will_ include newlines, you have to be careful not to include them by accident! In particular, the whitespace metacharacter `\s` includes newlines.

@@ -136,4 +134,3 @@

* **`type`**: the name of the group, as passed to compile.
* **`value`**: the contents of the capturing group (or the whole match, if the token RegExp doesn't define a capture).
* **`size`**: the total length of the match (`value` may be shorter if you have capturing groups).
* **`value`**: the match contents.
* **`offset`**: the number of bytes from the start of the buffer where the match starts.

@@ -165,3 +162,3 @@ * **`lineBreaks`**: the number of line breaks found in the match. (Always zero if this rule has `lineBreaks: false`.)

Moo makes it convenient to define literals and keywords.
Moo makes it convenient to define literals.

@@ -178,26 +175,50 @@ ```js

Important! **Always write your literals like this:**
**Keywords** should be written using the `keywords` attribute.
```js
['while', 'if', 'else', 'moo', 'cows']
moo.compile({
IDEN: {match: /[a-zA-Z]+/, keywords: {
KW: ['while', 'if', 'else', 'moo', 'cows'],
}},
SPACE: {match: /\s+/, lineBreaks: true},
})
```
And **not** like this:
You need to do this to ensure the **longest match** principle applies, even in edge cases.
Imagine trying to parse the input `className` with the following rules:
```js
/while|if|else|moo|cows/
['keyword', ['class']],
['identifier', /[a-zA-Z]+/],
```
### Why? ###
You'll get _two_ tokens — `['class', 'Name']` — which is _not_ what you want! If you swap the order of the rules, you'll fix this example; but now you'll lex `class` wrong (as an `identifier`).
The reason: Moo special-cases keywords to ensure the **longest match** principle applies, even in edge cases.
The keywords helper checks matches against the list of keywords; if any of them match, it uses the type `'keyword'` instead of `'identifier'` (for this example).
Imagine trying to parse the input `className` with the following rules:
Keywords can also have **individual types**.
['keyword', ['class']],
['identifier', /[a-zA-Z]+/],
```js
let lexer = moo.compile({
name: {match: /[a-zA-Z]+/, keywords: {
'kw-class': 'class',
'kw-def': 'def',
'kw-if': 'if',
}},
// ...
})
lexer.reset('def foo')
lexer.next() // -> { type: 'kw-def', value: 'def' }
lexer.next() // space
lexer.next() // -> { type: 'name', value: 'foo' }
```
You'll get _two_ tokens — `['class', 'Name']` — which is _not_ what you want! If you swap the order of the rules, you'll fix this example; but now you'll lex `class` wrong (as an `identifier`).
Use [itt](https://github.com/nathan/itt)'s iterator adapters to make constructing keyword objects easier:
Moo solves this by checking to see if any of your literals can be matched by one of your other rules; if so, it doesn't lex the keyword separately, but instead handles it at a later stage (by checking identifiers against a list of keywords).
```js
itt(['class', 'def', 'if'])
.map(k => ['kw-' + k, k])
.toObject()
```

@@ -216,3 +237,3 @@

lbrace: {match: '{', push: 'main'},
rbrace: {match: '}', pop: 1},
rbrace: {match: '}', pop: true},
colon: ':',

@@ -224,3 +245,3 @@ space: {match: /\s+/, lineBreaks: true},

escape: /\\./,
strend: {match: '`', pop: 1},
strend: {match: '`', pop: true},
const: {match: /(?:[^$`]|\$(?!\{))+/, lineBreaks: true},

@@ -311,2 +332,19 @@ },

Transform
---------
Moo doesn't allow capturing groups, but you can supply a transform function, `value()`, which will be called on the value before storing it in the Token object.
```js
moo.compile({
STRING: [
{match: /"""[^]*?"""/, lineBreaks: true, value: x => x.slice(3, -3)},
{match: /"(?:\\["\\rn]|[^"\\])*?"/, lineBreaks: true, value: x => x.slice(1, -1)},
{match: /'(?:\\['\\rn]|[^'\\])*?'/, lineBreaks: true, value: x => x.slice(1, -1)},
],
// ...
})
```
Contributing

@@ -313,0 +351,0 @@ ------------

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc