Comparing version 0.2.8 to 0.3.0
@@ -6,4 +6,4 @@ exports.error = function(options) { | ||
var Error = function(options) { | ||
this._expected = options.expected; | ||
this._actual = options.actual; | ||
this.expected = options.expected; | ||
this.actual = options.actual; | ||
this._location = options.location; | ||
@@ -14,3 +14,11 @@ }; | ||
var locationDescription = this._location ? this._location.describe() + ":\n" : ""; | ||
return locationDescription + "Expected " + this._expected + "\nbut got " + this._actual; | ||
return locationDescription + "Expected " + this.expected + "\nbut got " + this.actual; | ||
}; | ||
Error.prototype.lineNumber = function() { | ||
return this._location.lineNumber(); | ||
}; | ||
Error.prototype.characterNumber = function() { | ||
return this._location.characterNumber(); | ||
}; |
@@ -20,5 +20,5 @@ var Token = require("./Token"); | ||
while (index < input.length) { | ||
var nextToken = readNextToken(input, index, source); | ||
index += nextToken.value.length; | ||
tokens.push(nextToken); | ||
var result = readNextToken(input, index, source); | ||
index = result.endIndex; | ||
tokens.push(result.token); | ||
} | ||
@@ -35,16 +35,23 @@ | ||
var result = regex.exec(string); | ||
if (result && result.index === startIndex && result[0].length) { | ||
var value = result[0]; | ||
return new Token( | ||
rules[i].name, | ||
value, | ||
source.range(startIndex, startIndex + value.length) | ||
); | ||
if (result) { | ||
var endIndex = startIndex + result[0].length; | ||
if (result.index === startIndex && endIndex > startIndex) { | ||
var value = result[1]; | ||
var token = new Token( | ||
rules[i].name, | ||
value, | ||
source.range(startIndex, endIndex) | ||
); | ||
return {token: token, endIndex: endIndex}; | ||
} | ||
} | ||
} | ||
return new Token( | ||
var endIndex = startIndex + 1; | ||
var token = new Token( | ||
"unrecognisedCharacter", | ||
string.substring(startIndex, startIndex + 1), | ||
source.range(startIndex, startIndex + 1) | ||
string.substring(startIndex, endIndex), | ||
source.range(startIndex, endIndex) | ||
); | ||
return {token: token, endIndex: endIndex}; | ||
} | ||
@@ -51,0 +58,0 @@ |
@@ -28,2 +28,17 @@ var util = require("util"); | ||
StringSourceRange.prototype.describe = function() { | ||
var position = this._position(); | ||
var description = this._description ? this._description + "\n" : ""; | ||
return util.format("%sLine number: %s\nCharacter number: %s", | ||
description, position.lineNumber, position.characterNumber); | ||
}; | ||
StringSourceRange.prototype.lineNumber = function() { | ||
return this._position().lineNumber; | ||
}; | ||
StringSourceRange.prototype.characterNumber = function() { | ||
return this._position().characterNumber; | ||
}; | ||
StringSourceRange.prototype._position = function() { | ||
var self = this; | ||
@@ -41,4 +56,3 @@ var index = 0; | ||
var characterNumber = this._startIndex - index + 1; | ||
var description = this._description ? this._description + "\n" : ""; | ||
return util.format("%sLine number: %s\nCharacter number: %s", description, lineNumber, characterNumber); | ||
return {lineNumber: lineNumber, characterNumber: characterNumber}; | ||
}; |
{ | ||
"name": "lop", | ||
"version": "0.2.8", | ||
"version": "0.3.0", | ||
"author": "Michael Williamson <mike@zwobble.org>", | ||
@@ -5,0 +5,0 @@ "description": "Create parsers using parser combinators with helpful error messages", |
@@ -59,5 +59,54 @@ # lop -- parsing library for JavaScript | ||
When using a parser built with lop, the input is an array of tokens. A token can be any value so long as it has the property `source`, which must be a `StringSourceRange`. For instance, to create a simple tokeniser that generates a stream of word tokens separated by whitespace tokens: | ||
When using a parser built with lop, the input is an array of tokens. A token can be any value so long as it has the property `source`, which must be a `StringSourceRange`. | ||
### Regex tokeniser | ||
The easiest way to create a tokeniser is using lop's regex tokeniser. | ||
A regex tokeniser can be constructed by calling `new lop.RegexTokeniser(rules)`, | ||
where `rules` is a list of token rules. | ||
A token rule should have a `name` property that uniquely identifies that rule, | ||
and a `regex` property that is an instance of `RegExp` describing the token. | ||
Calling `tokenise` with a string will return a list of tokens. | ||
Each token has three properties: | ||
* `type` | ||
* `value` | ||
* `source` | ||
The tokeniser will apply the regex from each rule in order at the current position. | ||
The current position is initially zero, the start of the string. | ||
The first rule with a matching regex is used to produce a token, | ||
with the token's `value` being the first capture of the regex, | ||
or `undefined` if the regex does not define any capture groups. | ||
The current position is incremented to the index of the first character unmatched by the regex. | ||
If no rule matches at the current position, | ||
a single character `unrecognisedCharacter` token is produced, | ||
and the current position is incremented by one. | ||
For instance, to create a simple tokeniser that generates a stream of word tokens separated by whitespace tokens: | ||
```javascript | ||
var lop = require("lop"); | ||
var rules = [ | ||
{ | ||
name: "whitespace", | ||
regex: /(\s+)/ | ||
}, | ||
{ | ||
name: "identifier", | ||
regex: /(\S+)/ | ||
} | ||
]; | ||
var tokeniser = new lop.RegexTokeniser(rules); | ||
tokeniser.tokenise(input); | ||
``` | ||
### Custom tokenisers | ||
You can also create your own tokeniser. | ||
For instance, to create a simple tokeniser that generates a stream of word tokens separated by whitespace tokens: | ||
```javascript | ||
var StringSource = require("lop").StringSource; | ||
@@ -64,0 +113,0 @@ |
var errors = require("../lib/errors"); | ||
var StringSource = require("../lib/StringSource"); | ||
@@ -25,1 +26,12 @@ exports.errorDescriptionIncludesLocationAndActualValueAndExpectedValue = function(test) { | ||
}; | ||
exports.canGetPositionFromError = function(test) { | ||
var error = errors.error({ | ||
expected: "Nothing", | ||
actual: "Something", | ||
location: new StringSource("abc\ndef\nghi\n", "").range(6, 8) | ||
}); | ||
test.equal(2, error.lineNumber()); | ||
test.equal(3, error.characterNumber()); | ||
test.done(); | ||
}; |
@@ -50,2 +50,24 @@ var RegexTokeniser = require("../lib/regex-tokeniser").RegexTokeniser; | ||
exports.tokenValueIsFirstCaptureOfRegex = stringIsTokenisedTo('"a"', [ | ||
new Token("string", "a", stringSourceRange('"a"', 0, 3)), | ||
endToken('"a"') | ||
]); | ||
exports.tokenWithNoCaptureHasUndefinedValue = function(test) { | ||
var expectedTokens = [ | ||
new Token("bang", undefined, stringSourceRange("!", 0, 1)), | ||
endToken("!") | ||
]; | ||
var rules = [ | ||
{ | ||
name: "bang", | ||
regex: /!/ | ||
} | ||
]; | ||
var tokeniser = new RegexTokeniser(rules); | ||
test.deepEqual(expectedTokens, tokeniser.tokenise("!")); | ||
test.done(); | ||
}; | ||
function endToken(input) { | ||
@@ -75,11 +97,15 @@ var source = stringSourceRange(input, input.length, input.length); | ||
name: "dot", | ||
regex: /\./ | ||
regex: /(\.)/ | ||
}, | ||
{ | ||
name: "colon1", | ||
regex: /:/ | ||
regex: /(:)/ | ||
}, | ||
{ | ||
name: "colon2", | ||
regex: /:/ | ||
regex: /(:)/ | ||
}, | ||
{ | ||
name: "string", | ||
regex: /"([a-z]*)"/ | ||
} | ||
@@ -86,0 +112,0 @@ ]; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance. | ||
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance. | ||
Found 1 instance in 1 package
75549
1849
249