assemblyscript-regex
Advanced tools
Comparing version 1.5.0 to 1.6.0
@@ -48,1 +48,7 @@ import { expectMatch, expectNotMatch, exec } from "./utils"; | ||
}); | ||
it("non-capturing groups should not capture", () => { | ||
const match = exec("(?:foo)bar(baz)", "foobarbaz"); | ||
expect(match.matches[0]).toBe("foobarbaz"); | ||
expect(match.matches[1]).toBe("baz"); | ||
}); |
@@ -1,55 +0,54 @@ | ||
import { RegExp } from ".."; | ||
import { expectMatch, expectNotMatch, exec } from "./utils"; | ||
import { expectMatch, expectNotMatch } from "./utils"; | ||
it("dot", () => { | ||
expectMatch(".", [" ", "B", "|", "9"]); | ||
expectNotMatch(".", ["", "\n"]); | ||
it("throws an error if no closing bracket is found", () => { | ||
// expect(() => new RegExp("[abce")).toThrow(); | ||
}); | ||
it("digit", () => { | ||
expectMatch("\\d", ["0", "9"]); | ||
expectNotMatch("\\d", ["", "b"]); | ||
it("matches discrete characters", () => { | ||
expectMatch("[abce]", ["a", "b", "c", "e"]); | ||
expectNotMatch("[abce]", ["", "f", "h"]); | ||
}); | ||
it("non-digit", () => { | ||
expectNotMatch("\\D", ["0", "9", ""]); | ||
expectMatch("\\D", ["b", "|"]); | ||
it("matches character ranges", () => { | ||
expectMatch("[a-c]", ["a", "b", "c"]); | ||
expectNotMatch("[a-c]", ["d", "e", ""]); | ||
expectMatch("[K-M]", ["K", "L", "M"]); | ||
expectNotMatch("[K-M]", ["9", "J"]); | ||
expectMatch("[0-9]", ["0", "9"]); | ||
expectNotMatch("[0-9]", ["a", "A"]); | ||
}); | ||
it("word", () => { | ||
expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]); | ||
expectNotMatch("\\w", ["", "$"]); | ||
it("matches multiple ranges", () => { | ||
expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]); | ||
expectNotMatch("[a-ce-f]", ["d"]); | ||
}); | ||
it("not word", () => { | ||
expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]); | ||
expectMatch("\\W", ["&", "$"]); | ||
it("supports closing brackets", () => { | ||
expectMatch("[]a]", ["]", "a"]); | ||
}); | ||
it("whitespace", () => { | ||
expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]); | ||
expectNotMatch("\\s", ["", "a", "0"]); | ||
it("supports negated sets", () => { | ||
expectNotMatch("[^a-c]", ["a", "b", "c"]); | ||
expectMatch("[^a-c]", ["d", "e"]); | ||
expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]); | ||
expectMatch("[^a-ce-f]", ["d"]); | ||
}); | ||
it("not whitespace", () => { | ||
expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]); | ||
expectMatch("\\S", ["a", "0"]); | ||
it("treats - as a literal", () => { | ||
expectMatch("[-abc]", ["-", "a", "b", "c"]); | ||
expectMatch("[abc-]", ["-", "a", "b", "c"]); | ||
}); | ||
it("tab, cr, lf, vt, ff", () => { | ||
expectMatch("\\t", ["\t"]); | ||
expectMatch("\\r", ["\r"]); | ||
expectMatch("\\n", ["\n"]); | ||
expectMatch("\\v", ["\v"]); | ||
expectMatch("\\f", ["\f"]); | ||
expectNotMatch("\\t", ["a", " ", ""]); | ||
it("treats - as a literal in negated sets", () => { | ||
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]); | ||
expectMatch("[^-abc]", ["1", "A"]); | ||
}); | ||
it("escaped dot", () => { | ||
expectMatch("\\.", ["."]); | ||
expectNotMatch("\\.", ["", "a"]); | ||
it("supports case insensitive matching", () => { | ||
// simple ranges | ||
expectMatch("[a-c]", ["A", "C", "a", "c"], "i"); | ||
expectNotMatch("[a-c]", ["D", "d"], "i"); | ||
// complex | ||
expectMatch("[W-c]", ["W", "w", "C", "c"], "i"); | ||
expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i"); | ||
}); | ||
it("unrecognised character classes are treated as characters", () => { | ||
expectMatch("\\g\\m", ["gm"]); | ||
}); |
@@ -1,54 +0,55 @@ | ||
import { expectMatch, expectNotMatch } from "./utils"; | ||
import { RegExp } from ".."; | ||
import { expectMatch, expectNotMatch, exec } from "./utils"; | ||
it("throws an error if no closing bracket is found", () => { | ||
// expect(() => new RegExp("[abce")).toThrow(); | ||
it("dot", () => { | ||
expectMatch(".", [" ", "B", "|", "9"]); | ||
expectNotMatch(".", ["", "\n"]); | ||
}); | ||
it("matches discrete characters", () => { | ||
expectMatch("[abce]", ["a", "b", "c", "e"]); | ||
expectNotMatch("[abce]", ["", "f", "h"]); | ||
it("digit", () => { | ||
expectMatch("\\d", ["0", "9"]); | ||
expectNotMatch("\\d", ["", "b"]); | ||
}); | ||
it("matches character ranges", () => { | ||
expectMatch("[a-c]", ["a", "b", "c"]); | ||
expectNotMatch("[a-c]", ["d", "e", ""]); | ||
expectMatch("[K-M]", ["K", "L", "M"]); | ||
expectNotMatch("[K-M]", ["9", "J"]); | ||
expectMatch("[0-9]", ["0", "9"]); | ||
expectNotMatch("[0-9]", ["a", "A"]); | ||
it("non-digit", () => { | ||
expectNotMatch("\\D", ["0", "9", ""]); | ||
expectMatch("\\D", ["b", "|"]); | ||
}); | ||
it("matches multiple ranges", () => { | ||
expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]); | ||
expectNotMatch("[a-ce-f]", ["d"]); | ||
it("word", () => { | ||
expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]); | ||
expectNotMatch("\\w", ["", "$"]); | ||
}); | ||
it("supports closing brackets", () => { | ||
expectMatch("[]a]", ["]", "a"]); | ||
it("not word", () => { | ||
expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]); | ||
expectMatch("\\W", ["&", "$"]); | ||
}); | ||
it("supports negated sets", () => { | ||
expectNotMatch("[^a-c]", ["a", "b", "c"]); | ||
expectMatch("[^a-c]", ["d", "e"]); | ||
expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]); | ||
expectMatch("[^a-ce-f]", ["d"]); | ||
it("whitespace", () => { | ||
expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]); | ||
expectNotMatch("\\s", ["", "a", "0"]); | ||
}); | ||
it("treats - as a literal", () => { | ||
expectMatch("[-abc]", ["-", "a", "b", "c"]); | ||
expectMatch("[abc-]", ["-", "a", "b", "c"]); | ||
it("not whitespace", () => { | ||
expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]); | ||
expectMatch("\\S", ["a", "0"]); | ||
}); | ||
it("treats - as a literal in negated sets", () => { | ||
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]); | ||
expectMatch("[^-abc]", ["1", "A"]); | ||
it("tab, cr, lf, vt, ff", () => { | ||
expectMatch("\\t", ["\t"]); | ||
expectMatch("\\r", ["\r"]); | ||
expectMatch("\\n", ["\n"]); | ||
expectMatch("\\v", ["\v"]); | ||
expectMatch("\\f", ["\f"]); | ||
expectNotMatch("\\t", ["a", " ", ""]); | ||
}); | ||
it("supports case insensitive matching", () => { | ||
// simple ranges | ||
expectMatch("[a-c]", ["A", "C", "a", "c"], "i"); | ||
expectNotMatch("[a-c]", ["D", "d"], "i"); | ||
// complex | ||
expectMatch("[W-c]", ["W", "w", "C", "c"], "i"); | ||
expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i"); | ||
it("escaped dot", () => { | ||
expectMatch("\\.", ["."]); | ||
expectNotMatch("\\.", ["", "a"]); | ||
}); | ||
it("unrecognised character classes are treated as characters", () => { | ||
expectMatch("\\g\\m", ["gm"]); | ||
}); |
@@ -19,2 +19,3 @@ export const enum Char { | ||
Nine = 0x39, | ||
Colon = 0x3a, | ||
Question = 0x3f, // "?" | ||
@@ -21,0 +22,0 @@ A = 0x41, |
@@ -5,4 +5,4 @@ import { isDigit, isAlpha, isWhitespace, Char } from "../char"; | ||
CharacterNode, | ||
CharacterClassNode, | ||
CharacterSetNode, | ||
CharacterClassNode, | ||
CharacterRangeNode, | ||
@@ -17,4 +17,4 @@ NodeType, | ||
CharacterRange, | ||
CharacterSet, | ||
CharacterClass, | ||
CharacterSet, | ||
} | ||
@@ -32,6 +32,6 @@ | ||
static fromCharacterClassNode( | ||
node: CharacterClassNode, | ||
node: CharacterSetNode, | ||
flags: Flags | ||
): CharacterClassMatcher { | ||
return new CharacterClassMatcher(node.charClass, flags.dotAll); | ||
): CharacterSetMatcher { | ||
return new CharacterSetMatcher(node.charClass, flags.dotAll); | ||
} | ||
@@ -50,5 +50,5 @@ | ||
static fromCharacterSetNode( | ||
node: CharacterSetNode, | ||
node: CharacterClassNode, | ||
flags: Flags | ||
): CharacterSetMatcher { | ||
): CharacterClassMatcher { | ||
_flags = flags; | ||
@@ -64,5 +64,5 @@ const matchers = node.expressions.map<Matcher>((exp) => { | ||
return Matcher.fromCharacterNode(exp as CharacterNode, _flags); | ||
case NodeType.CharacterClass: | ||
case NodeType.CharacterSet: | ||
return Matcher.fromCharacterClassNode( | ||
exp as CharacterClassNode, | ||
exp as CharacterSetNode, | ||
_flags | ||
@@ -74,3 +74,3 @@ ); | ||
}); | ||
return new CharacterSetMatcher(matchers, node.negated); | ||
return new CharacterClassMatcher(matchers, node.negated); | ||
} | ||
@@ -135,5 +135,5 @@ | ||
export class CharacterClassMatcher extends Matcher { | ||
export class CharacterSetMatcher extends Matcher { | ||
constructor(public charClass: Char, private dotAll: bool) { | ||
super(MatcherType.CharacterClass); | ||
super(MatcherType.CharacterSet); | ||
} | ||
@@ -181,5 +181,5 @@ | ||
export class CharacterSetMatcher extends Matcher { | ||
export class CharacterClassMatcher extends Matcher { | ||
constructor(public matchers: Matcher[], public negated: bool) { | ||
super(MatcherType.CharacterSet); | ||
super(MatcherType.CharacterClass); | ||
} | ||
@@ -200,9 +200,9 @@ | ||
case MatcherType.CharacterSet: | ||
match = (matcher as CharacterSetMatcher).matches(code); | ||
break; | ||
case MatcherType.CharacterClass: | ||
match = (matcher as CharacterClassMatcher).matches(code); | ||
break; | ||
case MatcherType.CharacterSet: | ||
match = (matcher as CharacterSetMatcher).matches(code); | ||
break; | ||
} | ||
@@ -209,0 +209,0 @@ if (match) break; |
@@ -8,4 +8,4 @@ import { | ||
AlternationNode, | ||
CharacterClassNode, | ||
CharacterSetNode, | ||
CharacterClassNode, | ||
GroupNode, | ||
@@ -46,3 +46,3 @@ NodeType, | ||
constructor(next: State, public groupId: i32) { | ||
constructor(next: State, public capturing: bool, public groupId: i32) { | ||
super(); | ||
@@ -65,6 +65,8 @@ this.transitions.push(next); | ||
matches(input: string, position: u32): MatchResult { | ||
this.startMarker.capture = input.substring( | ||
this.startMarker.location, | ||
position | ||
); | ||
if (this.startMarker.capturing) { | ||
this.startMarker.capture = input.substring( | ||
this.startMarker.location, | ||
position | ||
); | ||
} | ||
return MatchResult.Ignore; | ||
@@ -170,6 +172,6 @@ } | ||
function group(nfa: Automata, id: i32): Automata { | ||
function group(nfa: Automata, capturing: bool, id: i32): Automata { | ||
// groups are implemented by wrapping the automata with | ||
// a pair of markers that record matches | ||
const startMarker = new GroupStartMarkerState(nfa.start, id); | ||
const startMarker = new GroupStartMarkerState(nfa.start, capturing, id); | ||
const end = new State(); | ||
@@ -229,13 +231,13 @@ const endMarker = new GroupEndMarkerState(end, startMarker); | ||
} | ||
case NodeType.CharacterSet: | ||
case NodeType.CharacterClass: | ||
return Automata.fromMatcher( | ||
Matcher.fromCharacterSetNode( | ||
expression as CharacterSetNode, | ||
expression as CharacterClassNode, | ||
this.flags | ||
) | ||
); | ||
case NodeType.CharacterClass: | ||
case NodeType.CharacterSet: | ||
return Automata.fromMatcher( | ||
Matcher.fromCharacterClassNode( | ||
expression as CharacterClassNode, | ||
expression as CharacterSetNode, | ||
this.flags | ||
@@ -246,3 +248,7 @@ ) | ||
const node = expression as GroupNode; | ||
return group(this.automataForNode(node.expression), node.id); | ||
return group( | ||
this.automataForNode(node.expression), | ||
node.capturing, | ||
node.id | ||
); | ||
} | ||
@@ -249,0 +255,0 @@ case NodeType.Assertion: |
@@ -10,4 +10,4 @@ import { Char } from "../char"; | ||
Character, | ||
CharacterClass, | ||
CharacterSet, | ||
CharacterClass, | ||
CharacterRange, | ||
@@ -76,9 +76,9 @@ Repetition, | ||
export class CharacterSetNode extends Node { | ||
export class CharacterClassNode extends Node { | ||
constructor(public expressions: Node[], public negated: bool) { | ||
super(NodeType.CharacterSet); | ||
super(NodeType.CharacterClass); | ||
} | ||
clone(): Node { | ||
return new CharacterSetNode( | ||
return new CharacterClassNode( | ||
this.expressions.slice(0).map<Node>((s) => s.clone()), | ||
@@ -131,9 +131,9 @@ this.negated | ||
export class CharacterClassNode extends Node { | ||
export class CharacterSetNode extends Node { | ||
constructor(public charClass: Char) { | ||
super(NodeType.CharacterClass); | ||
super(NodeType.CharacterSet); | ||
} | ||
clone(): Node { | ||
return new CharacterClassNode(this.charClass); | ||
return new CharacterSetNode(this.charClass); | ||
} | ||
@@ -215,3 +215,7 @@ } | ||
export class GroupNode extends Node { | ||
constructor(public expression: Node, public id: i32 = -1) { | ||
constructor( | ||
public expression: Node, | ||
public capturing: bool, | ||
public id: i32 = -1 | ||
) { | ||
super(NodeType.Group); | ||
@@ -228,3 +232,3 @@ if (id == -1) { | ||
clone(): Node { | ||
return new GroupNode(this.expression.clone(), this.id); | ||
return new GroupNode(this.expression.clone(), this.capturing, this.id); | ||
} | ||
@@ -231,0 +235,0 @@ |
@@ -8,3 +8,3 @@ import { isDigit, Char, isHexadecimalDigit } from "../char"; | ||
AssertionNode, | ||
CharacterClassNode, | ||
CharacterSetNode, | ||
CharacterNode, | ||
@@ -15,3 +15,3 @@ Node, | ||
RepetitionNode, | ||
CharacterSetNode, | ||
CharacterClassNode, | ||
CharacterRangeNode, | ||
@@ -143,3 +143,3 @@ } from "./node"; | ||
} else if (isCharacterClass(token)) { | ||
return new CharacterClassNode(this.eatToken()); | ||
return new CharacterSetNode(this.eatToken()); | ||
} else { | ||
@@ -156,3 +156,3 @@ return new CharacterNode(this.eatToken()); | ||
this.eatToken(Char.Dot); | ||
return new CharacterClassNode(Char.Dot); | ||
return new CharacterSetNode(Char.Dot); | ||
} | ||
@@ -211,2 +211,14 @@ | ||
private isCapturing(): bool { | ||
if ( | ||
this.iterator.current == Char.Question && | ||
this.iterator.lookahead(1) == Char.Colon | ||
) { | ||
this.eatToken(Char.Question); | ||
this.eatToken(Char.Colon); | ||
return false; | ||
} | ||
return true; | ||
} | ||
// parses a sequence of chars | ||
@@ -226,3 +238,4 @@ private parseSequence(): Node { | ||
this.eatToken(Char.LeftParenthesis); | ||
nodes.push(new GroupNode(this.parseSequence())); | ||
const capturing = this.isCapturing(); | ||
nodes.push(new GroupNode(this.parseSequence(), capturing)); | ||
this.eatToken(Char.RightParenthesis); | ||
@@ -252,3 +265,3 @@ // @ts-ignore | ||
} else if (token == Char.LeftSquareBracket) { | ||
nodes.push(this.parseCharacterSet()); | ||
nodes.push(this.parseCharacterClass()); | ||
} else { | ||
@@ -269,3 +282,3 @@ nodes.push(this.parseCharacter()); | ||
private parseCharacterSet(): CharacterSetNode { | ||
private parseCharacterClass(): CharacterClassNode { | ||
this.eatToken(Char.LeftSquareBracket); | ||
@@ -299,3 +312,3 @@ | ||
// otherwise this is a character class | ||
nodes.push(new CharacterClassNode(this.eatToken())); | ||
nodes.push(new CharacterSetNode(this.eatToken())); | ||
} | ||
@@ -312,4 +325,4 @@ } else { | ||
this.eatToken(Char.RightSquareBracket); | ||
return new CharacterSetNode(nodes, negated); | ||
return new CharacterClassNode(nodes, negated); | ||
} | ||
} |
@@ -92,5 +92,5 @@ import { State, Automata, GroupStartMarkerState, MatchResult } from "./nfa/nfa"; | ||
// the value of the current state of the string being matched. | ||
// Repeated capture groups, via rage repetitions (e.g. {2,3}) share the same 'id'. The | ||
// Repeated capture groups, via range repetitions (e.g. {2,3}) share the same 'id'. The | ||
// returned regex should only return the value of the final repetition. | ||
function filterCaptures(groupMarkers: GroupStartMarkerState[]): string[] { | ||
function lastCapturesForGroup(groupMarkers: GroupStartMarkerState[]): string[] { | ||
if (!groupMarkers.length) { | ||
@@ -143,3 +143,6 @@ return []; | ||
if (state instanceof GroupStartMarkerState) { | ||
gm.push(state as GroupStartMarkerState); | ||
const startMarker = state as GroupStartMarkerState; | ||
if (startMarker.capturing) { | ||
gm.push(state as GroupStartMarkerState); | ||
} | ||
} | ||
@@ -186,3 +189,3 @@ }); | ||
const match = new Match( | ||
[matchStr!].concat(filterCaptures(groupMarkers)), | ||
[matchStr!].concat(lastCapturesForGroup(groupMarkers)), | ||
matchIndex, | ||
@@ -189,0 +192,0 @@ str |
{ | ||
"name": "assemblyscript-regex", | ||
"version": "1.5.0", | ||
"version": "1.6.0", | ||
"description": "A regex engine built with AssemblyScript", | ||
@@ -5,0 +5,0 @@ "ascMain": "assembly/index.ts", |
@@ -36,3 +36,3 @@ # assemblyscript-regex | ||
**Character classes** | ||
**Character sets** | ||
@@ -81,3 +81,3 @@ - [x] . | ||
- [ ] (?<Name>x) named capturing group | ||
- [ ] (?:x) Non-capturing group | ||
- [x] (?:x) Non-capturing group | ||
@@ -84,0 +84,0 @@ **Quantifiers** |
@@ -25,2 +25,3 @@ const fs = require("fs"); | ||
], | ||
"peformance issue": [1313, 1314], | ||
@@ -51,3 +52,3 @@ /* -------- issues with the tests ------------ */ | ||
"test regex contains syntax not supported in JS": [82, 1158, 281], | ||
"the test behaviour differs between PCRE and JS": [290], | ||
"the test behaviour differs between PCRE and JS": [290, 1278], | ||
}; | ||
@@ -113,7 +114,17 @@ | ||
if (["(?"].some((f) => regex.includes(f))) { | ||
testCase += `xit("line: ${index} - non capturing groups not supported", () => {});`; | ||
if (["(?!", "(?="].some((f) => regex.includes(f))) { | ||
testCase += `xit("line: ${index} - lookaheads not supported", () => {});`; | ||
return; | ||
} | ||
if (["(?m", "(?s", "(?ms"].some((f) => regex.includes(f))) { | ||
testCase += `xit("line: ${index} - JS regex does not support mode modifiers", () => {});`; | ||
return; | ||
} | ||
if (["(?#"].some((f) => regex.includes(f))) { | ||
testCase += `xit("line: ${index} - JS regex does not support comments", () => {});`; | ||
return; | ||
} | ||
if (regex.match(/\\\\\d{1}/)) { | ||
@@ -120,0 +131,0 @@ testCase += `xit("line: ${index} - back references are not supported", () => {});`; |
@@ -8,11 +8,6 @@ import "assemblyscript/std/portable/index"; | ||
const regexObj = new RegExp("abc$", "m"); | ||
let match = regexObj.exec("abc\n"); | ||
const regexObj = new RegExp("word (?:[a-zA-Z0-9]+ ){0,300}otherword", ""); | ||
let match = regexObj.exec( | ||
"word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope" | ||
); | ||
console.log(JSON.stringify(match, null, 2)); | ||
// match = regexObj.exec("f1\nbar\nbaz\nf2"); | ||
// console.log(JSON.stringify(match, null, 2)); | ||
// const regex = new RegExp("^f\\d{1}$", "gm"); | ||
// let match = regex.exec("f1\nbar\nbaz\nf2"); | ||
// expect(match!.matches[0]).toBe("f1"); |
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
268792
4805