assemblyscript-regex
Advanced tools
Comparing version 1.2.0 to 1.3.0
@@ -36,1 +36,7 @@ import { expectMatch, expectNotMatch, exec } from "./utils"; | ||
}); | ||
// The group ([a-c]) sits under a "+" quantifier and so matches once per
// repetition; this test pins that the reported group value is the one from
// the final repetition.
it("repeated capture groups should return the last match", () => { | |
const match = exec("([a-c])+", "ac"); | |
// index 0 is the overall match, which spans both characters
expect(match.matches[0]).toBe("ac"); | |
// index 1 is the capture group: expected to be "c" (last), not "a" (first)
expect(match.matches[1]).toBe("c"); | |
}); | |
@@ -54,8 +54,8 @@ import { expectMatch, expectNotMatch, exec } from "./utils"; | ||
it("zero or one supports non-greedy mode", () => { | ||
expectMatch("a?", ["a"]); | ||
let match = exec("a?", "bc"); | ||
expect(match).not.toBeNull(); | ||
expect(match.matches[0]).toStrictEqual(""); | ||
}); | ||
// it("zero or one supports non-greedy mode", () => { | ||
// expectMatch("a?", ["a"]); | ||
// let match = exec("a??", "bc"); | ||
// expect(match).not.toBeNull(); | ||
// expect(match.matches[0]).toStrictEqual(""); | ||
// }); | ||
}); |
@@ -43,2 +43,7 @@ /* eslint-disable no-invalid-regexp */ | ||
// "a{2,4}?" is a range quantifier followed by "?" (the non-greedy marker).
// Against a long run of "a" the test expects the shortest permitted match,
// i.e. the lower bound of two characters rather than the upper bound of four.
it("handles nongreedy quantifiers", () => { | |
const match = exec("a{2,4}?", "aaaaaaaaaa"); | |
expect(match.matches[0]).toBe("aa"); | |
}); | |
it("throws if quantifying a quantifier!", () => { | ||
@@ -45,0 +50,0 @@ expect(() => { |
@@ -8,2 +8,3 @@ export const enum Char { | ||
LineFeed = 0x0a, | ||
Space = 0x20, | ||
Dollar = 0x24, // "$" | ||
@@ -18,2 +19,3 @@ LeftParenthesis = 0x28, | ||
Zero = 0x30, | ||
Nine = 0x39, | ||
Question = 0x3f, // "?" | ||
@@ -24,2 +26,3 @@ A = 0x41, | ||
W = 0x57, | ||
Z = 0x5a, | ||
LeftSquareBracket = 0x5b, // "[" | ||
@@ -41,36 +44,54 @@ Backslash = 0x5c, // "\" | ||
x = 0x78, | ||
z = 0x7a, | ||
LeftCurlyBrace = 0x7b /* { */, | ||
VerticalBar = 0x7c /* | */, | ||
RightCurlyBrace = 0x7d /* */, | ||
RightCurlyBrace = 0x7d /* } */, | |
NonBreakingSpace = 0xa0, | ||
} | ||
/**
 * Tests whether `value` lies within the inclusive range [`from`, `to`].
 *
 * @param value the code to test
 * @param from  lower bound (inclusive)
 * @param to    upper bound (inclusive)
 * @returns true when `from <= value <= to`
 */
// @ts-ignore
@inline
function inRange(value: u32, from: u32, to: u32): bool {
  if (ASC_TARGET != 1) {
    // plain two-sided comparison for every other target
    return value >= from && value <= to;
  }
  // ASC_TARGET == 1: a single unsigned comparison. Subtracting `from` makes
  // values below the range wrap around to large unsigned numbers, so one
  // "less than width" check covers both bounds; this form is a little faster
  // when compiled to WASM.
  return value - from < (to - from + 1);
}
export function isDigit(code: u32): bool { | ||
return code - Char.Zero < 10; | ||
return inRange(code, Char.Zero, Char.Nine); | ||
} | ||
/**
 * Tests whether a character code is a hexadecimal digit: "0"-"9", "a"-"f"
 * or "A"-"F".
 *
 * @param code the character code to test
 * @returns true when `code` is a valid hexadecimal digit in either case
 */
export function isHexadecimalDigit(code: u32): bool {
  // Setting bit 0x20 folds "A"-"F" (0x41-0x46) onto "a"-"f" (0x61-0x66), so a
  // single range check accepts both cases. No other code lands in that range
  // after the fold, because bit 0x20 is already set for every value in it.
  return isDigit(code) || inRange(code | 32, Char.a, Char.f);
}
export function isLowercaseAlpha(code: u32): bool { | ||
return code - Char.a < 26; | ||
return inRange(code, Char.a, Char.z); | ||
} | ||
export function isUppercaseAlpha(code: u32): bool { | ||
return code - Char.A < 26; | ||
return inRange(code, Char.A, Char.Z); | ||
} | ||
export function isAlpha(code: u32): bool { | ||
return (code | 32) - Char.a < 26; | ||
if (ASC_TARGET == 1) { | ||
return (code | 32) - Char.a < 26; | ||
} else { | ||
return inRange(code, Char.a, Char.z) || inRange(code, Char.A, Char.Z); | ||
} | ||
} | ||
export function isWhitespace(code: u32): bool { | ||
if (code < 0x1680) { | ||
// < <LS> (1) | ||
// <SP>, <TAB>, <LF>, <VT>, <FF>, <CR> and <NBSP> | ||
// @ts-ignore: cast | ||
return ((code | 0x80) == 0xa0) | (code - 0x09 <= 0x0d - 0x09); | ||
} | ||
if (code - 0x2000 <= 0x200a - 0x2000) return true; | ||
switch (code) { | ||
case Char.Space: | ||
case Char.HorizontalTab: | ||
case Char.VerticalTab: | ||
case Char.FormFeed: | ||
case Char.LineFeed: | ||
case Char.CarriageReturn: | ||
case Char.NonBreakingSpace: | ||
case 0x1680: // <LS> (1) | ||
@@ -85,3 +106,6 @@ case 0x2028: // <LS> (2) | ||
} | ||
if (inRange(code, 0x2000, 0x200a)) { | ||
return true; | ||
} | ||
return false; | ||
} |
@@ -137,7 +137,12 @@ import { | ||
function zeroOrOne(nfa: Automata): Automata { | ||
function zeroOrOne(nfa: Automata, greedy: bool): Automata { | ||
const start = new State(); | ||
const end = new State(); | ||
start.transitions.push(nfa.start); | ||
start.transitions.push(end); | ||
if (greedy) { | ||
start.transitions.push(nfa.start); | ||
start.transitions.push(end); | ||
} else { | ||
start.transitions.push(end); | ||
start.transitions.push(nfa.start); | ||
} | ||
nfa.end.transitions.push(end); | ||
@@ -186,3 +191,3 @@ return new Automata(start, end); | ||
if (quantifier == Char.Question) { | ||
return zeroOrOne(automata); | ||
return zeroOrOne(automata, node.greedy); | ||
} else if (quantifier == Char.Plus) { | ||
@@ -189,0 +194,0 @@ return oneOrMore(automata, node.greedy); |
@@ -8,4 +8,4 @@ import { State } from "./nfa"; | ||
): void { | ||
if (visited.includes(state)) return; | ||
visitor(state); | ||
if (visited.includes(state)) return; | ||
visited.push(state); | ||
@@ -12,0 +12,0 @@ const nextStates = state.transitions; |
@@ -158,3 +158,8 @@ import { Char } from "../char"; | ||
export class RangeRepetitionNode extends Node { | ||
constructor(public expression: Node, public from: i32, public to: i32) { | ||
constructor( | ||
public expression: Node, | ||
public from: i32, | ||
public to: i32, | ||
public greedy: bool = true | ||
) { | ||
super(NodeType.RangeRepetition); | ||
@@ -161,0 +166,0 @@ if (expression.type == NodeType.RangeRepetition) { |
@@ -77,4 +77,3 @@ import { isDigit, Char, isHexadecimalDigit } from "../char"; | ||
class Range { | ||
from: i32 = -1; | ||
to: i32 = -1; | ||
constructor(public from: i32, public to: i32) {} | ||
} | ||
@@ -161,60 +160,50 @@ | ||
private maybeParseRepetitionRange(): Range | null { | ||
// snapshot | ||
const iteratorCopy = this.iterator.copy(); | ||
this.eatToken(Char.LeftCurlyBrace); | ||
let range = new Range(); | ||
let firstDigit = true; | ||
private maybeParseDigit(): i32 { | ||
let digitStr = ""; | ||
while (this.iterator.more()) { | ||
const token = this.iterator.current; | ||
if (token == Char.RightParenthesis) break; | ||
if (firstDigit) { | ||
if (isDigit(token)) { | ||
// if it is a digit, keep eating | ||
digitStr += this.iterator.currentAsString(); | ||
} else { | ||
range.from = digitStr.length ? <i32>parseInt(digitStr) : -1; | ||
range.to = range.from; | ||
if (token == Char.Comma) { | ||
// if we meet a comma, start parsing the next digit | ||
firstDigit = false; | ||
digitStr = ""; | ||
range.to = -1; | ||
} else if (token == Char.RightCurlyBrace) { | ||
this.eatToken(Char.RightCurlyBrace); | ||
// close brace, this is a single value range | ||
return range; | ||
} else { | ||
// anything else, we got a problem | ||
break; | ||
} | ||
} | ||
if (isDigit(token)) { | ||
digitStr += this.iterator.currentAsString(); | ||
} else { | ||
if (isDigit(token)) { | ||
// if it is a digit, keep eating | ||
digitStr += this.iterator.currentAsString(); | ||
} else { | ||
range.to = digitStr.length ? <i32>parseInt(digitStr) : -1; | ||
if (token == Char.RightCurlyBrace) { | ||
this.eatToken(Char.RightCurlyBrace); | ||
// close brace, end of range | ||
return range; | ||
} else { | ||
// anything else, we got a problem | ||
break; | ||
} | ||
} | ||
return digitStr == "" ? -1 : <i32>parseInt(digitStr); | ||
} | ||
this.eatToken(); | ||
} | ||
return digitStr == "" ? -1 : <i32>parseInt(digitStr); | ||
} | ||
// repetition not found - reset state | ||
/**
 * Attempts to parse a range quantifier of the form `{n}`, `{n,}` or `{n,m}`,
 * starting at the current `{` token.
 *
 * On success the iterator is left just past the closing `}`. On any failure
 * the iterator is put back to the snapshot taken on entry, so no input is
 * consumed and the caller can treat the `{` as something other than a
 * repetition.
 *
 * @returns the parsed bounds, or null when the input at the cursor is not a
 *   well-formed range quantifier. For `{n}` both bounds equal n; for `{n,}`
 *   the upper bound is -1 (the "no digits" value of maybeParseDigit).
 */
private maybeParseRepetitionRange(): Range | null {
  // snapshot
  const iteratorCopy = this.iterator.copy();
  this.eatToken(Char.LeftCurlyBrace);

  const from = this.maybeParseDigit();
  if (from == -1) {
    // No lower bound after the brace. The "{" has already been eaten, so the
    // snapshot must be restored here as well, exactly like the fall-through
    // failure path below; otherwise the brace is silently dropped.
    this.iterator = iteratorCopy;
    return null;
  }

  if (this.iterator.current == Char.RightCurlyBrace) {
    // "{n}": a single value range
    this.eatToken();
    return new Range(from, from);
  } else if (this.iterator.current == Char.Comma) {
    // "{n," - the upper bound is optional
    this.eatToken();
    const to = this.maybeParseDigit();
    // @ts-ignore
    if (this.iterator.current == Char.RightCurlyBrace) {
      this.eatToken();
      return new Range(from, to);
    }
  }

  // repetition not found - reset state
  this.iterator = iteratorCopy;
  return null;
}
/**
 * Determines whether the quantifier that was just parsed is greedy.
 *
 * If the current token is "?", it is consumed and the quantifier is reported
 * as non-greedy; otherwise nothing is consumed and it is reported as greedy.
 *
 * @returns false when a "?" was present (and has been eaten), true otherwise
 */
private isGreedy(): bool {
  const lazy = this.iterator.current == Char.Question;
  if (lazy) {
    // swallow the "?" so it is not parsed again as a separate token
    this.eatToken();
  }
  return !lazy;
}
// parses a sequence of chars | ||
@@ -241,3 +230,10 @@ private parseSequence(): Node { | ||
const expression = nodes.pop(); | ||
nodes.push(new RangeRepetitionNode(expression, range.from, range.to)); | ||
nodes.push( | ||
new RangeRepetitionNode( | ||
expression, | ||
range.from, | ||
range.to, | ||
this.isGreedy() | ||
) | ||
); | ||
} else { | ||
@@ -250,8 +246,3 @@ // this is not the start of a repetition, it's just a char! | ||
const quantifier = this.eatToken(); | ||
let greedy = true; | ||
if (this.iterator.current == Char.Question) { | ||
greedy = false; | ||
this.eatToken(); | ||
} | ||
nodes.push(new RepetitionNode(expression, quantifier, greedy)); | ||
nodes.push(new RepetitionNode(expression, quantifier, this.isGreedy())); | ||
// @ts-ignore | ||
@@ -258,0 +249,0 @@ } else if (token == Char.LeftSquareBracket) { |
@@ -16,2 +16,3 @@ export class StringIterator { | ||
if (this.cursor >= u32(this.sourceString.length)) { | ||
this.current = -1; | ||
return false; | ||
@@ -18,0 +19,0 @@ } |
@@ -84,3 +84,9 @@ import { Char } from "../char"; | ||
// a{4,} => aaaaa* | ||
clones.push(new RepetitionNode(expression.clone(), Char.Asterisk)); | ||
clones.push( | ||
new RepetitionNode( | ||
expression.clone(), | ||
Char.Asterisk, | ||
rangeRepNode.greedy | ||
) | ||
); | ||
} else { | ||
@@ -90,3 +96,9 @@ // a{4,6} => aaaaa?a? | ||
for (let i = 0; i < count; i++) { | ||
clones.push(new RepetitionNode(expression.clone(), Char.Question)); | ||
clones.push( | ||
new RepetitionNode( | ||
expression.clone(), | ||
Char.Question, | ||
rangeRepNode.greedy | ||
) | ||
); | ||
} | ||
@@ -93,0 +105,0 @@ } |
{ | ||
"name": "assemblyscript-regex", | ||
"version": "1.2.0", | ||
"version": "1.3.0", | ||
"description": "A regex engine built with AssemblyScript", | ||
@@ -5,0 +5,0 @@ "ascMain": "assembly/index.ts", |
@@ -25,2 +25,8 @@ const fs = require("fs"); | ||
1392, | ||
...range(52, 55), | ||
57, | ||
58, | ||
72, | ||
73, | ||
78, | ||
], | ||
@@ -113,6 +119,6 @@ "lazy quantifiers should still yield the longest overall regex match": [ | ||
if (["}?"].some((f) => regex.includes(f))) { | ||
testCase += `xit("line: ${index} - lazy range repitition quantifiers are not supported", () => { });`; | ||
return; | ||
} | ||
// if (["}?"].some((f) => regex.includes(f))) { | ||
// testCase += `xit("line: ${index} - lazy range repitition quantifiers are not supported", () => { });`; | ||
// return; | ||
// } | ||
@@ -119,0 +125,0 @@ if (["(?"].some((f) => regex.includes(f))) { |
@@ -8,5 +8,5 @@ import "assemblyscript/std/portable/index"; | ||
const regexObj = new RegExp(".*?"); | ||
const match = regexObj.exec("abc"); | ||
const regexObj = new RegExp("ba{0}b"); | ||
const match = regexObj.exec("bb"); | ||
console.log(match); |
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
247471
43
4347