assemblyscript-regex
Advanced tools
Comparing version 1.4.1 to 1.5.0
@@ -72,2 +72,54 @@ import { RegExp } from ".."; | ||
// Specs for the "m" (multiline) flag: `^` / `$` are expected to anchor at
// line-feed boundaries as well as at the start / end of the whole input.
describe("multi-line mode", () => {
  it("sets multi-line flag", () => {
    expect(new RegExp("\\d+", "m").multiline).toBeTruthy();
    expect(new RegExp("\\d+", "").multiline).toBeFalsy();
  });

  it("matches across multiple lines", () => {
    // without the global flag only the first matching line is reported
    const match = exec("^f\\d{1}$", "f1\nbar\nbaz\nf2", "m");
    expect(match.matches.length).toBe(1);
    expect(match.matches[0]).toBe("f1");
  });

  it("matches across multiple lines with global mode", () => {
    const regex = new RegExp("^f\\d{1}$", "gm");
    const input = "f1\nbar\nbaz\nf2";
    // each exec call is expected to resume after the previous match
    let match = regex.exec(input);
    expect(match!.matches[0]).toBe("f1");
    match = regex.exec(input);
    expect(match!.matches[0]).toBe("f2");
    // no further lines match, so the search is exhausted
    match = regex.exec(input);
    expect(match).toBeNull();
  });

  it("anchors the start of each line with global mode", () => {
    const regex = new RegExp("^[a-c]", "gm");
    const input = "a1\nd2\nc3\n";
    let match = regex.exec(input);
    expect(match!.matches[0]).toBe("a");
    // the "d2" line is skipped because "d" is outside [a-c]
    match = regex.exec(input);
    expect(match!.matches[0]).toBe("c");
    match = regex.exec(input);
    expect(match).toBeNull();
  });

  it("anchors the end of each line with global mode", () => {
    const regex = new RegExp("[a-c]$", "gm");
    const input = "1a\n2d\n3c\n";
    let match = regex.exec(input);
    expect(match!.matches[0]).toBe("a");
    // the "2d" line is skipped because "d" is outside [a-c]
    match = regex.exec(input);
    expect(match!.matches[0]).toBe("c");
    match = regex.exec(input);
    expect(match).toBeNull();
  });
});
describe("non-global mode", () => { | ||
@@ -74,0 +126,0 @@ it("doesn't increment lastIndex", () => { |
export const enum Char { | ||
None = -1, | ||
HorizontalTab = 0x09, | ||
LineFeed = 0x0a, | ||
VerticalTab = 0x0b, | ||
FormFeed = 0x0c, | ||
CarriageReturn = 0x0d, | ||
LineFeed = 0x0a, | ||
Space = 0x20, | ||
@@ -9,0 +9,0 @@ Dollar = 0x24, // "$" |
@@ -121,3 +121,3 @@ import { isDigit, isAlpha, isWhitespace, Char } from "../char"; | ||
for (let i = 0, len = this.ranges.length; i < len; i++) { | ||
if (code >= u32(this.ranges[i].from) && code <= u32(this.ranges[i].to)) { | ||
if (this.ranges[i].contains(code)) { | ||
return true; | ||
@@ -124,0 +124,0 @@ } |
@@ -78,2 +78,3 @@ import { State, Automata, GroupStartMarkerState, MatchResult } from "./nfa/nfa"; | ||
dotAll: bool = false; | ||
multiline: bool = false; | ||
@@ -85,2 +86,3 @@ constructor(flagString: string | null) { | ||
this.dotAll = flagString.includes("s"); | ||
this.multiline = flagString.includes("m"); | ||
} | ||
@@ -164,5 +166,6 @@ } | ||
// search for a match at each index within the string | ||
for ( | ||
let matchIndex = this.lastIndex; | ||
matchIndex < (this.startOfInput ? 1 : len); | ||
matchIndex < (this.startOfInput && !this.multiline ? 1 : len); | ||
matchIndex++ | ||
@@ -191,10 +194,28 @@ ) { | ||
const matchEndIndex = match.index + match.matches[0].length; | ||
if (!this.endOfInput || (this.endOfInput && matchEndIndex == len)) { | ||
if (this.global) { | ||
this.lastIndex = matchEndIndex; | ||
// has the start of input criteria been met? | ||
if (this.startOfInput) { | ||
if (this.flags.multiline && matchIndex != 0) { | ||
if (str.charCodeAt(matchIndex - 1) != Char.LineFeed) continue; | ||
} else if (matchIndex != 0) { | ||
continue; | ||
} | ||
return match; | ||
} | ||
// has the end of input criteria been met? | |
if (this.endOfInput) { | ||
if (this.flags.multiline && matchEndIndex != len) { | ||
if (str.charCodeAt(matchEndIndex) != Char.LineFeed) continue; | ||
} else if (matchEndIndex != len) { | ||
continue; | ||
} | ||
} | ||
if (this.global) { | ||
this.lastIndex = matchEndIndex; | ||
} | ||
return match; | ||
} | ||
} | ||
this.lastIndex = 0; | ||
@@ -223,2 +244,6 @@ return null; | ||
} | ||
/** Exposes the `multiline` value held on this instance's `flags` object. */
get multiline(): bool {
  const flags = this.flags;
  return flags.multiline;
}
} | ||
@@ -225,0 +250,0 @@ |
@@ -27,2 +27,6 @@ export function last<T>(arr: T[]): T { | ||
} | ||
/**
 * Reports whether `value` falls inside this range.
 * Both bounds (`from` and `to`) are inclusive.
 */
contains(value: i32): bool {
  // below the lower bound: cannot be inside
  if (value < this.from) return false;
  return value <= this.to;
}
} |
@@ -57,2 +57,9 @@ global.TextDecoder = require("text-encoding").TextDecoder; | ||
}) | ||
.add("complex regex", () => { | ||
const text = | ||
"<TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>"; | ||
const regex = | ||
"<tr([\\w\\W\\s\\d][^<>]{0,})><TD([\\w\\W\\s\\d][^<>]{0,})>([\\d]{0,}\\.)(.*)((<BR>([\\w\\W\\s\\d][^<>]{0,})|[\\s]{0,}))<\\/a><\\/TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})<\\/TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})<\\/TD><\\/TR>"; | ||
executeRegex(regex, text, true); | ||
}) | ||
// add listeners | ||
@@ -59,0 +66,0 @@ .on("cycle", (event) => { |
{ | ||
"name": "assemblyscript-regex", | ||
"version": "1.4.1", | ||
"version": "1.5.0", | ||
"description": "A regex engine built with AssemblyScript", | ||
@@ -5,0 +5,0 @@ "ascMain": "assembly/index.ts", |
@@ -97,3 +97,3 @@ # assemblyscript-regex | ||
- [x] case insensitive | ||
- [ ] multiline | ||
- [x] multiline | ||
- [x] dotAll | ||
@@ -100,0 +100,0 @@ - [ ] unicode |
@@ -12,5 +12,20 @@ const fs = require("fs"); | ||
const knownIssues = { | ||
/* ------- features not yet implemented ------- */ | ||
"does not support start of string quantified within an alternation": [ | ||
1363, | ||
1369, | ||
], | ||
"does not support hex notification in character sets": [...range(1147, 1149)], | ||
"does nto support escaped characters in character ranges": [ | ||
...range(1301, 1308), | ||
], | ||
"lazy quantifiers should still yield the longest overall regex match": [ | ||
...range(141, 143), | ||
1288, | ||
], | ||
/* -------- issues with the tests ------------ */ | ||
"test appears to be incorrect?": [203, 204], | ||
"issue with parsing the test itself": [ | ||
1103, | ||
...range(1185, 1188), | ||
...range(1095, 1098), | ||
@@ -20,6 +35,2 @@ ...range(487, 494), | ||
], | ||
"lazy quantifiers should still yield the longest overall regex match": [ | ||
...range(141, 143), | ||
1288, | ||
], | ||
"test contains an octal escape sequence": [1102], | ||
@@ -29,17 +40,15 @@ // the test results measure captured groups using character length / locations | ||
// this is tricky to reproduce | ||
"test requires a substring function": [1087], | ||
"requires triage": [ | ||
1363, | ||
1369, | ||
"test requires a substring function": [1087, 1088], | ||
/* -------- differences between PCRE and JS regex ------------ */ | ||
"test indicates a malformed regex, whereas it appears OK in JS": [ | ||
1189, | ||
...range(1186, 1188), | ||
], | ||
"JS does not support the \\A \\Z syntax for start and end of string": [ | ||
1163, | ||
1088, | ||
1239, | ||
...range(1147, 1149), | ||
1413, | ||
...range(1301, 1308), | ||
1164, | ||
], | ||
"test indicates a malformed regex, whereas it appears OK in JS": [1189], | ||
"test regex contains syntax not supported in JS": [82, 1158, 281], | ||
"the test behaviour differs between PCRE and JS": [290], | ||
"test appears to be incorrect?": [203, 204], | ||
}; | ||
@@ -87,11 +96,10 @@ | ||
? regex | ||
: escapeQuote(parts[1] == "NULL" ? "" : parts[1]); | ||
: escapeQuote(parts[1] == "NULL" ? "" : parts[1]).replaceAll( | ||
"/", | ||
"\\\\/" | ||
); | ||
let str = parts[2] !== "NULL" ? escapeQuote(parts[2]) : ""; | ||
let flags = parts[0].includes("i") ? "is" : "s"; | ||
let flags = "m" + (parts[0].includes("i") ? "i" : ""); | ||
flags += parts[0] !== "En$" && parts[0] !== "E$n" ? "s" : ""; | ||
if (parts[0].includes("n")) { | ||
testCase += `xit("line: ${index} - multi line regex not supported yet!", () => { });`; | ||
return; | ||
} | ||
if (regex.includes("\\b")) { | ||
@@ -107,7 +115,2 @@ testCase += `xit("line: ${index} - word boundary class not supported yet!", () => { });`; | ||
// if (["}?"].some((f) => regex.includes(f))) { | ||
// testCase += `xit("line: ${index} - lazy range repitition quantifiers are not supported", () => { });`; | ||
// return; | ||
// } | ||
if (["(?"].some((f) => regex.includes(f))) { | ||
@@ -138,6 +141,8 @@ testCase += `xit("line: ${index} - non capturing groups not supported", () => {});`; | ||
// create an expect for each capture group | ||
const captures = parts[3].match(/\((\d{1,2}|\?),(\d{1,2}|\?)\)+/g); | ||
const captures = parts[3].match(/\((\d{1,3}|\?),(\d{1,3}|\?)\)+/g); | ||
captures.forEach((capture, index) => { | ||
const digits = capture.match(/\((\d{1,2}|\?),(\d{1,2}|\?)\)/); | ||
nextCase += `expect(match.matches[${index}]).toBe("${str}".substring(${digits[1]}, ${digits[2]}));`; | ||
const digits = capture.match(/\((\d{1,3}|\?),(\d{1,3}|\?)\)/); | ||
if (digits[1] !== "?") { | ||
nextCase += `expect(match.matches[${index}]).toBe("${str}".substring(${digits[1]}, ${digits[2]}));`; | ||
} | ||
}); | ||
@@ -144,0 +149,0 @@ } |
@@ -8,4 +8,11 @@ import "assemblyscript/std/portable/index"; | ||
const regexObj = new RegExp("[a-c]", "i"); | ||
const match = regexObj.exec("A"); | ||
const regexObj = new RegExp("abc$", "m"); | ||
let match = regexObj.exec("abc\n"); | ||
console.log(JSON.stringify(match, null, 2)); | ||
// match = regexObj.exec("f1\nbar\nbaz\nf2"); | ||
// console.log(JSON.stringify(match, null, 2)); | ||
// const regex = new RegExp("^f\\d{1}$", "gm"); | ||
// let match = regex.exec("f1\nbar\nbaz\nf2"); | ||
// expect(match!.matches[0]).toBe("f1"); |
Sorry, the diff of this file is too big to display
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
266377
4735