@@ -72,2 +72,54 @@ import { RegExp } from "..";

		/**
		 * Specs for the "m" (multi-line) flag.
		 *
		 * The expectations below pin that, with the flag set, `^` and `$` match at
		 * line-feed boundaries inside the input and not only at the very start and
		 * end of the string. The global-mode cases additionally pin that repeated
		 * `exec` calls on the same RegExp walk through successive matches and
		 * finally return null.
		 */
		describe("multi-line mode", () => {
			it("sets multi-line flag", () => {
				expect(new RegExp("\\d+", "m").multiline).toBeTruthy();
				expect(new RegExp("\\d+", "").multiline).toBeFalsy();
			});

			it("matches across multiple lines", () => {
				// non-global: only the first line that satisfies both anchors is reported
				const match = exec("^f\\d{1}$", "f1\nbar\nbaz\nf2", "m");
				expect(match.matches.length).toBe(1);
				expect(match.matches[0]).toBe("f1");
			});

			// Each global-mode case has its own title so a failing case can be
			// identified from the test report.
			it("matches across multiple lines with global mode", () => {
				const regex = new RegExp("^f\\d{1}$", "gm");
				const input = "f1\nbar\nbaz\nf2";

				let match = regex.exec(input);
				expect(match!.matches[0]).toBe("f1");

				match = regex.exec(input);
				expect(match!.matches[0]).toBe("f2");

				// no further line satisfies both anchors
				match = regex.exec(input);
				expect(match).toBeNull();
			});

			it("matches the start of each line with global mode", () => {
				const regex = new RegExp("^[a-c]", "gm");
				const input = "a1\nd2\nc3\n";

				let match = regex.exec(input);
				expect(match!.matches[0]).toBe("a");

				// the "d2" line is skipped because "d" is outside [a-c]
				match = regex.exec(input);
				expect(match!.matches[0]).toBe("c");

				match = regex.exec(input);
				expect(match).toBeNull();
			});

			it("matches the end of each line with global mode", () => {
				const regex = new RegExp("[a-c]$", "gm");
				const input = "1a\n2d\n3c\n";

				let match = regex.exec(input);
				expect(match!.matches[0]).toBe("a");

				// the "2d" line is skipped because "d" is outside [a-c]
				match = regex.exec(input);
				expect(match!.matches[0]).toBe("c");

				match = regex.exec(input);
				expect(match).toBeNull();
			});
		});

		describe("non-global mode", () => {
		@@ -74,0 +126,0 @@ it("doesn't increment lastIndex", () => {

assembly/char.ts

		export const enum Char {
		None = -1,
		HorizontalTab = 0x09,
		LineFeed = 0x0a,
		VerticalTab = 0x0b,
		FormFeed = 0x0c,
		CarriageReturn = 0x0d,
		LineFeed = 0x0a,
		Space = 0x20,
		@@ -9,0 +9,0 @@ Dollar = 0x24, // "$"

assembly/nfa/matcher.ts

		@@ -121,3 +121,3 @@ import { isDigit, isAlpha, isWhitespace, Char } from "../char";
		for (let i = 0, len = this.ranges.length; i < len; i++) {
		if (code >= u32(this.ranges[i].from) && code <= u32(this.ranges[i].to)) {
		if (this.ranges[i].contains(code)) {
		return true;
		@@ -124,0 +124,0 @@ }

assembly/regexp.ts

		@@ -78,2 +78,3 @@ import { State, Automata, GroupStartMarkerState, MatchResult } from "./nfa/nfa";
		dotAll: bool = false;
		multiline: bool = false;

		@@ -85,2 +86,3 @@ constructor(flagString: string \| null) {
		this.dotAll = flagString.includes("s");
		this.multiline = flagString.includes("m");
		}
		@@ -164,5 +166,6 @@ }
		// search for a match at each index within the string

		for (
		let matchIndex = this.lastIndex;
		matchIndex < (this.startOfInput ? 1 : len);
		matchIndex < (this.startOfInput && !this.multiline ? 1 : len);
		matchIndex++
		@@ -191,10 +194,28 @@ ) {
		const matchEndIndex = match.index + match.matches[0].length;
		if (!this.endOfInput \|\| (this.endOfInput && matchEndIndex == len)) {
		if (this.global) {
		this.lastIndex = matchEndIndex;

		// has the start of input criteria been met?
		if (this.startOfInput) {
		if (this.flags.multiline && matchIndex != 0) {
		if (str.charCodeAt(matchIndex - 1) != Char.LineFeed) continue;
		} else if (matchIndex != 0) {
		continue;
		}
		return match;
		}

		// has the end of input criteria been met?
		if (this.endOfInput) {
		if (this.flags.multiline && matchEndIndex != len) {
		if (str.charCodeAt(matchEndIndex) != Char.LineFeed) continue;
		} else if (matchEndIndex != len) {
		continue;
		}
		}

		if (this.global) {
		this.lastIndex = matchEndIndex;
		}
		return match;
		}
		}

		this.lastIndex = 0;
		@@ -223,2 +244,6 @@ return null;
		}

		/**
		 * Read-only accessor for the multi-line setting.
		 *
		 * @returns the `multiline` value held on `this.flags`.
		 */
		get multiline(): bool {
		return this.flags.multiline;
		}
		}
		@@ -225,0 +250,0 @@

assembly/util.ts

		@@ -27,2 +27,6 @@ export function last<T>(arr: T[]): T {
		}

		/**
		 * Tests whether `value` falls within this range, with both bounds
		 * (`this.from` and `this.to`) treated as inclusive.
		 *
		 * @param value the value to test
		 * @returns true when `this.from <= value <= this.to`, false otherwise
		 */
		contains(value: i32): bool {
			// below the lower bound: cannot be inside the range
			if (value < this.from) {
				return false;
			}
			return value <= this.to;
		}
		}

benchmark/benchmark.js

		@@ -57,2 +57,9 @@ global.TextDecoder = require("text-encoding").TextDecoder;
		})
		.add("complex regex", () => {
		const text =
		"<TR BGCOLOR='#DBE9E9'><TD align=left valign=top>43.<a href='joblist.cfm?JobID=94 6735&Keyword='>Word Processor<BR>(N-1286)</a></TD><TD align=left valign=top>Lega lstaff.com</TD><TD align=left valign=top>CA - Statewide</TD></TR>";
		const regex =
		"<tr([\\w\\W\\s\\d][^<>]{0,})><TD([\\w\\W\\s\\d][^<>]{0,})>([\\d]{0,}\\.)(.*)((<BR>([\\w\\W\\s\\d][^<>]{0,})\|[\\s]{0,}))<\\/a><\\/TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})<\\/TD><TD([\\w\\W\\s\\d][^<>]{0,})>([\\w\\W\\s\\d][^<>]{0,})<\\/TD><\\/TR>";
		executeRegex(regex, text, true);
		})
		// add listeners
		@@ -59,0 +66,0 @@ .on("cycle", (event) => {

package.json

		{
		"name": "assemblyscript-regex",
		"version": "1.4.1",
		"version": "1.5.0",
		"description": "A regex engine built with AssemblyScript",
		@@ -5,0 +5,0 @@ "ascMain": "assembly/index.ts",

README.md

		@@ -97,3 +97,3 @@ # assemblyscript-regex
		- [x] case insensitive
		- [ ] multiline
		- [x] multiline
		- [x] dotAll
		@@ -100,0 +100,0 @@ - [ ] unicode

spec/test-generator.js

		@@ -12,5 +12,20 @@ const fs = require("fs");
		const knownIssues = {
		/* ------- features not yet implemented ------- */
		"does not support start of string quantified within an alternation": [
		1363,
		1369,
		],
		"does not support hex notification in character sets": [...range(1147, 1149)],
		"does nto support escaped characters in character ranges": [
		...range(1301, 1308),
		],
		"lazy quantifiers should still yield the longest overall regex match": [
		...range(141, 143),
		1288,
		],

		/* -------- issues with the tests ------------ */
		"test appears to be incorrect?": [203, 204],
		"issue with parsing the test itself": [
		1103,
		...range(1185, 1188),
		...range(1095, 1098),
		@@ -20,6 +35,2 @@ ...range(487, 494),
		],
		"lazy quantifiers should still yield the longest overall regex match": [
		...range(141, 143),
		1288,
		],
		"test contains an octal escape sequence": [1102],
		@@ -29,17 +40,15 @@ // the test results measure captured groups using character length / locations
		// this is tricky to reproduce
		"test requires a substring function": [1087],
		"requires triage": [
		1363,
		1369,
		"test requires a substring function": [1087, 1088],

		/* -------- differences between PCRE and JS regex ------------ */
		"test indicates a malformed regex, whereas it appears OK in JS": [
		1189,
		...range(1186, 1188),
		],
		"JS does not support the \\A \\Z syntax for start and end of string": [
		1163,
		1088,
		1239,
		...range(1147, 1149),
		1413,
		...range(1301, 1308),
		1164,
		],
		"test indicates a malformed regex, whereas it appears OK in JS": [1189],
		"test regex contains syntax not supported in JS": [82, 1158, 281],
		"the test behaviour differs between PCRE and JS": [290],
		"test appears to be incorrect?": [203, 204],
		};
		@@ -87,11 +96,10 @@
		? regex
		: escapeQuote(parts[1] == "NULL" ? "" : parts[1]);
		: escapeQuote(parts[1] == "NULL" ? "" : parts[1]).replaceAll(
		"/",
		"\\\\/"
		);
		let str = parts[2] !== "NULL" ? escapeQuote(parts[2]) : "";
		let flags = parts[0].includes("i") ? "is" : "s";
		let flags = "m" + (parts[0].includes("i") ? "i" : "");
		flags += parts[0] !== "En$" && parts[0] !== "E$n" ? "s" : "";

		if (parts[0].includes("n")) {
		testCase += `xit("line: ${index} - multi line regex not supported yet!", () => { });`;
		return;
		}

		if (regex.includes("\\b")) {
		@@ -107,7 +115,2 @@ testCase += `xit("line: ${index} - word boundary class not supported yet!", () => { });`;

		// if (["}?"].some((f) => regex.includes(f))) {
		// testCase += `xit("line: ${index} - lazy range repitition quantifiers are not supported", () => { });`;
		// return;
		// }

		if (["(?"].some((f) => regex.includes(f))) {
		@@ -138,6 +141,8 @@ testCase += `xit("line: ${index} - non capturing groups not supported", () => {});`;
		// create an expect for each capture group
		const captures = parts[3].match(/$(\d{1,2}\|\?),(\d{1,2}\|\?)$+/g);
		const captures = parts[3].match(/$(\d{1,3}\|\?),(\d{1,3}\|\?)$+/g);
		captures.forEach((capture, index) => {
		const digits = capture.match(/$(\d{1,2}\|\?),(\d{1,2}\|\?)$/);
		nextCase += `expect(match.matches[${index}]).toBe("${str}".substring(${digits[1]}, ${digits[2]}));`;
		const digits = capture.match(/$(\d{1,3}\|\?),(\d{1,3}\|\?)$/);
		if (digits[1] !== "?") {
		nextCase += `expect(match.matches[${index}]).toBe("${str}".substring(${digits[1]}, ${digits[2]}));`;
		}
		});
		@@ -144,0 +149,0 @@ }

ts/index.ts

		@@ -8,4 +8,11 @@ import "assemblyscript/std/portable/index";

		const regexObj = new RegExp("[a-c]", "i");
		const match = regexObj.exec("A");
		const regexObj = new RegExp("abc$", "m");
		let match = regexObj.exec("abc\n");
		console.log(JSON.stringify(match, null, 2));
		// match = regexObj.exec("f1\nbar\nbaz\nf2");
		// console.log(JSON.stringify(match, null, 2));

		// const regex = new RegExp("^f\\d{1}$", "gm");

		// let match = regex.exec("f1\nbar\nbaz\nf2");
		// expect(match!.matches[0]).toBe("f1");

assembly/__spec_tests__/generated.spec.ts

Sorry, the diff of this file is too big to display

assemblyscript-regex - npm Package Compare versions

Improved metrics