assemblyscript-regex - npm Package Compare versions

Comparing version 1.5.0 to 1.6.0

assembly/__tests__/capture-group.spec.ts

		@@ -48,1 +48,7 @@ import { expectMatch, expectNotMatch, exec } from "./utils";
		});

		// Checks that a `(?:...)` group takes part in matching but does not
		// occupy a slot in the resulting `matches` array.
		it("non-capturing groups should not capture", () => {
		const match = exec("(?:foo)bar(baz)", "foobarbaz");
		// index 0 is the whole match, which still includes the text matched by (?:foo)
		expect(match.matches[0]).toBe("foobarbaz");
		// index 1 is the first capture, expected to come from (baz) rather than (?:foo)
		expect(match.matches[1]).toBe("baz");
		});

assembly/__tests__/character-classes.spec.ts

		@@ -1,55 +0,54 @@
		import { RegExp } from "..";
		import { expectMatch, expectNotMatch, exec } from "./utils";
		import { expectMatch, expectNotMatch } from "./utils";

		it("dot", () => {
		expectMatch(".", [" ", "B", "|", "9"]);
		expectNotMatch(".", ["", "\n"]);
		it("throws an error if no closing bracket is found", () => {
		// expect(() => new RegExp("[abce")).toThrow();
		});

		it("digit", () => {
		expectMatch("\\d", ["0", "9"]);
		expectNotMatch("\\d", ["", "b"]);
		it("matches discrete characters", () => {
		expectMatch("[abce]", ["a", "b", "c", "e"]);
		expectNotMatch("[abce]", ["", "f", "h"]);
		});

		it("non-digit", () => {
		expectNotMatch("\\D", ["0", "9", ""]);
		expectMatch("\\D", ["b", "|"]);
		it("matches character ranges", () => {
		expectMatch("[a-c]", ["a", "b", "c"]);
		expectNotMatch("[a-c]", ["d", "e", ""]);
		expectMatch("[K-M]", ["K", "L", "M"]);
		expectNotMatch("[K-M]", ["9", "J"]);
		expectMatch("[0-9]", ["0", "9"]);
		expectNotMatch("[0-9]", ["a", "A"]);
		});

		it("word", () => {
		expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
		expectNotMatch("\\w", ["", "$"]);
		it("matches multiple ranges", () => {
		expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
		expectNotMatch("[a-ce-f]", ["d"]);
		});

		it("not word", () => {
		expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
		expectMatch("\\W", ["&", "$"]);
		it("supports closing brackets", () => {
		expectMatch("[]a]", ["]", "a"]);
		});

		it("whitespace", () => {
		expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
		expectNotMatch("\\s", ["", "a", "0"]);
		it("supports negated sets", () => {
		expectNotMatch("[^a-c]", ["a", "b", "c"]);
		expectMatch("[^a-c]", ["d", "e"]);
		expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
		expectMatch("[^a-ce-f]", ["d"]);
		});

		it("not whitespace", () => {
		expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
		expectMatch("\\S", ["a", "0"]);
		it("treats - as a literal", () => {
		expectMatch("[-abc]", ["-", "a", "b", "c"]);
		expectMatch("[abc-]", ["-", "a", "b", "c"]);
		});

		it("tab, cr, lf, vt, ff", () => {
		expectMatch("\\t", ["\t"]);
		expectMatch("\\r", ["\r"]);
		expectMatch("\\n", ["\n"]);
		expectMatch("\\v", ["\v"]);
		expectMatch("\\f", ["\f"]);
		expectNotMatch("\\t", ["a", " ", ""]);
		it("treats - as a literal in negated sets", () => {
		expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
		expectMatch("[^-abc]", ["1", "A"]);
		});

		it("escaped dot", () => {
		expectMatch("\\.", ["."]);
		expectNotMatch("\\.", ["", "a"]);
		it("supports case insensitive matching", () => {
		// simple ranges
		expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
		expectNotMatch("[a-c]", ["D", "d"], "i");
		// complex
		expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
		expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
		});

		it("unrecognised character classes are treated as characters", () => {
		expectMatch("\\g\\m", ["gm"]);
		});

assembly/__tests__/character-sets.spec.ts

		@@ -1,54 +0,55 @@
		import { expectMatch, expectNotMatch } from "./utils";
		import { RegExp } from "..";
		import { expectMatch, expectNotMatch, exec } from "./utils";

		it("throws an error if no closing bracket is found", () => {
		// expect(() => new RegExp("[abce")).toThrow();
		it("dot", () => {
		expectMatch(".", [" ", "B", "|", "9"]);
		expectNotMatch(".", ["", "\n"]);
		});

		it("matches discrete characters", () => {
		expectMatch("[abce]", ["a", "b", "c", "e"]);
		expectNotMatch("[abce]", ["", "f", "h"]);
		it("digit", () => {
		expectMatch("\\d", ["0", "9"]);
		expectNotMatch("\\d", ["", "b"]);
		});

		it("matches character ranges", () => {
		expectMatch("[a-c]", ["a", "b", "c"]);
		expectNotMatch("[a-c]", ["d", "e", ""]);
		expectMatch("[K-M]", ["K", "L", "M"]);
		expectNotMatch("[K-M]", ["9", "J"]);
		expectMatch("[0-9]", ["0", "9"]);
		expectNotMatch("[0-9]", ["a", "A"]);
		it("non-digit", () => {
		expectNotMatch("\\D", ["0", "9", ""]);
		expectMatch("\\D", ["b", "|"]);
		});

		it("matches multiple ranges", () => {
		expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
		expectNotMatch("[a-ce-f]", ["d"]);
		it("word", () => {
		expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
		expectNotMatch("\\w", ["", "$"]);
		});

		it("supports closing brackets", () => {
		expectMatch("[]a]", ["]", "a"]);
		it("not word", () => {
		expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
		expectMatch("\\W", ["&", "$"]);
		});

		it("supports negated sets", () => {
		expectNotMatch("[^a-c]", ["a", "b", "c"]);
		expectMatch("[^a-c]", ["d", "e"]);
		expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
		expectMatch("[^a-ce-f]", ["d"]);
		it("whitespace", () => {
		expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
		expectNotMatch("\\s", ["", "a", "0"]);
		});

		it("treats - as a literal", () => {
		expectMatch("[-abc]", ["-", "a", "b", "c"]);
		expectMatch("[abc-]", ["-", "a", "b", "c"]);
		it("not whitespace", () => {
		expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
		expectMatch("\\S", ["a", "0"]);
		});

		it("treats - as a literal in negated sets", () => {
		expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
		expectMatch("[^-abc]", ["1", "A"]);
		it("tab, cr, lf, vt, ff", () => {
		expectMatch("\\t", ["\t"]);
		expectMatch("\\r", ["\r"]);
		expectMatch("\\n", ["\n"]);
		expectMatch("\\v", ["\v"]);
		expectMatch("\\f", ["\f"]);
		expectNotMatch("\\t", ["a", " ", ""]);
		});

		it("supports case insensitive matching", () => {
		// simple ranges
		expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
		expectNotMatch("[a-c]", ["D", "d"], "i");
		// complex
		expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
		expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
		it("escaped dot", () => {
		expectMatch("\\.", ["."]);
		expectNotMatch("\\.", ["", "a"]);
		});

		it("unrecognised character classes are treated as characters", () => {
		expectMatch("\\g\\m", ["gm"]);
		});

assembly/char.ts

		@@ -19,2 +19,3 @@ export const enum Char {
		Nine = 0x39,
		Colon = 0x3a,
		Question = 0x3f, // "?"
		@@ -21,0 +22,0 @@ A = 0x41,

assembly/nfa/matcher.ts

		@@ -5,4 +5,4 @@ import { isDigit, isAlpha, isWhitespace, Char } from "../char";
		CharacterNode,
		CharacterClassNode,
		CharacterSetNode,
		CharacterClassNode,
		CharacterRangeNode,
		@@ -17,4 +17,4 @@ NodeType,
		CharacterRange,
		CharacterSet,
		CharacterClass,
		CharacterSet,
		}
		@@ -32,6 +32,6 @@
		static fromCharacterClassNode(
		node: CharacterClassNode,
		node: CharacterSetNode,
		flags: Flags
		): CharacterClassMatcher {
		return new CharacterClassMatcher(node.charClass, flags.dotAll);
		): CharacterSetMatcher {
		return new CharacterSetMatcher(node.charClass, flags.dotAll);
		}
		@@ -50,5 +50,5 @@
		static fromCharacterSetNode(
		node: CharacterSetNode,
		node: CharacterClassNode,
		flags: Flags
		): CharacterSetMatcher {
		): CharacterClassMatcher {
		_flags = flags;
		@@ -64,5 +64,5 @@ const matchers = node.expressions.map<Matcher>((exp) => {
		return Matcher.fromCharacterNode(exp as CharacterNode, _flags);
		case NodeType.CharacterClass:
		case NodeType.CharacterSet:
		return Matcher.fromCharacterClassNode(
		exp as CharacterClassNode,
		exp as CharacterSetNode,
		_flags
		@@ -74,3 +74,3 @@ );
		});
		return new CharacterSetMatcher(matchers, node.negated);
		return new CharacterClassMatcher(matchers, node.negated);
		}
		@@ -135,5 +135,5 @@

		export class CharacterClassMatcher extends Matcher {
		export class CharacterSetMatcher extends Matcher {
		constructor(public charClass: Char, private dotAll: bool) {
		super(MatcherType.CharacterClass);
		super(MatcherType.CharacterSet);
		}
		@@ -181,5 +181,5 @@

		export class CharacterSetMatcher extends Matcher {
		export class CharacterClassMatcher extends Matcher {
		constructor(public matchers: Matcher[], public negated: bool) {
		super(MatcherType.CharacterSet);
		super(MatcherType.CharacterClass);
		}
		@@ -200,9 +200,9 @@

		case MatcherType.CharacterSet:
		match = (matcher as CharacterSetMatcher).matches(code);
		break;

		case MatcherType.CharacterClass:
		match = (matcher as CharacterClassMatcher).matches(code);
		break;

		case MatcherType.CharacterSet:
		match = (matcher as CharacterSetMatcher).matches(code);
		break;
		}
		@@ -209,0 +209,0 @@ if (match) break;

assembly/nfa/nfa.ts

		@@ -8,4 +8,4 @@ import {
		AlternationNode,
		CharacterClassNode,
		CharacterSetNode,
		CharacterClassNode,
		GroupNode,
		@@ -46,3 +46,3 @@ NodeType,

		constructor(next: State, public groupId: i32) {
		constructor(next: State, public capturing: bool, public groupId: i32) {
		super();
		@@ -65,6 +65,8 @@ this.transitions.push(next);
		matches(input: string, position: u32): MatchResult {
		this.startMarker.capture = input.substring(
		this.startMarker.location,
		position
		);
		if (this.startMarker.capturing) {
		this.startMarker.capture = input.substring(
		this.startMarker.location,
		position
		);
		}
		return MatchResult.Ignore;
		@@ -170,6 +172,6 @@ }

		function group(nfa: Automata, id: i32): Automata {
		function group(nfa: Automata, capturing: bool, id: i32): Automata {
		// groups are implemented by wrapping the automata with
		// a pair of markers that record matches
		const startMarker = new GroupStartMarkerState(nfa.start, id);
		const startMarker = new GroupStartMarkerState(nfa.start, capturing, id);
		const end = new State();
		@@ -229,13 +231,13 @@ const endMarker = new GroupEndMarkerState(end, startMarker);
		}
		case NodeType.CharacterSet:
		case NodeType.CharacterClass:
		return Automata.fromMatcher(
		Matcher.fromCharacterSetNode(
		expression as CharacterSetNode,
		expression as CharacterClassNode,
		this.flags
		)
		);
		case NodeType.CharacterClass:
		case NodeType.CharacterSet:
		return Automata.fromMatcher(
		Matcher.fromCharacterClassNode(
		expression as CharacterClassNode,
		expression as CharacterSetNode,
		this.flags
		@@ -246,3 +248,7 @@ )
		const node = expression as GroupNode;
		return group(this.automataForNode(node.expression), node.id);
		return group(
		this.automataForNode(node.expression),
		node.capturing,
		node.id
		);
		}
		@@ -249,0 +255,0 @@ case NodeType.Assertion:

assembly/parser/node.ts

		@@ -10,4 +10,4 @@ import { Char } from "../char";
		Character,
		CharacterClass,
		CharacterSet,
		CharacterClass,
		CharacterRange,
		@@ -76,9 +76,9 @@ Repetition,

		export class CharacterSetNode extends Node {
		export class CharacterClassNode extends Node {
		constructor(public expressions: Node[], public negated: bool) {
		super(NodeType.CharacterSet);
		super(NodeType.CharacterClass);
		}

		clone(): Node {
		return new CharacterSetNode(
		return new CharacterClassNode(
		this.expressions.slice(0).map<Node>((s) => s.clone()),
		@@ -131,9 +131,9 @@ this.negated

		export class CharacterClassNode extends Node {
		export class CharacterSetNode extends Node {
		constructor(public charClass: Char) {
		super(NodeType.CharacterClass);
		super(NodeType.CharacterSet);
		}

		clone(): Node {
		return new CharacterClassNode(this.charClass);
		return new CharacterSetNode(this.charClass);
		}
		@@ -215,3 +215,7 @@ }
		export class GroupNode extends Node {
		constructor(public expression: Node, public id: i32 = -1) {
		constructor(
		public expression: Node,
		public capturing: bool,
		public id: i32 = -1
		) {
		super(NodeType.Group);
		@@ -228,3 +232,3 @@ if (id == -1) {
		clone(): Node {
		return new GroupNode(this.expression.clone(), this.id);
		return new GroupNode(this.expression.clone(), this.capturing, this.id);
		}
		@@ -231,0 +235,0 @@

assembly/parser/parser.ts

		@@ -8,3 +8,3 @@ import { isDigit, Char, isHexadecimalDigit } from "../char";
		AssertionNode,
		CharacterClassNode,
		CharacterSetNode,
		CharacterNode,
		@@ -15,3 +15,3 @@ Node,
		RepetitionNode,
		CharacterSetNode,
		CharacterClassNode,
		CharacterRangeNode,
		@@ -143,3 +143,3 @@ } from "./node";
		} else if (isCharacterClass(token)) {
		return new CharacterClassNode(this.eatToken());
		return new CharacterSetNode(this.eatToken());
		} else {
		@@ -156,3 +156,3 @@ return new CharacterNode(this.eatToken());
		this.eatToken(Char.Dot);
		return new CharacterClassNode(Char.Dot);
		return new CharacterSetNode(Char.Dot);
		}
		@@ -211,2 +211,14 @@

		// Decides whether the group being parsed is capturing, by checking
		// whether the input at the current position starts with "?:".
		//
		// NOTE: despite the predicate-style name this is not a pure query. When
		// the "?:" prefix is present, both characters are consumed via eatToken
		// before returning.
		//
		// Returns false when the "?:" prefix was found (and consumed), true
		// otherwise (nothing is consumed in that case).
		private isCapturing(): bool {
		if (
		this.iterator.current == Char.Question &&
		this.iterator.lookahead(1) == Char.Colon
		) {
		// consume the "?:" prefix
		this.eatToken(Char.Question);
		this.eatToken(Char.Colon);
		return false;
		}
		// any other start, including other "(?" forms, is reported as capturing
		return true;
		}

		// parses a sequence of chars
		@@ -226,3 +238,4 @@ private parseSequence(): Node {
		this.eatToken(Char.LeftParenthesis);
		nodes.push(new GroupNode(this.parseSequence()));
		const capturing = this.isCapturing();
		nodes.push(new GroupNode(this.parseSequence(), capturing));
		this.eatToken(Char.RightParenthesis);
		@@ -252,3 +265,3 @@ // @ts-ignore
		} else if (token == Char.LeftSquareBracket) {
		nodes.push(this.parseCharacterSet());
		nodes.push(this.parseCharacterClass());
		} else {
		@@ -269,3 +282,3 @@ nodes.push(this.parseCharacter());

		private parseCharacterSet(): CharacterSetNode {
		private parseCharacterClass(): CharacterClassNode {
		this.eatToken(Char.LeftSquareBracket);
		@@ -299,3 +312,3 @@
		// otherwise this is a character class
		nodes.push(new CharacterClassNode(this.eatToken()));
		nodes.push(new CharacterSetNode(this.eatToken()));
		}
		@@ -312,4 +325,4 @@ } else {
		this.eatToken(Char.RightSquareBracket);
		return new CharacterSetNode(nodes, negated);
		return new CharacterClassNode(nodes, negated);
		}
		}

assembly/regexp.ts

		@@ -92,5 +92,5 @@ import { State, Automata, GroupStartMarkerState, MatchResult } from "./nfa/nfa";
		// the value of the current state of the string being matched.
		// Repeated capture groups, via rage repetitions (e.g. {2,3}) share the same 'id'. The
		// Repeated capture groups, via range repetitions (e.g. {2,3}) share the same 'id'. The
		// returned regex should only return the value of the final repetition.
		function filterCaptures(groupMarkers: GroupStartMarkerState[]): string[] {
		function lastCapturesForGroup(groupMarkers: GroupStartMarkerState[]): string[] {
		if (!groupMarkers.length) {
		@@ -143,3 +143,6 @@ return [];
		if (state instanceof GroupStartMarkerState) {
		gm.push(state as GroupStartMarkerState);
		const startMarker = state as GroupStartMarkerState;
		if (startMarker.capturing) {
		gm.push(state as GroupStartMarkerState);
		}
		}
		@@ -186,3 +189,3 @@ });
		const match = new Match(
		[matchStr!].concat(filterCaptures(groupMarkers)),
		[matchStr!].concat(lastCapturesForGroup(groupMarkers)),
		matchIndex,
		@@ -189,0 +192,0 @@ str

package.json

		{
		"name": "assemblyscript-regex",
		"version": "1.5.0",
		"version": "1.6.0",
		"description": "A regex engine built with AssemblyScript",
		@@ -5,0 +5,0 @@ "ascMain": "assembly/index.ts",

README.md

		@@ -36,3 +36,3 @@ # assemblyscript-regex

		Character classes
		Character sets

		@@ -81,3 +81,3 @@ - [x] .
		- [ ] (?<Name>x) named capturing group
		- [ ] (?:x) Non-capturing group
		- [x] (?:x) Non-capturing group

		@@ -84,0 +84,0 @@ Quantifiers

spec/test-generator.js

		@@ -25,2 +25,3 @@ const fs = require("fs");
		],
		"peformance issue": [1313, 1314],

		@@ -51,3 +52,3 @@ /* -------- issues with the tests ------------ */
		"test regex contains syntax not supported in JS": [82, 1158, 281],
		"the test behaviour differs between PCRE and JS": [290],
		"the test behaviour differs between PCRE and JS": [290, 1278],
		};
		@@ -113,7 +114,17 @@

		if (["(?"].some((f) => regex.includes(f))) {
		testCase += `xit("line: ${index} - non capturing groups not supported", () => {});`;
		if (["(?!", "(?="].some((f) => regex.includes(f))) {
		testCase += `xit("line: ${index} - lookaheads not supported", () => {});`;
		return;
		}

		if (["(?m", "(?s", "(?ms"].some((f) => regex.includes(f))) {
		testCase += `xit("line: ${index} - JS regex does not support mode modifiers", () => {});`;
		return;
		}

		if (["(?#"].some((f) => regex.includes(f))) {
		testCase += `xit("line: ${index} - JS regex does not support comments", () => {});`;
		return;
		}

		if (regex.match(/\\\\\d{1}/)) {
		@@ -120,0 +131,0 @@ testCase += `xit("line: ${index} - back references are not supported", () => {});`;

ts/index.ts

		@@ -8,11 +8,6 @@ import "assemblyscript/std/portable/index";

		const regexObj = new RegExp("abc$", "m");
		let match = regexObj.exec("abc\n");
		const regexObj = new RegExp("word (?:[a-zA-Z0-9]+ ){0,300}otherword", "");
		let match = regexObj.exec(
		"word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope"
		);
		console.log(JSON.stringify(match, null, 2));
		// match = regexObj.exec("f1\nbar\nbaz\nf2");
		// console.log(JSON.stringify(match, null, 2));

		// const regex = new RegExp("^f\\d{1}$", "gm");

		// let match = regex.exec("f1\nbar\nbaz\nf2");
		// expect(match!.matches[0]).toBe("f1");

assembly/__spec_tests__/generated.spec.ts

Sorry, the diff of this file is too big to display

assemblyscript-regex - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics