Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

assemblyscript-regex

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

assemblyscript-regex - npm Package Compare versions

Comparing version 1.5.0 to 1.6.0

6

assembly/__tests__/capture-group.spec.ts

@@ -48,1 +48,7 @@ import { expectMatch, expectNotMatch, exec } from "./utils";

});
it("non-capturing groups should not capture", () => {
const match = exec("(?:foo)bar(baz)", "foobarbaz");
expect(match.matches[0]).toBe("foobarbaz");
expect(match.matches[1]).toBe("baz");
});

73

assembly/__tests__/character-classes.spec.ts

@@ -1,55 +0,54 @@

import { RegExp } from "..";
import { expectMatch, expectNotMatch, exec } from "./utils";
import { expectMatch, expectNotMatch } from "./utils";
it("dot", () => {
expectMatch(".", [" ", "B", "|", "9"]);
expectNotMatch(".", ["", "\n"]);
it("throws an error if no closing bracket is found", () => {
// expect(() => new RegExp("[abce")).toThrow();
});
it("digit", () => {
expectMatch("\\d", ["0", "9"]);
expectNotMatch("\\d", ["", "b"]);
it("matches discrete characters", () => {
expectMatch("[abce]", ["a", "b", "c", "e"]);
expectNotMatch("[abce]", ["", "f", "h"]);
});
it("non-digit", () => {
expectNotMatch("\\D", ["0", "9", ""]);
expectMatch("\\D", ["b", "|"]);
it("matches character ranges", () => {
expectMatch("[a-c]", ["a", "b", "c"]);
expectNotMatch("[a-c]", ["d", "e", ""]);
expectMatch("[K-M]", ["K", "L", "M"]);
expectNotMatch("[K-M]", ["9", "J"]);
expectMatch("[0-9]", ["0", "9"]);
expectNotMatch("[0-9]", ["a", "A"]);
});
it("word", () => {
expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
expectNotMatch("\\w", ["", "$"]);
it("matches multiple ranges", () => {
expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
expectNotMatch("[a-ce-f]", ["d"]);
});
it("not word", () => {
expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
expectMatch("\\W", ["&", "$"]);
it("supports closing brackets", () => {
expectMatch("[]a]", ["]", "a"]);
});
it("whitespace", () => {
expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
expectNotMatch("\\s", ["", "a", "0"]);
it("supports negated sets", () => {
expectNotMatch("[^a-c]", ["a", "b", "c"]);
expectMatch("[^a-c]", ["d", "e"]);
expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
expectMatch("[^a-ce-f]", ["d"]);
});
it("not whitespace", () => {
expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
expectMatch("\\S", ["a", "0"]);
it("treats - as a literal", () => {
expectMatch("[-abc]", ["-", "a", "b", "c"]);
expectMatch("[abc-]", ["-", "a", "b", "c"]);
});
it("tab, cr, lf, vt, ff", () => {
expectMatch("\\t", ["\t"]);
expectMatch("\\r", ["\r"]);
expectMatch("\\n", ["\n"]);
expectMatch("\\v", ["\v"]);
expectMatch("\\f", ["\f"]);
expectNotMatch("\\t", ["a", " ", ""]);
it("treats - as a literal in negated sets", () => {
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
expectMatch("[^-abc]", ["1", "A"]);
});
it("escaped dot", () => {
expectMatch("\\.", ["."]);
expectNotMatch("\\.", ["", "a"]);
it("supports case insensitive matching", () => {
// simple ranges
expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
expectNotMatch("[a-c]", ["D", "d"], "i");
// complex
expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
});
it("unrecognised character classes are treated as characters", () => {
expectMatch("\\g\\m", ["gm"]);
});

@@ -1,54 +0,55 @@

import { expectMatch, expectNotMatch } from "./utils";
import { RegExp } from "..";
import { expectMatch, expectNotMatch, exec } from "./utils";
it("throws an error if no closing bracket is found", () => {
// expect(() => new RegExp("[abce")).toThrow();
it("dot", () => {
expectMatch(".", [" ", "B", "|", "9"]);
expectNotMatch(".", ["", "\n"]);
});
it("matches discrete characters", () => {
expectMatch("[abce]", ["a", "b", "c", "e"]);
expectNotMatch("[abce]", ["", "f", "h"]);
it("digit", () => {
expectMatch("\\d", ["0", "9"]);
expectNotMatch("\\d", ["", "b"]);
});
it("matches character ranges", () => {
expectMatch("[a-c]", ["a", "b", "c"]);
expectNotMatch("[a-c]", ["d", "e", ""]);
expectMatch("[K-M]", ["K", "L", "M"]);
expectNotMatch("[K-M]", ["9", "J"]);
expectMatch("[0-9]", ["0", "9"]);
expectNotMatch("[0-9]", ["a", "A"]);
it("non-digit", () => {
expectNotMatch("\\D", ["0", "9", ""]);
expectMatch("\\D", ["b", "|"]);
});
it("matches multiple ranges", () => {
expectMatch("[a-ce-f]", ["a", "b", "c", "e", "f"]);
expectNotMatch("[a-ce-f]", ["d"]);
it("word", () => {
expectMatch("\\w", ["A", "a", "Z", "z", "0", "9", "_"]);
expectNotMatch("\\w", ["", "$"]);
});
it("supports closing brackets", () => {
expectMatch("[]a]", ["]", "a"]);
it("not word", () => {
expectNotMatch("\\W", ["A", "a", "Z", "z", "0", "9", "_", ""]);
expectMatch("\\W", ["&", "$"]);
});
it("supports negated sets", () => {
expectNotMatch("[^a-c]", ["a", "b", "c"]);
expectMatch("[^a-c]", ["d", "e"]);
expectNotMatch("[^a-ce-f]", ["a", "b", "c", "e", "f"]);
expectMatch("[^a-ce-f]", ["d"]);
it("whitespace", () => {
expectMatch("\\s", ["\f", "\n", "\r", "\t", "\v"]);
expectNotMatch("\\s", ["", "a", "0"]);
});
it("treats - as a literal", () => {
expectMatch("[-abc]", ["-", "a", "b", "c"]);
expectMatch("[abc-]", ["-", "a", "b", "c"]);
it("not whitespace", () => {
expectNotMatch("\\S", ["", "\f", "\n", "\r", "\t", "\v"]);
expectMatch("\\S", ["a", "0"]);
});
it("treats - as a literal in negated sets", () => {
expectNotMatch("[^-abc]", ["-", "a", "b", "c"]);
expectMatch("[^-abc]", ["1", "A"]);
it("tab, cr, lf, vt, ff", () => {
expectMatch("\\t", ["\t"]);
expectMatch("\\r", ["\r"]);
expectMatch("\\n", ["\n"]);
expectMatch("\\v", ["\v"]);
expectMatch("\\f", ["\f"]);
expectNotMatch("\\t", ["a", " ", ""]);
});
it("supports case insensitive matching", () => {
// simple ranges
expectMatch("[a-c]", ["A", "C", "a", "c"], "i");
expectNotMatch("[a-c]", ["D", "d"], "i");
// complex
expectMatch("[W-c]", ["W", "w", "C", "c"], "i");
expectNotMatch("[W-c]", ["V", "v", "D", "d"], "i");
it("escaped dot", () => {
expectMatch("\\.", ["."]);
expectNotMatch("\\.", ["", "a"]);
});
it("unrecognised character classes are treated as characters", () => {
expectMatch("\\g\\m", ["gm"]);
});

@@ -19,2 +19,3 @@ export const enum Char {

Nine = 0x39,
Colon = 0x3a,
Question = 0x3f, // "?"

@@ -21,0 +22,0 @@ A = 0x41,

@@ -5,4 +5,4 @@ import { isDigit, isAlpha, isWhitespace, Char } from "../char";

CharacterNode,
CharacterClassNode,
CharacterSetNode,
CharacterClassNode,
CharacterRangeNode,

@@ -17,4 +17,4 @@ NodeType,

CharacterRange,
CharacterSet,
CharacterClass,
CharacterSet,
}

@@ -32,6 +32,6 @@

static fromCharacterClassNode(
node: CharacterClassNode,
node: CharacterSetNode,
flags: Flags
): CharacterClassMatcher {
return new CharacterClassMatcher(node.charClass, flags.dotAll);
): CharacterSetMatcher {
return new CharacterSetMatcher(node.charClass, flags.dotAll);
}

@@ -50,5 +50,5 @@

static fromCharacterSetNode(
node: CharacterSetNode,
node: CharacterClassNode,
flags: Flags
): CharacterSetMatcher {
): CharacterClassMatcher {
_flags = flags;

@@ -64,5 +64,5 @@ const matchers = node.expressions.map<Matcher>((exp) => {

return Matcher.fromCharacterNode(exp as CharacterNode, _flags);
case NodeType.CharacterClass:
case NodeType.CharacterSet:
return Matcher.fromCharacterClassNode(
exp as CharacterClassNode,
exp as CharacterSetNode,
_flags

@@ -74,3 +74,3 @@ );

});
return new CharacterSetMatcher(matchers, node.negated);
return new CharacterClassMatcher(matchers, node.negated);
}

@@ -135,5 +135,5 @@

export class CharacterClassMatcher extends Matcher {
export class CharacterSetMatcher extends Matcher {
constructor(public charClass: Char, private dotAll: bool) {
super(MatcherType.CharacterClass);
super(MatcherType.CharacterSet);
}

@@ -181,5 +181,5 @@

export class CharacterSetMatcher extends Matcher {
export class CharacterClassMatcher extends Matcher {
constructor(public matchers: Matcher[], public negated: bool) {
super(MatcherType.CharacterSet);
super(MatcherType.CharacterClass);
}

@@ -200,9 +200,9 @@

case MatcherType.CharacterSet:
match = (matcher as CharacterSetMatcher).matches(code);
break;
case MatcherType.CharacterClass:
match = (matcher as CharacterClassMatcher).matches(code);
break;
case MatcherType.CharacterSet:
match = (matcher as CharacterSetMatcher).matches(code);
break;
}

@@ -209,0 +209,0 @@ if (match) break;

@@ -8,4 +8,4 @@ import {

AlternationNode,
CharacterClassNode,
CharacterSetNode,
CharacterClassNode,
GroupNode,

@@ -46,3 +46,3 @@ NodeType,

constructor(next: State, public groupId: i32) {
constructor(next: State, public capturing: bool, public groupId: i32) {
super();

@@ -65,6 +65,8 @@ this.transitions.push(next);

matches(input: string, position: u32): MatchResult {
this.startMarker.capture = input.substring(
this.startMarker.location,
position
);
if (this.startMarker.capturing) {
this.startMarker.capture = input.substring(
this.startMarker.location,
position
);
}
return MatchResult.Ignore;

@@ -170,6 +172,6 @@ }

function group(nfa: Automata, id: i32): Automata {
function group(nfa: Automata, capturing: bool, id: i32): Automata {
// groups are implemented by wrapping the automata with
// a pair of markers that record matches
const startMarker = new GroupStartMarkerState(nfa.start, id);
const startMarker = new GroupStartMarkerState(nfa.start, capturing, id);
const end = new State();

@@ -229,13 +231,13 @@ const endMarker = new GroupEndMarkerState(end, startMarker);

}
case NodeType.CharacterSet:
case NodeType.CharacterClass:
return Automata.fromMatcher(
Matcher.fromCharacterSetNode(
expression as CharacterSetNode,
expression as CharacterClassNode,
this.flags
)
);
case NodeType.CharacterClass:
case NodeType.CharacterSet:
return Automata.fromMatcher(
Matcher.fromCharacterClassNode(
expression as CharacterClassNode,
expression as CharacterSetNode,
this.flags

@@ -246,3 +248,7 @@ )

const node = expression as GroupNode;
return group(this.automataForNode(node.expression), node.id);
return group(
this.automataForNode(node.expression),
node.capturing,
node.id
);
}

@@ -249,0 +255,0 @@ case NodeType.Assertion:

@@ -10,4 +10,4 @@ import { Char } from "../char";

Character,
CharacterClass,
CharacterSet,
CharacterClass,
CharacterRange,

@@ -76,9 +76,9 @@ Repetition,

export class CharacterSetNode extends Node {
export class CharacterClassNode extends Node {
constructor(public expressions: Node[], public negated: bool) {
super(NodeType.CharacterSet);
super(NodeType.CharacterClass);
}
clone(): Node {
return new CharacterSetNode(
return new CharacterClassNode(
this.expressions.slice(0).map<Node>((s) => s.clone()),

@@ -131,9 +131,9 @@ this.negated

export class CharacterClassNode extends Node {
export class CharacterSetNode extends Node {
constructor(public charClass: Char) {
super(NodeType.CharacterClass);
super(NodeType.CharacterSet);
}
clone(): Node {
return new CharacterClassNode(this.charClass);
return new CharacterSetNode(this.charClass);
}

@@ -215,3 +215,7 @@ }

export class GroupNode extends Node {
constructor(public expression: Node, public id: i32 = -1) {
constructor(
public expression: Node,
public capturing: bool,
public id: i32 = -1
) {
super(NodeType.Group);

@@ -228,3 +232,3 @@ if (id == -1) {

clone(): Node {
return new GroupNode(this.expression.clone(), this.id);
return new GroupNode(this.expression.clone(), this.capturing, this.id);
}

@@ -231,0 +235,0 @@

@@ -8,3 +8,3 @@ import { isDigit, Char, isHexadecimalDigit } from "../char";

AssertionNode,
CharacterClassNode,
CharacterSetNode,
CharacterNode,

@@ -15,3 +15,3 @@ Node,

RepetitionNode,
CharacterSetNode,
CharacterClassNode,
CharacterRangeNode,

@@ -143,3 +143,3 @@ } from "./node";

} else if (isCharacterClass(token)) {
return new CharacterClassNode(this.eatToken());
return new CharacterSetNode(this.eatToken());
} else {

@@ -156,3 +156,3 @@ return new CharacterNode(this.eatToken());

this.eatToken(Char.Dot);
return new CharacterClassNode(Char.Dot);
return new CharacterSetNode(Char.Dot);
}

@@ -211,2 +211,14 @@

private isCapturing(): bool {
if (
this.iterator.current == Char.Question &&
this.iterator.lookahead(1) == Char.Colon
) {
this.eatToken(Char.Question);
this.eatToken(Char.Colon);
return false;
}
return true;
}
// parses a sequence of chars

@@ -226,3 +238,4 @@ private parseSequence(): Node {

this.eatToken(Char.LeftParenthesis);
nodes.push(new GroupNode(this.parseSequence()));
const capturing = this.isCapturing();
nodes.push(new GroupNode(this.parseSequence(), capturing));
this.eatToken(Char.RightParenthesis);

@@ -252,3 +265,3 @@ // @ts-ignore

} else if (token == Char.LeftSquareBracket) {
nodes.push(this.parseCharacterSet());
nodes.push(this.parseCharacterClass());
} else {

@@ -269,3 +282,3 @@ nodes.push(this.parseCharacter());

private parseCharacterSet(): CharacterSetNode {
private parseCharacterClass(): CharacterClassNode {
this.eatToken(Char.LeftSquareBracket);

@@ -299,3 +312,3 @@

// otherwise this is a character class
nodes.push(new CharacterClassNode(this.eatToken()));
nodes.push(new CharacterSetNode(this.eatToken()));
}

@@ -312,4 +325,4 @@ } else {

this.eatToken(Char.RightSquareBracket);
return new CharacterSetNode(nodes, negated);
return new CharacterClassNode(nodes, negated);
}
}

@@ -92,5 +92,5 @@ import { State, Automata, GroupStartMarkerState, MatchResult } from "./nfa/nfa";

// the value of the current state of the string being matched.
// Repeated capture groups, via rage repetitions (e.g. {2,3}) share the same 'id'. The
// Repeated capture groups, via range repetitions (e.g. {2,3}) share the same 'id'. The
// returned regex should only return the value of the final repetition.
function filterCaptures(groupMarkers: GroupStartMarkerState[]): string[] {
function lastCapturesForGroup(groupMarkers: GroupStartMarkerState[]): string[] {
if (!groupMarkers.length) {

@@ -143,3 +143,6 @@ return [];

if (state instanceof GroupStartMarkerState) {
gm.push(state as GroupStartMarkerState);
const startMarker = state as GroupStartMarkerState;
if (startMarker.capturing) {
gm.push(state as GroupStartMarkerState);
}
}

@@ -186,3 +189,3 @@ });

const match = new Match(
[matchStr!].concat(filterCaptures(groupMarkers)),
[matchStr!].concat(lastCapturesForGroup(groupMarkers)),
matchIndex,

@@ -189,0 +192,0 @@ str

{
"name": "assemblyscript-regex",
"version": "1.5.0",
"version": "1.6.0",
"description": "A regex engine built with AssemblyScript",

@@ -5,0 +5,0 @@ "ascMain": "assembly/index.ts",

@@ -36,3 +36,3 @@ # assemblyscript-regex

**Character classes**
**Character sets**

@@ -81,3 +81,3 @@ - [x] .

- [ ] (?<Name>x) named capturing group
- [ ] (?:x) Non-capturing group
- [x] (?:x) Non-capturing group

@@ -84,0 +84,0 @@ **Quantifiers**

@@ -25,2 +25,3 @@ const fs = require("fs");

],
"peformance issue": [1313, 1314],

@@ -51,3 +52,3 @@ /* -------- issues with the tests ------------ */

"test regex contains syntax not supported in JS": [82, 1158, 281],
"the test behaviour differs between PCRE and JS": [290],
"the test behaviour differs between PCRE and JS": [290, 1278],
};

@@ -113,7 +114,17 @@

if (["(?"].some((f) => regex.includes(f))) {
testCase += `xit("line: ${index} - non capturing groups not supported", () => {});`;
if (["(?!", "(?="].some((f) => regex.includes(f))) {
testCase += `xit("line: ${index} - lookaheads not supported", () => {});`;
return;
}
if (["(?m", "(?s", "(?ms"].some((f) => regex.includes(f))) {
testCase += `xit("line: ${index} - JS regex does not support mode modifiers", () => {});`;
return;
}
if (["(?#"].some((f) => regex.includes(f))) {
testCase += `xit("line: ${index} - JS regex does not support comments", () => {});`;
return;
}
if (regex.match(/\\\\\d{1}/)) {

@@ -120,0 +131,0 @@ testCase += `xit("line: ${index} - back references are not supported", () => {});`;

@@ -8,11 +8,6 @@ import "assemblyscript/std/portable/index";

const regexObj = new RegExp("abc$", "m");
let match = regexObj.exec("abc\n");
const regexObj = new RegExp("word (?:[a-zA-Z0-9]+ ){0,300}otherword", "");
let match = regexObj.exec(
"word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope"
);
console.log(JSON.stringify(match, null, 2));
// match = regexObj.exec("f1\nbar\nbaz\nf2");
// console.log(JSON.stringify(match, null, 2));
// const regex = new RegExp("^f\\d{1}$", "gm");
// let match = regex.exec("f1\nbar\nbaz\nf2");
// expect(match!.matches[0]).toBe("f1");

Sorry, the diff of this file is too big to display

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc