Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

assemblyscript-regex

Package Overview
Dependencies
Maintainers
1
Versions
13
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

assemblyscript-regex - npm Package Compare versions

Comparing version 1.2.0 to 1.3.0

.prettierignore

6

assembly/__tests__/capture-group.spec.ts

@@ -36,1 +36,7 @@ import { expectMatch, expectNotMatch, exec } from "./utils";

});
it("repeated capture groups should return the last match", () => {
const match = exec("([a-c])+", "ac");
expect(match.matches[0]).toBe("ac");
expect(match.matches[1]).toBe("c");
});

12

assembly/__tests__/quantifiers.spec.ts

@@ -54,8 +54,8 @@ import { expectMatch, expectNotMatch, exec } from "./utils";

it("zero or one supports non-greedy mode", () => {
expectMatch("a?", ["a"]);
let match = exec("a?", "bc");
expect(match).not.toBeNull();
expect(match.matches[0]).toStrictEqual("");
});
// it("zero or one supports non-greedy mode", () => {
// expectMatch("a?", ["a"]);
// let match = exec("a??", "bc");
// expect(match).not.toBeNull();
// expect(match.matches[0]).toStrictEqual("");
// });
});

@@ -43,2 +43,7 @@ /* eslint-disable no-invalid-regexp */

it("handles nongreedy quantifiers", () => {
const match = exec("a{2,4}?", "aaaaaaaaaa");
expect(match.matches[0]).toBe("aa");
});
it("throws if quantifying a quantifier!", () => {

@@ -45,0 +50,0 @@ expect(() => {

@@ -8,2 +8,3 @@ export const enum Char {

LineFeed = 0x0a,
Space = 0x20,
Dollar = 0x24, // "$"

@@ -18,2 +19,3 @@ LeftParenthesis = 0x28,

Zero = 0x30,
Nine = 0x39,
Question = 0x3f, // "?"

@@ -24,2 +26,3 @@ A = 0x41,

W = 0x57,
Z = 0x5a,
LeftSquareBracket = 0x5b, // "["

@@ -41,36 +44,54 @@ Backslash = 0x5c, // "\"

x = 0x78,
z = 0x7a,
LeftCurlyBrace = 0x7b /* { */,
VerticalBar = 0x7c /* | */,
RightCurlyBrace = 0x7d /* */,
RightCurlyBrace = 0x7d /* } */,
NonBreakingSpace = 0xa0,
}
// @ts-ignore
@inline
function inRange(value: u32, from: u32, to: u32): bool {
  // Returns true when `value` lies within the inclusive range [from, to].
  // Callers are expected to pass from <= to.
  if (ASC_TARGET == 1) {
    // Makes use of unsigned integer operations, making this approach a
    // little faster when compiled to WASM: if value < from, the subtraction
    // wraps around to a huge unsigned number and the comparison fails.
    // The span is compared with `<=` rather than `< span + 1` so that the
    // `+ 1` cannot overflow to 0 when the range covers the whole of u32.
    return value - from <= to - from;
  } else {
    // Portable (non-WASM) build: plain two-sided comparison.
    return value >= from && value <= to;
  }
}
export function isDigit(code: u32): bool {
return code - Char.Zero < 10;
return inRange(code, Char.Zero, Char.Nine);
}
export function isHexadecimalDigit(code: u32): bool {
return isDigit(code) || code - Char.a < 6;
return isDigit(code) || inRange(code, Char.a, Char.f);
}
export function isLowercaseAlpha(code: u32): bool {
return code - Char.a < 26;
return inRange(code, Char.a, Char.z);
}
export function isUppercaseAlpha(code: u32): bool {
return code - Char.A < 26;
return inRange(code, Char.A, Char.Z);
}
export function isAlpha(code: u32): bool {
return (code | 32) - Char.a < 26;
if (ASC_TARGET == 1) {
return (code | 32) - Char.a < 26;
} else {
return inRange(code, Char.a, Char.z) || inRange(code, Char.A, Char.Z);
}
}
export function isWhitespace(code: u32): bool {
if (code < 0x1680) {
// < <LS> (1)
// <SP>, <TAB>, <LF>, <VT>, <FF>, <CR> and <NBSP>
// @ts-ignore: cast
return ((code | 0x80) == 0xa0) | (code - 0x09 <= 0x0d - 0x09);
}
if (code - 0x2000 <= 0x200a - 0x2000) return true;
switch (code) {
case Char.Space:
case Char.HorizontalTab:
case Char.VerticalTab:
case Char.FormFeed:
case Char.LineFeed:
case Char.CarriageReturn:
case Char.NonBreakingSpace:
case 0x1680: // <LS> (1)

@@ -85,3 +106,6 @@ case 0x2028: // <LS> (2)

}
if (inRange(code, 0x2000, 0x200a)) {
return true;
}
return false;
}

@@ -137,7 +137,12 @@ import {

function zeroOrOne(nfa: Automata): Automata {
function zeroOrOne(nfa: Automata, greedy: bool): Automata {
const start = new State();
const end = new State();
start.transitions.push(nfa.start);
start.transitions.push(end);
if (greedy) {
start.transitions.push(nfa.start);
start.transitions.push(end);
} else {
start.transitions.push(end);
start.transitions.push(nfa.start);
}
nfa.end.transitions.push(end);

@@ -186,3 +191,3 @@ return new Automata(start, end);

if (quantifier == Char.Question) {
return zeroOrOne(automata);
return zeroOrOne(automata, node.greedy);
} else if (quantifier == Char.Plus) {

@@ -189,0 +194,0 @@ return oneOrMore(automata, node.greedy);

@@ -8,4 +8,4 @@ import { State } from "./nfa";

): void {
if (visited.includes(state)) return;
visitor(state);
if (visited.includes(state)) return;
visited.push(state);

@@ -12,0 +12,0 @@ const nextStates = state.transitions;

@@ -158,3 +158,8 @@ import { Char } from "../char";

export class RangeRepetitionNode extends Node {
constructor(public expression: Node, public from: i32, public to: i32) {
constructor(
public expression: Node,
public from: i32,
public to: i32,
public greedy: bool = true
) {
super(NodeType.RangeRepetition);

@@ -161,0 +166,0 @@ if (expression.type == NodeType.RangeRepetition) {

@@ -77,4 +77,3 @@ import { isDigit, Char, isHexadecimalDigit } from "../char";

class Range {
from: i32 = -1;
to: i32 = -1;
constructor(public from: i32, public to: i32) {}
}

@@ -161,60 +160,50 @@

private maybeParseRepetitionRange(): Range | null {
// snapshot
const iteratorCopy = this.iterator.copy();
this.eatToken(Char.LeftCurlyBrace);
let range = new Range();
let firstDigit = true;
private maybeParseDigit(): i32 {
let digitStr = "";
while (this.iterator.more()) {
const token = this.iterator.current;
if (token == Char.RightParenthesis) break;
if (firstDigit) {
if (isDigit(token)) {
// if it is a digit, keep eating
digitStr += this.iterator.currentAsString();
} else {
range.from = digitStr.length ? <i32>parseInt(digitStr) : -1;
range.to = range.from;
if (token == Char.Comma) {
// if we meet a comma, start parsing the next digit
firstDigit = false;
digitStr = "";
range.to = -1;
} else if (token == Char.RightCurlyBrace) {
this.eatToken(Char.RightCurlyBrace);
// close brace, this is a single value range
return range;
} else {
// anything else, we got a problem
break;
}
}
if (isDigit(token)) {
digitStr += this.iterator.currentAsString();
} else {
if (isDigit(token)) {
// if it is a digit, keep eating
digitStr += this.iterator.currentAsString();
} else {
range.to = digitStr.length ? <i32>parseInt(digitStr) : -1;
if (token == Char.RightCurlyBrace) {
this.eatToken(Char.RightCurlyBrace);
// close brace, end of range
return range;
} else {
// anything else, we got a problem
break;
}
}
return digitStr == "" ? -1 : <i32>parseInt(digitStr);
}
this.eatToken();
}
return digitStr == "" ? -1 : <i32>parseInt(digitStr);
}
// repetition not found - reset state
private maybeParseRepetitionRange(): Range | null {
  // Attempts to parse a range quantifier of the form {n}, {n,} or {n,m},
  // starting at the opening curly brace.
  //
  // Returns a Range when the text is a well-formed quantifier:
  //   {n}   => Range(n, n)
  //   {n,}  => Range(n, -1)  (maybeParseDigit yields -1 when no digits follow)
  //   {n,m} => Range(n, m)
  // Returns null otherwise, with the iterator restored to where it was on
  // entry so that nothing consumed during the failed attempt is lost.

  // snapshot, used to roll back on any failed parse
  const iteratorCopy = this.iterator.copy();
  this.eatToken(Char.LeftCurlyBrace);

  const from = this.maybeParseDigit();
  // from == -1 means no lower bound was found; fall through to the rollback
  // below rather than returning with the "{" already consumed.
  if (from != -1) {
    if (this.iterator.current == Char.RightCurlyBrace) {
      // close brace, this is a single value range
      this.eatToken();
      return new Range(from, from);
    } else if (this.iterator.current == Char.Comma) {
      // a comma introduces the (optional) upper bound
      this.eatToken();
      const to = this.maybeParseDigit();
      // @ts-ignore
      if (this.iterator.current == Char.RightCurlyBrace) {
        // close brace, end of range
        this.eatToken();
        return new Range(from, to);
      }
    }
  }

  // repetition not found - reset state
  this.iterator = iteratorCopy;
  return null;
}
private isGreedy(): bool {
  // A "?" at the current position marks the preceding quantifier as lazy;
  // when present it is consumed here.
  const lazy = this.iterator.current == Char.Question;
  if (lazy) {
    this.eatToken();
  }
  // greedy unless the lazy marker was found
  return !lazy;
}
// parses a sequence of chars

@@ -241,3 +230,10 @@ private parseSequence(): Node {

const expression = nodes.pop();
nodes.push(new RangeRepetitionNode(expression, range.from, range.to));
nodes.push(
new RangeRepetitionNode(
expression,
range.from,
range.to,
this.isGreedy()
)
);
} else {

@@ -250,8 +246,3 @@ // this is not the start of a repetition, it's just a char!

const quantifier = this.eatToken();
let greedy = true;
if (this.iterator.current == Char.Question) {
greedy = false;
this.eatToken();
}
nodes.push(new RepetitionNode(expression, quantifier, greedy));
nodes.push(new RepetitionNode(expression, quantifier, this.isGreedy()));
// @ts-ignore

@@ -258,0 +249,0 @@ } else if (token == Char.LeftSquareBracket) {

@@ -16,2 +16,3 @@ export class StringIterator {

if (this.cursor >= u32(this.sourceString.length)) {
this.current = -1;
return false;

@@ -18,0 +19,0 @@ }

@@ -84,3 +84,9 @@ import { Char } from "../char";

// a{4,} => aaaaa*
clones.push(new RepetitionNode(expression.clone(), Char.Asterisk));
clones.push(
new RepetitionNode(
expression.clone(),
Char.Asterisk,
rangeRepNode.greedy
)
);
} else {

@@ -90,3 +96,9 @@ // a{4,6} => aaaaa?a?

for (let i = 0; i < count; i++) {
clones.push(new RepetitionNode(expression.clone(), Char.Question));
clones.push(
new RepetitionNode(
expression.clone(),
Char.Question,
rangeRepNode.greedy
)
);
}

@@ -93,0 +105,0 @@ }

{
"name": "assemblyscript-regex",
"version": "1.2.0",
"version": "1.3.0",
"description": "A regex engine built with AssemblyScript",

@@ -5,0 +5,0 @@ "ascMain": "assembly/index.ts",

@@ -25,2 +25,8 @@ const fs = require("fs");

1392,
...range(52, 55),
57,
58,
72,
73,
78,
],

@@ -113,6 +119,6 @@ "lazy quantifiers should still yield the longest overall regex match": [

if (["}?"].some((f) => regex.includes(f))) {
testCase += `xit("line: ${index} - lazy range repitition quantifiers are not supported", () => { });`;
return;
}
// if (["}?"].some((f) => regex.includes(f))) {
// testCase += `xit("line: ${index} - lazy range repitition quantifiers are not supported", () => { });`;
// return;
// }

@@ -119,0 +125,0 @@ if (["(?"].some((f) => regex.includes(f))) {

@@ -8,5 +8,5 @@ import "assemblyscript/std/portable/index";

const regexObj = new RegExp(".*?");
const match = regexObj.exec("abc");
const regexObj = new RegExp("ba{0}b");
const match = regexObj.exec("bb");
console.log(match);

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc