Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

es-module-lexer

Package Overview
Dependencies
Maintainers
1
Versions
68
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

es-module-lexer - npm Package Compare versions

Comparing version 0.2.0 to 0.3.0

dist/lexer.cjs.js

7

CHANGELOG.md

@@ -0,2 +1,7 @@

0.3.0
* Web Assembly conversion for performance (https://github.com/guybedford/es-module-lexer/pull/7)
* Fix $ characters in templates (https://github.com/guybedford/es-module-lexer/pull/6, @LarsDenBakker)
* Fix comment handling in imports (https://github.com/guybedford/es-module-lexer/issues/8)
0.2.0
* Include CJS build (https://github.com/guybedford/es-module-lexer/pull/1, @LarsDenBakker)
* Include CJS build (https://github.com/guybedford/es-module-lexer/pull/1, @LarsDenBakker)

696

dist/lexer.js

@@ -1,663 +0,65 @@

"use strict";
export { initPromise as init }
exports.default = analyzeModuleSyntax;
export default function analyze (source) {
if (!parse)
return initPromise.then(() => analyze(source));
function analyzeModuleSyntax(_str) {
str = _str;
baseParse();
return [oImports, oExports];
} // State:
// (for perf, works because this runs sync)
const buffer = new TextEncoder().encode(source);
const extraMem = buffer.byteLength - (memory.buffer.byteLength - __heap_base.value);
if (extraMem > 0)
memory.grow(Math.ceil(extraMem / 1024 / 64));
let i, charCode, str, lastTokenIndex, lastOpenTokenIndex, lastTokenIndexStack, dynamicImportStack, braceDepth, templateDepth, templateStack, oImports, oExports;
function baseParse() {
lastTokenIndex = lastOpenTokenIndex = -1;
oImports = [];
oExports = [];
braceDepth = 0;
templateDepth = 0;
templateStack = [];
lastTokenIndexStack = [];
dynamicImportStack = [];
i = -1;
/*
* This is just the simple loop:
*
* while (charCode = str.charCodeAt(++i)) {
* // reads into the first non-ws / comment token
* commentWhitespace();
* // reads one token at a time
* parseNext();
* // stores the last (non ws/comment) token for division operator backtracking checks
* // (including on lastTokenIndexStack as we nest structures)
* lastTokenIndex = i;
* }
*
* Optimized by:
* - Inlining comment whitespace to avoid repeated "/" checks (minor perf saving)
* - Inlining the division operator check from "parseNext" into this loop
* - Having "regularExpression()" start on the initial index (different to other parse functions)
*/
while (charCode = str.charCodeAt(++i)) {
// reads into the first non-ws / comment token
if (isBrOrWs(charCode)) continue;
if (charCode === 47
/*/*/
) {
charCode = str.charCodeAt(++i);
if (charCode === 47
/*/*/
) lineComment();else if (charCode === 42
/***/
) blockComment();else {
/*
* Division / regex ambiguity handling
* based on checking backtrack analysis of:
* - what token came previously (lastTokenIndex)
* - what token came before the opening paren or brace (lastOpenTokenIndex)
*
* Only known unhandled ambiguities are cases of regexes immediately followed
* by division, another regex or brace:
*
* /regex/ / x
*
* /regex/
* {}
* /regex/
*
* And those cases only show errors when containing "'/` in the regex
*
* Could be fixed tracking stack of last regex, but doesn't seem worth it, and bad for perf
*/
const lastTokenCode = str.charCodeAt(lastTokenIndex);
if (!lastTokenCode || isExpressionKeyword(lastTokenIndex) || isExpressionPunctuator(lastTokenCode) || lastTokenCode === 41
/*)*/
&& isParenKeyword(lastOpenTokenIndex) || lastTokenCode === 125
/*}*/
&& isExpressionTerminator(lastOpenTokenIndex)) {
// TODO: perf improvement
// it may be possible to precompute isParenKeyword and isExpressionTerminator checks
// when they are added to the token stack, not here
// this way we only need to store a stack of "regexTokenDepthStack" and "regexTokenDepth"
// where depth is the combined brace and paren depth count
// when leaving a brace or paren, this stack would be cleared automatically (if a match)
// this check then becomes curDepth === regexTokenDepth for the lastTokenCode )|} case
regularExpression();
}
lastTokenIndex = i;
}
} else {
parseNext();
lastTokenIndex = i;
}
copyToWasm(buffer, memory, salloc(buffer.byteLength));
if (!parse()) {
const idx = e(), err = new Error(`Parse error at ${idx}.`);
err.loc = idx;
throw err;
}
if (braceDepth || templateDepth || lastTokenIndexStack.length) syntaxError();
}
const imports = [], exports = [];
function parseNext() {
switch (charCode) {
case 123
/*{*/
:
// dynamic import followed by { is not a dynamic import (so remove)
// this is a sneaky way to get around { import () {} } v { import () } block / object ambiguity without a parser (assuming source is valid)
if (oImports.length && oImports[oImports.length - 1].e === lastTokenIndex) {
oImports.pop();
}
while (ri()) imports.push({ s: is(), e: ie(), d: id() });
while (re()) exports.push(source.slice(es(), ee()));
braceDepth++;
// fallthrough
case 40
/*(*/
:
lastTokenIndexStack.push(lastTokenIndex);
return;
case 125
/*}*/
:
if (braceDepth-- === templateDepth) {
templateDepth = templateStack.pop();
templateString();
return;
}
if (braceDepth < templateDepth) syntaxError();
// fallthrough
case 41
/*)*/
:
if (!lastTokenIndexStack) syntaxError();
lastOpenTokenIndex = lastTokenIndexStack.pop();
if (dynamicImportStack.length && lastOpenTokenIndex == dynamicImportStack[dynamicImportStack.length - 1]) {
for (let j = 0; j < oImports.length; j++) if (oImports[j].d === lastOpenTokenIndex) {
oImports[j].e = i;
break;
}
dynamicImportStack.pop();
}
return;
case 39
/*'*/
:
singleQuoteString();
return;
case 34
/*"*/
:
doubleQuoteString();
return;
case 96
/*`*/
:
templateString();
return;
case 105
/*i*/
:
{
if (readPrecedingKeyword(i + 5) !== 'import') return;
const start = i;
charCode = str.charCodeAt(i += 6);
if (readToWsOrPunctuator(i) !== '' && charCode !== 46
/*.*/
&& charCode !== 34
/*"*/
&& charCode !== 39
/*'*/
) return;
commentWhitespace();
switch (charCode) {
// dynamic import
case 40
/*(*/
:
lastTokenIndexStack.push(start);
if (str.charCodeAt(lastTokenIndex) === 46
/*.*/
) return; // dynamic import indicated by positive d, which will be set to closing paren index
dynamicImportStack.push(start);
oImports.push({
s: i + 1,
e: undefined,
d: start
});
return;
// import.meta
case 46
/*.*/
:
charCode = str.charCodeAt(++i);
commentWhitespace(); // import.meta indicated by d === -2
if (readToWsOrPunctuator(i) === 'meta' && str.charCodeAt(lastTokenIndex) !== 46
/*.*/
) oImports.push({
s: start,
e: i + 4,
d: -2
});
return;
} // import statement (only permitted at base-level)
if (lastTokenIndexStack.length === 0) {
readSourceString();
return;
}
}
case 101
/*e*/
:
{
if (lastTokenIndexStack.length !== 0 || readPrecedingKeyword(i + 5) !== 'export' || readToWsOrPunctuator(i + 6) !== '') return;
let name;
charCode = str.charCodeAt(i += 6);
commentWhitespace();
switch (charCode) {
// export default ...
case 100
/*d*/
:
oExports.push('default');
return;
// export async? function*? name () {
case 97
/*a*/
:
charCode = str.charCodeAt(i += 5);
commentWhitespace();
// fallthrough
case 102
/*f*/
:
charCode = str.charCodeAt(i += 8);
commentWhitespace();
if (charCode === 42
/***/
) {
charCode = str.charCodeAt(++i);
commentWhitespace();
}
oExports.push(readToWsOrPunctuator(i));
return;
case 99
/*c*/
:
if (readToWsOrPunctuator(i + 1) === 'lass') {
charCode = str.charCodeAt(i += 5);
commentWhitespace();
oExports.push(readToWsOrPunctuator(i));
return;
}
i += 2;
// fallthrough
// export var/let/const name = ...(, name = ...)+
case 118
/*v*/
:
case 108
/*l*/
:
/*
* destructured initializations not currently supported (skipped for { or [)
* also, lexing names after variable equals is skipped (export var p = function () { ... }, q = 5 skips "q")
*/
do {
charCode = str.charCodeAt(i += 3);
commentWhitespace();
name = readToWsOrPunctuator(i); // stops on [ { destructurings
if (!name.length) return;
oExports.push(name);
charCode = str.charCodeAt(i += name.length);
commentWhitespace();
} while (charCode === 44
/*,*/
);
return;
// export {...}
case 123
/*{*/
:
charCode = str.charCodeAt(++i);
commentWhitespace();
do {
name = readToWsOrPunctuator(i);
charCode = str.charCodeAt(i += name.length);
commentWhitespace(); // as
if (charCode === 97
/*a*/
) {
charCode = str.charCodeAt(i += 2);
commentWhitespace();
name = readToWsOrPunctuator(i);
charCode = str.charCodeAt(i += name.length);
commentWhitespace();
} // ,
if (charCode === 44) {
charCode = str.charCodeAt(++i);
commentWhitespace();
}
oExports.push(name);
if (!charCode) syntaxError();
} while (charCode !== 125
/*}*/
);
// fallthrough
// export *
case 42
/***/
:
charCode = str.charCodeAt(++i);
commentWhitespace();
if (charCode === 102 && str.slice(i + 1, i + 4) === 'rom') {
charCode = str.charCodeAt(i += 4);
readSourceString();
}
}
}
}
return [imports, exports];
}
/*
* Helper functions
*/
// seeks through whitespace, comments and multiline comments
function commentWhitespace() {
do {
if (charCode === 47
/*/*/
) {
const nextCharCode = str.charCodeAt(i + 1);
if (nextCharCode === 47
/*/*/
) {
charCode = nextCharCode;
i++;
lineComment();
} else if (nextCharCode === 42
/***/
) {
charCode = nextCharCode;
i++;
blockComment();
} else {
return;
}
} else if (!isBrOrWs(charCode)) {
return;
}
} while (charCode = str.charCodeAt(++i));
const wasmBinary = '';
let wasmBuffer;
if (typeof Buffer !== 'undefined') {
wasmBuffer = Buffer.from(wasmBinary, 'base64');
}
function templateString() {
while (charCode = str.charCodeAt(++i)) {
if (charCode === 36
/*$*/
) {
charCode = str.charCodeAt(++i);
if (charCode === 123
/*{*/
) {
templateStack.push(templateDepth);
templateDepth = ++braceDepth;
return;
}
} else if (charCode === 96
/*`*/
) {
return;
} else if (charCode === 92
/*\*/
) {
charCode = str.charCodeAt(++i);
}
}
syntaxError();
else {
const str = atob(wasmBinary);
const len = str.length;
wasmBuffer = new Uint8Array(len);
for (let i = 0; i < len; i++)
wasmBuffer[i] = str.charCodeAt(i);
}
function readSourceString() {
let start;
let memory, __heap_base, salloc, parse, e, ri, re, is, ie, id, es, ee;
const initPromise = WebAssembly.compile(wasmBuffer)
.then(WebAssembly.instantiate)
.then(({ exports }) => ({ memory, __heap_base, salloc, parse, e, ri, re, is, ie, id, es, ee } = exports));
do {
if (charCode === 39
/*'*/
) {
start = i + 1;
singleQuoteString();
oImports.push({
s: start,
e: i,
d: -1
});
return;
}
if (charCode === 34
/*"*/
) {
start = i + 1;
doubleQuoteString();
oImports.push({
s: start,
e: i,
d: -1
});
return;
}
} while (charCode = str.charCodeAt(++i));
syntaxError();
}
// Tells whether the module-level `charCode` is one of the recognised
// non-line-breaking whitespace characters.
// The list is not exhaustive; Unicode defines more, see
// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
function isWs() {
  switch (charCode) {
    case 32: // space
    case 9: // \t
    case 12: // \f
    case 11: // \v
    case 160: // \u00A0
    case 65279: // \ufeff
      return true;
    default:
      return false;
  }
}
// Tells whether the module-level `charCode` is a line feed or a carriage return.
// (8232 <LS> and 8233 <PS> are not treated as line breaks for now.)
function isBr() {
  const isLineFeed = charCode === 10; // \n
  const isCarriageReturn = charCode === 13; // \r
  return isLineFeed || isCarriageReturn;
}
// Tells whether the given character code is a line break or whitespace:
// codes 9 through 13 (\t \n \v \f \r), space, \u00A0 or \ufeff.
function isBrOrWs(charCode) {
  if (charCode > 8 && charCode < 14) {
    return true;
  }
  switch (charCode) {
    case 32:
    case 160:
    case 65279:
      return true;
    default:
      return false;
  }
}
function blockComment() {
charCode = str.charCodeAt(++i);
while (charCode) {
if (charCode === 42
/***/
) {
charCode = str.charCodeAt(++i);
if (charCode === 47
/*/*/
) return;
continue;
}
charCode = str.charCodeAt(++i);
/**
 * Copies the bytes of `buffer` into `memory.buffer` starting at byte offset
 * `pointer`, then writes a single 0 byte (null terminator) right after them.
 *
 * The copy is done byte-wise through `TypedArray.prototype.set`, so it
 *  - honours `buffer.byteOffset` (a view that does not start at offset 0 of
 *    its ArrayBuffer is copied correctly), and
 *  - works for any `pointer`, not only 4-byte-aligned ones.
 *
 * @param {Uint8Array} buffer - typed-array view holding the bytes to copy
 * @param {{ buffer: ArrayBuffer }} memory - object exposing the destination ArrayBuffer
 * @param {number} pointer - destination byte offset inside `memory.buffer`
 * @throws {RangeError} if the bytes do not fit into `memory.buffer` at `pointer`
 */
function copyToWasm (buffer, memory, pointer) {
  const byteLen = buffer.byteLength;
  // View exactly the bytes the caller handed us, wherever they sit in the backing store.
  const source = new Uint8Array(buffer.buffer, buffer.byteOffset, byteLen);
  const target = new Uint8Array(memory.buffer);
  target.set(source, pointer);
  // add null terminator
  target[pointer + byteLen] = 0;
}
// Advances the shared cursor `i` (updating `charCode`) until it sits on a
// line break, or until the end of `str` is reached.
function lineComment() {
  do {
    charCode = str.charCodeAt(++i);
  } while (charCode && !isBr());
}
// Scans forward from the opening ' until the matching closing ' and leaves
// the cursor `i` on it. A backslash skips the character that follows it.
// Calls syntaxError() on a raw line break or when the input ends first.
function singleQuoteString() {
  for (;;) {
    charCode = str.charCodeAt(++i);
    if (!charCode) {
      break;
    }
    if (charCode === 39 /*'*/) {
      return;
    }
    if (charCode === 92 /*\*/) {
      i++;
      continue;
    }
    if (isBr()) {
      syntaxError();
    }
  }
  syntaxError();
}
// Scans forward from the opening " until the matching closing " and leaves
// the cursor `i` on it. A backslash skips the character that follows it.
// Calls syntaxError() on a raw line break or when the input ends first.
function doubleQuoteString() {
  while (true) {
    charCode = str.charCodeAt(++i);
    if (!charCode) break;
    switch (charCode) {
      case 34: /*"*/
        return;
      case 92: /*\*/
        i++;
        break;
      default:
        if (isBr()) syntaxError();
    }
  }
  syntaxError();
}
// Scans forward from a "[" inside a regular expression until the closing "]"
// and leaves the cursor `i` on it. A backslash skips the character that
// follows it. Calls syntaxError() on a line break or when the input ends first.
function regexCharacterClass() {
  for (charCode = str.charCodeAt(++i); charCode; charCode = str.charCodeAt(++i)) {
    if (charCode === 93 /*]*/) {
      return;
    }
    if (charCode === 92 /*\*/) {
      i += 1;
    } else if (isBr()) {
      syntaxError();
    }
  }
  syntaxError();
}
// Scans a regular expression literal. Unlike the other scanners this one
// starts by examining the CURRENT `charCode` (it does not advance first).
// It returns with the cursor `i` on the closing "/". Character classes are
// delegated to regexCharacterClass(), and a backslash skips the character that
// follows it. Calls syntaxError() on a line break or when the input ends first.
function regularExpression() {
  while (true) {
    switch (charCode) {
      case 47: /*/*/
        return;
      case 91: /*[*/
        regexCharacterClass();
        break;
      case 92: /*\*/
        i++;
        break;
      default:
        if (isBr()) syntaxError();
    }
    charCode = str.charCodeAt(++i);
    if (!charCode) break;
  }
  syntaxError();
}
// Reads the run of lowercase a-z letters in `str` that ends at `endIndex`
// (inclusive) and returns it. Returns '' when the character just before that
// run is a "." or is anything other than start-of-input, whitespace, a line
// break or a punctuator.
function readPrecedingKeyword(endIndex) {
  let cursor = endIndex;
  let code = str.charCodeAt(cursor);
  // walk backwards over a-z
  while (code && code > 96 /*a*/ && code < 123 /*z*/) {
    cursor -= 1;
    code = str.charCodeAt(cursor);
  }
  // must be preceded by punctuator or whitespace (or nothing at all)
  const hasCleanBoundary = !code || isBrOrWs(code) || isPunctuator(code);
  if (!hasCleanBoundary || code === 46 /*.*/) {
    return '';
  }
  return str.slice(cursor + 1, endIndex + 1);
}
// Returns the substring of `str` that begins at `startIndex` and runs up to
// (not including) the next whitespace, line break, punctuator or end of input.
function readToWsOrPunctuator(startIndex) {
  let stop = startIndex;
  for (;;) {
    const code = str.charCodeAt(stop);
    if (!code || isBrOrWs(code) || isPunctuator(code)) {
      break;
    }
    stop++;
  }
  return str.slice(startIndex, stop);
}
// Lookup table (keyword -> 1) consulted by isExpressionKeyword().
const expressionKeywords = {};
for (const keyword of [
  'case',
  'debugger',
  'delete',
  'do',
  'else',
  'in',
  'instanceof',
  'new',
  'return',
  'throw',
  'typeof',
  'void',
  'yield',
  'await'
]) {
  expressionKeywords[keyword] = 1;
}
// Looks up the lowercase word ending at `lastTokenIndex` in the
// `expressionKeywords` table. Returns the table entry (1) when the word is
// listed, and undefined otherwise.
//
// Only OWN properties of the table count: a bare lookup would also find
// inherited Object.prototype members, so an identifier named `constructor`
// would wrongly be reported as an expression keyword.
function isExpressionKeyword(lastTokenIndex) {
  const keyword = readPrecedingKeyword(lastTokenIndex);
  if (!Object.prototype.hasOwnProperty.call(expressionKeywords, keyword)) {
    return undefined;
  }
  return expressionKeywords[keyword];
}
// Tells whether the lowercase word ending at `lastTokenIndex` is one of
// "while", "for" or "if".
function isParenKeyword(lastTokenIndex) {
  switch (readPrecedingKeyword(lastTokenIndex)) {
    case 'while':
    case 'for':
    case 'if':
      return true;
    default:
      return false;
  }
}
// Tells whether the character code is one of the punctuator characters
// !%&()*+,-./:;<=>?[]^{|}~
function isPunctuator(charCode) {
  switch (charCode) {
    case 33: // !
    case 37: // %
    case 38: // &
    case 91: // [
    case 93: // ]
    case 94: // ^
      return true;
  }
  // ( ) * + , - . /
  if (charCode > 39 && charCode < 48) {
    return true;
  }
  // : ; < = > ?
  if (charCode > 57 && charCode < 64) {
    return true;
  }
  // { | } ~
  return charCode > 122 && charCode < 127;
}
// Tells whether the character code is a punctuator other than the three
// closing brackets "]", ")" and "}".
function isExpressionPunctuator(charCode) {
  const isClosingBracket =
    charCode === 93 /*]*/ ||
    charCode === 41 /*)*/ ||
    charCode === 125 /*}*/;
  return isPunctuator(charCode) && !isClosingBracket;
}
// Tells whether the token ending at `lastTokenIndex` is one after which a
// following "{" opens a statement block. Detects:
//   ;   )   start of input (index -1)   finally
// in future we will need: "catch" (optional catch parameters)
//                         "do" (do expressions)
function isExpressionTerminator(lastTokenIndex) {
  const code = str.charCodeAt(lastTokenIndex);
  // An out-of-range index (e.g. -1, meaning "no previous token") yields NaN.
  // This must be tested explicitly: `case NaN:` inside a switch can never
  // match because switch compares with === and NaN !== NaN.
  if (Number.isNaN(code)) {
    return true;
  }
  switch (code) {
    case 59
    /*;*/
    :
    case 41
    /*)*/
    :
      return true;
    case 121
    /*y*/
    :
      return readPrecedingKeyword(lastTokenIndex) === 'finally';
  }
  return false;
}
// Aborts lexing by throwing a message-less Error.
// Only the stack matters here: this is for diagnostics and is not shown to users.
function syntaxError() {
  const diagnostic = new Error();
  throw diagnostic;
}
module.exports = exports.default;
{
"name": "es-module-lexer",
"version": "0.2.0",
"version": "0.3.0",
"description": "Lexes ES modules returning their import/export metadata",
"main": "dist/lexer.js",
"module": "lexer.js",
"main": "dist/lexer.cjs",
"module": "dist/lexer.js",
"scripts": {
"test": "mocha -r esm -u tdd test/unit.js",
"build": "babel lexer.js --out-dir dist",
"bench": "node -r esm bench",
"test": "mocha -r esm -b -u tdd test/*.js",
"build": "node --experimental-modules build.js && babel dist/lexer.js | terser -o dist/lexer.cjs.js",
"build-wasm": "make lib/lexer.wasm && node --experimental-modules build.js",
"bench": "node --experimental-modules --expose-gc bench/index.js",
"prepublishOnly": "npm run build"

@@ -23,9 +24,16 @@ },

"mocha": "^5.2.0",
"pretty-ms": "^5.0.0"
"terser": "^4.1.4"
},
"files": [
"lexer.js",
"dist"
],
"type": "module"
"type": "module",
"repository": {
"type": "git",
"url": "git+https://github.com/guybedford/es-module-lexer.git"
},
"bugs": {
"url": "https://github.com/guybedford/es-module-lexer/issues"
},
"homepage": "https://github.com/guybedford/es-module-lexer#readme"
}
# ES Module Lexer
JS module syntax lexer used in [es-module-shims](https://github.com/guybedford/es-module-shims).
A JS module syntax lexer used in [es-module-shims](https://github.com/guybedford/es-module-shims).
Very small (< 500 lines) and fast ES module lexer.
A very small single JS file (4KiB gzipped) that includes inlined Web Assembly for [very fast](#benchmarks) source analysis of ES modules only.
The output interfaces use minification-friendly names.
Outputs the list of exports and locations of import specifiers, including dynamic import and import meta handling.
_Comprehensively handles the JS language grammar while remaining small and fast - can parse 2MB of JavaScript in under 30ms from a completely cold start, and in just 20ms after a few runs, [see benchmarks](#benchmarks) for more info._
### Usage
> Note: this module is exposed as an ES module build only (lexer.js contains `export default analyze(source) { ... }`).
```

@@ -17,4 +17,2 @@ npm install es-module-lexer

Using `node --experimental-modules` -
```js

@@ -77,22 +75,39 @@ import analyze from 'es-module-lexer';

```
bench/samples/d3.js (497K)
> Cold: 55ms
> Warm: 7ms (average of 25 runs)
bench/samples/d3.min.js (268K)
> Cold: 13ms
> Warm: 5ms (average of 25 runs)
bench/samples/magic-string.js (35K)
> Cold: 4ms
> Warm: 0ms (average of 25 runs)
bench/samples/magic-string.min.js (20K)
> Cold: 0ms
> Warm: 0ms (average of 25 runs)
bench/samples/rollup.js (881K)
> Cold: 27ms
> Warm: 13ms (average of 25 runs)
bench/samples/rollup.min.js (420K)
> Cold: 8ms
> Warm: 8ms (average of 25 runs)
Module load time
> 6ms
Cold Run, All Samples
test/samples/*.js (2150 KiB)
> 29ms
Warm Runs (average of 25 runs)
test/samples/d3.js (491 KiB)
> 5.6ms
test/samples/d3.min.js (274 KiB)
> 3.44ms
test/samples/magic-string.js (34 KiB)
> 0.36ms
test/samples/magic-string.min.js (20 KiB)
> 0.04ms
test/samples/rollup.js (902 KiB)
> 9.24ms
test/samples/rollup.min.js (429 KiB)
> 5.24ms
Warm Runs, All Samples (average of 25 runs)
test/samples/*.js (2150 KiB)
> 24.8ms
```
### Building
To build, download the WASI SDK from https://github.com/CraneStation/wasi-sdk/releases.
The Makefile assumes that the `clang` in PATH corresponds to LLVM 8 (either the one provided by the WASI SDK or a standard clang 8 install), and that `../wasi-sdk-6` contains the SDK as extracted above, which is needed to locate the WASI sysroot.
The build through the Makefile is then run via `make lib/lexer.wasm`, which can also be triggered via `npm run build-wasm` to create `dist/lexer.js`.
On Windows it may be preferable to use the Linux subsystem.
After the Web Assembly build, the CJS build can be triggered via `npm run build`.
### Limitations

@@ -99,0 +114,0 @@

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc