attributes-parser
Advanced tools
Comparing version 1.0.4 to 2.0.0
/** | ||
* Regular expression for matching attribute names. | ||
* Matches various white space characters, including tab, vertical tab, form | ||
* feed, and zero-width non-breaking space. | ||
* | ||
* Ensure that the string doesn't start with any of the prohibited characters, | ||
* including space characters, U+0000 NULL, U+0022 QUOTATION MARK | ||
* (""), U+0027 APOSTROPHE ("'"), U+003E GREATER-THAN SIGN (>), | ||
* U+002F SOLIDUS (/), U+003D EQUALS SIGN (=), control characters | ||
* (U+0000 to U+001F, U+007F to U+009F), and characters that are not | ||
* defined by Unicode. Followed by one or more characters that are not in | ||
* the prohibited set of characters. | ||
* | ||
* @see [HTML syntax attributes](https://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attributes) | ||
* Unicode space separators are not included! | ||
*/ | ||
export declare const ATTR_NAME_RULE: RegExp; | ||
export declare const WhiteSpace: RegExp; | ||
/** | ||
* Regular expression for matching single-quoted attribute values. | ||
* | ||
* Matches the bare boolean literals `true` and `false` directly after `=`; quoted values stay strings. | ||
*/ | ||
export declare const BooleanLiteral: RegExp; | ||
/** | ||
* Matches various forms of numeric literals, including hexadecimal, octal, | ||
* binary, decimal, and scientific notation. | ||
*/ | ||
export declare const NumericLiteral: RegExp; | ||
/** | ||
* Matches a single-quoted attribute value enclosed in single quotes. Allows | ||
@@ -26,6 +26,4 @@ * for escaped single quotes (`\'`) and escaped newlines (`\\n`), with the | ||
*/ | ||
export declare const ATTR_SINGLE_QUOTED_VALUE_RULE: RegExp; | ||
export declare const SingleQuotedLiteral: RegExp; | ||
/** | ||
* Regular expression for matching double-quoted attribute values. | ||
* | ||
* Matches a double-quoted attribute value enclosed in double quotes. Allows | ||
@@ -39,6 +37,4 @@ * for escaped double quotes (`\"`) and escaped newlines (`\\n`), with the | ||
*/ | ||
export declare const ATTR_DOUBLE_QUOTED_VALUE_RULE: RegExp; | ||
export declare const DoubleQuotedLiteral: RegExp; | ||
/** | ||
* Regular expression for matching unquoted attribute values. | ||
* | ||
* Matches a sequence of characters that must not contain any of the | ||
@@ -50,2 +46,14 @@ * characters U+0022 ("), U+0027 ('), U+0060 (`), U+003D (=), U+003C (<), | ||
*/ | ||
export declare const ATTR_UNQUOTED_VALUE_RULE: RegExp; | ||
export declare const UnquotedLiteral: RegExp; | ||
/** | ||
* Ensure that the string doesn't start with any of the prohibited characters, | ||
* including space characters, U+0000 NULL, U+0022 QUOTATION MARK | ||
* (""), U+0027 APOSTROPHE ("'"), U+003E GREATER-THAN SIGN (>), | ||
* U+002F SOLIDUS (/), U+003D EQUALS SIGN (=), control characters | ||
* (U+0000 to U+001F, U+007F to U+009F), and characters that are not | ||
* defined by Unicode. Followed by one or more characters that are not in | ||
* the prohibited set of characters. | ||
* | ||
* @see [HTML syntax attributes](https://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attributes) | ||
*/ | ||
export declare const AttributeName: RegExp; |
@@ -1,23 +0,9 @@ | ||
import type { Attributes, Token, TokenType } from './types.js'; | ||
import type { Attributes } from './types.js'; | ||
/** | ||
* Tokenize the attributes string. | ||
* | ||
* @param str - Attributes string. | ||
* @returns Array of tokens. | ||
*/ | ||
export declare function tokenizeAttrs(str: string): Token[]; | ||
/** | ||
* Parse attributes string into an object. | ||
* | ||
* @param str - Attributes string. | ||
* @param input - Attributes string. | ||
* @returns Parsed attributes as key-value pairs. | ||
*/ | ||
export declare function parseAttrs(str: string): Attributes; | ||
/** | ||
* Serialize an array of tokens into a string. | ||
* | ||
* @param tokens - Array of tokens to serialize. | ||
* @returns Serialized string. | ||
*/ | ||
export declare function serializeTokens(tokens: Token[]): string; | ||
export type { Attributes, Token, TokenType }; | ||
export default function parseAttrs(input: string): Attributes; | ||
export type { Attributes, Token, TokenType } from './types.js'; |
@@ -1,64 +0,112 @@ | ||
import r from "moo"; | ||
const i = /(?![\s\x00\x22\x27\x3E\x2F\x3D\x00-\x1F\x7F-\x9F])[^\s\x00-\x1F\x7F-\x9F\x22\x27\x3E\x2F\x3D]+/, l = /'(?!.*&[0-9a-zA-Z]+;)[^'\\]*(?:\\.|\\n[^"\\]*|&[^0-9a-zA-Z;]*)*'/, p = /"(?!.*&[0-9a-zA-Z]+;)[^"\\]*(?:\\.|\\n[^"\\]*|&[^0-9a-zA-Z;]*)*"/, c = /[^"\s'`=<>\x00]+/, o = r.states({ | ||
import moo from "moo"; | ||
import jsonLoose from "json-loose"; | ||
const WhiteSpace = /[ \t\v\f\ufeff]+/; | ||
const BooleanLiteral = new RegExp("(?<==)(?:true|false)"); | ||
const NumericLiteral = new RegExp("(?<==)-?(?:(?:0[xX][\\da-fA-F](?:_?[\\da-fA-F])*|0[oO][0-7](?:_?[0-7])*|0[bB][01](?:_?[01])*)n?|-?0n|-?[1-9](?:_?\\d)*n|(?:(?:0(?!\\d)|0\\d*[89]\\d*|[1-9](?:_?\\d)*)(?:\\.(?:\\d(?:_?\\d)*)?)?|\\.\\d(?:_?\\d)*)(?:[eE][+-]?\\d(?:_?\\d)*)?|-?0[0-7]+)"); | ||
const SingleQuotedLiteral = new RegExp(`(?<==)'(?!.*&[0-9a-zA-Z]+;)[^'\\\\]*(?:\\\\.|\\\\n[^"\\\\]*|&[^0-9a-zA-Z;]*)*'`); | ||
const DoubleQuotedLiteral = new RegExp('(?<==)"(?!.*&[0-9a-zA-Z]+;)[^"\\\\]*(?:\\\\.|\\\\n[^"\\\\]*|&[^0-9a-zA-Z;]*)*"'); | ||
const UnquotedLiteral = new RegExp("(?<==)[^\"\\s'`=<>\\x00]+"); | ||
const AttributeName = /(?:(?![\s\x00\x22\x27\x3E\x2F\x3D\x00-\x1F\x7F-\x9F])[^\s\x00-\x1F\x7F-\x9F\x22\x27\x3E\x2F\x3D])+/; | ||
/**
 * Turn the raw text of a string-literal token into its attribute value.
 *
 * A matching pair of surrounding single or double quotes is removed. If the
 * remaining text is wrapped in `[...]` or `{...}`, it is normalised with
 * `jsonLoose` and parsed with `JSON.parse`, so arrays and objects come back as
 * real values. Any other text is returned unchanged.
 *
 * @param text - Raw token text. Non-string input is returned untouched.
 * @returns The unquoted string, or the parsed array/object.
 * @throws SyntaxError when the bracketed text is still not valid JSON after
 * `jsonLoose` has processed it.
 */
function formatString(text) {
  // Bail out early: the string methods used below would throw on a non-string.
  if (typeof text !== "string") {
    return text;
  }

  // Compare the first and last characters directly instead of using a regex,
  // so quoted values that span several lines are unquoted as well.
  const first = text[0];
  const isQuoted =
    text.length >= 2 && (first === "'" || first === '"') && text.endsWith(first);
  const value = isQuoted ? text.slice(1, -1) : text;

  const looksLikeArray = value.startsWith("[") && value.endsWith("]");
  const looksLikeObject = value.startsWith("{") && value.endsWith("}");
  if (looksLikeArray || looksLikeObject) {
    return JSON.parse(jsonLoose(value));
  }
  return value;
}
/**
 * Serialize an attributes object back into an attribute string.
 *
 * Each enumerable key becomes one space-separated `key=value` pair:
 * - objects and arrays (and `null`, whose `typeof` is "object") are written
 *   as JSON inside single quotes;
 * - strings are quoted, using single quotes when the text contains a double
 *   quote but no single quote, and double quotes otherwise;
 * - numbers and booleans are written bare.
 * Values of any other type are skipped.
 *
 * @param attrs - Attribute map to serialize.
 * @returns The serialized attributes, or an empty string when nothing was written.
 */
function serialize(attrs) {
  const parts = [];
  for (const key in attrs) {
    const value = attrs[key];
    switch (typeof value) {
      case "object":
        parts.push(`${key}='${JSON.stringify(value)}'`);
        break;
      case "string": {
        // Pick a delimiter that does not occur in the text; a double quote
        // inside "..." would end the value early when it is tokenized again.
        const quote = value.includes('"') && !value.includes("'") ? "'" : '"';
        parts.push(`${key}=${quote}${value}${quote}`);
        break;
      }
      case "number":
      case "boolean":
        parts.push(`${key}=${value}`);
        break;
    }
  }
  return parts.join(" ");
}
const lexer = moo.states({ | ||
main: { | ||
whitespace: /[ \t]+/, | ||
name: i, | ||
separator: { | ||
match: "=", | ||
push: "insideValue" | ||
} | ||
}, | ||
insideValue: { | ||
whitespace: /[ \t]+/, | ||
singleQuotedvalue: { | ||
match: l, | ||
value: a, | ||
push: "main" | ||
WhiteSpace: { match: WhiteSpace, lineBreaks: true }, | ||
BooleanLiteral: { | ||
match: BooleanLiteral, | ||
value(x) { | ||
return x === "true" ? true : false; | ||
} | ||
}, | ||
doubleQuotedvalue: { | ||
match: p, | ||
value: a, | ||
push: "main" | ||
NumericLiteral: { | ||
match: NumericLiteral, | ||
value(x) { | ||
const n = Number(x); | ||
return Number.isNaN(n) ? Number(x.replace(/_|n$/g, "")) : Number(x); | ||
} | ||
}, | ||
unquotedvalue: { | ||
match: c, | ||
value: a, | ||
push: "main" | ||
SingleQuotedValue: { | ||
match: SingleQuotedLiteral, | ||
value: formatString, | ||
type: () => "StringLiteral" | ||
}, | ||
DoubleQuotedLiteral: { | ||
match: DoubleQuotedLiteral, | ||
value: formatString, | ||
type: () => "StringLiteral" | ||
}, | ||
UnquotedLiteral: { | ||
match: UnquotedLiteral, | ||
value: formatString, | ||
type: () => "StringLiteral" | ||
}, | ||
AttributeName, | ||
Separator: "=" | ||
} | ||
}); | ||
function parseAttrs(input) { | ||
let currentKey = null; | ||
const tokens = lexer.reset(input); | ||
const attrs = {}; | ||
Object.defineProperties(attrs, { | ||
toString: { | ||
writable: false, | ||
enumerable: false, | ||
configurable: false, | ||
value: () => serialize(attrs) | ||
}, | ||
getTokens: { | ||
writable: false, | ||
enumerable: false, | ||
configurable: false, | ||
value: () => Array.from(lexer.reset(input)) | ||
} | ||
}); | ||
for (const { type, value } of tokens) { | ||
switch (type) { | ||
case "AttributeName": | ||
currentKey = value; | ||
attrs[currentKey] = currentKey; | ||
break; | ||
case "BooleanLiteral": | ||
case "NumericLiteral": | ||
case "StringLiteral": | ||
if (currentKey) { | ||
attrs[currentKey] = value; | ||
currentKey = null; | ||
} | ||
break; | ||
} | ||
} | ||
}); | ||
function a(n) { | ||
let e = n; | ||
return typeof n == "string" && /^(['"]).*?\1$/.test(n) && (e = n.slice(1, -1)), /^-?0*(\d+(?:\.\d+)?)$/.test(e) ? Number(e) : e.startsWith("[") && e.endsWith("]") || e.startsWith("{") && e.endsWith("}") || e === "true" || e === "false" ? Function(`return ${e}`)() : e; | ||
return attrs; | ||
} | ||
// Exported as `tokenizeAttrs` in the export list below.
// Resets the lexer `o` with the input string `n`, then drains it: `o.next()`
// is called until it returns a falsy value and every token is collected.
function x(n) { | ||
o.reset(n); | ||
const e = []; | ||
let t; | ||
for (; t = o.next(); ) | ||
e.push(t); | ||
return e; | ||
} | ||
// Exported as `parseAttrs` in the export list below.
// Tokenizes `n` with `x` and folds the tokens into a plain object:
// - a "name" token becomes the pending key `u` and is stored as `true` (`!0`);
// - the next unquoted/single-quoted/double-quoted value token overwrites that
//   entry and clears the pending key, so a value with no pending key is ignored.
function h(n) { | ||
const e = x(n), t = {}; | ||
let u = null; | ||
for (const s of e) | ||
s.type === "name" ? (u = s.value, t[u] = !0) : (s.type === "unquotedvalue" || s.type === "singleQuotedvalue" || s.type === "doubleQuotedvalue") && u && (t[u] = s.value, u = null); | ||
return t; | ||
} | ||
// Exported as `serializeTokens` in the export list below.
// Concatenates the `text` of every token in `n` whose `type` is one of the
// types listed in `e`; tokens of any other type contribute nothing.
function m(n) { | ||
const e = [ | ||
"whitespace", | ||
"name", | ||
"separator", | ||
"singleQuotedvalue", | ||
"doubleQuotedvalue", | ||
"unquotedvalue" | ||
]; | ||
// Each token is first reduced to { type, value, text } before the fold.
return n.map(({ type: t, value: u, text: s }) => ({ type: t, value: u, text: s })).reduce((t, u) => e.indexOf(u.type) === -1 ? t : t + u.text, ""); | ||
} | ||
export { | ||
h as parseAttrs, | ||
m as serializeTokens, | ||
x as tokenizeAttrs | ||
parseAttrs as default | ||
}; |
/** | ||
* AnyType is a type that represents any possible JavaScript value. | ||
* | ||
* **Note:** This type should be used sparingly, as it bypasses type checking. | ||
*/ | ||
export type AnyType = any; | ||
/** | ||
* Attributes represent a collection of key-value pairs where the keys are | ||
* strings and the values can be null, string, boolean, number, an array of | ||
* AnyType, or an object with keys of type string and values of type AnyType. | ||
* strings and the values can be null, string, boolean, number, an array, or an | ||
* object with keys of type string and values. | ||
*/ | ||
export type Attributes = { | ||
[key: string]: null | string | boolean | number | AnyType[] | { | ||
[key: string]: AnyType; | ||
[key: string]: null | string | boolean | number | unknown[] | { | ||
[key: string]: unknown; | ||
}; | ||
}; | ||
} & AttributesHelpers; | ||
export interface AttributesHelpers { | ||
toString(): string; | ||
getTokens(): Token[]; | ||
} | ||
/** | ||
* TokenType represents the type of a lexer token, which can be one of the | ||
* following: | ||
* | ||
* - 'whitespace' | ||
* - 'name' | ||
* - 'separator' | ||
* - 'singleQuotedvalue' | ||
* - 'doubleQuotedvalue' | ||
* - 'unquotedvalue' | ||
*/ | ||
export type TokenType = 'whitespace' | 'name' | 'separator' | 'singleQuotedvalue' | 'doubleQuotedvalue' | 'unquotedvalue'; | ||
/** | ||
* Token represents a lexer token with various properties. | ||
@@ -67,1 +53,13 @@ */ | ||
} | ||
/** | ||
* TokenType represents the type of a lexer token, which can be one of the | ||
* following: | ||
* | ||
* - 'WhiteSpace' | ||
* - 'Separator' | ||
* - 'BooleanLiteral' | ||
* - 'NumericLiteral' | ||
* - 'StringLiteral' | ||
* - 'AttributeName' | ||
*/ | ||
export type TokenType = 'WhiteSpace' | 'Separator' | 'BooleanLiteral' | 'NumericLiteral' | 'StringLiteral' | 'AttributeName'; |
{ | ||
"name": "attributes-parser", | ||
"description": "Parsing and tokenizing attributes string", | ||
"version": "1.0.4", | ||
"version": "2.0.0", | ||
"publishConfig": { | ||
@@ -29,3 +29,3 @@ "access": "public" | ||
"type": "module", | ||
"main": "dist/index.cjs", | ||
"main": "dist/index.umd.cjs", | ||
"module": "dist/index.js", | ||
@@ -36,3 +36,3 @@ "types": "dist/index.d.ts", | ||
"import": "./dist/index.js", | ||
"require": "./dist/index.cjs", | ||
"require": "./dist/index.umd.cjs", | ||
"types": "./dist/index.d.ts" | ||
@@ -47,5 +47,8 @@ } | ||
"scripts": { | ||
"start": "npm run dev", | ||
"dev": "vite build --watch", | ||
"build": "vite build && npm run types", | ||
"start": "npm run prod", | ||
"prod": "vite build && vite", | ||
"dev": "vite build:ssr --watch", | ||
"build": "npm run build:ssr && npm run build:prod && npm run types", | ||
"build:ssr": "vite build --ssr src/index.ts", | ||
"build:prod": "vite build", | ||
"test": "vitest", | ||
@@ -57,8 +60,11 @@ "coverage": "vitest run --coverage", | ||
}, | ||
"dependencies": { | ||
"json-loose": "^1.0.0" | ||
}, | ||
"devDependencies": { | ||
"@semantic-release/changelog": "^6.0.3", | ||
"@semantic-release/git": "^10.0.1", | ||
"@types/moo": "^0.5.6", | ||
"@types/moo": "0.5.7", | ||
"doogu": "3.2.7", | ||
"semantic-release": "^22.0.4" | ||
"semantic-release": "^22.0.5" | ||
}, | ||
@@ -76,6 +82,3 @@ "config": { | ||
"extends": "doogu/release.config.js" | ||
}, | ||
"dependencies": { | ||
"moo": "^0.5.2" | ||
} | ||
} |
161
readme.md
@@ -7,3 +7,3 @@ # Attributes Parser | ||
You can install this module using npm or yarn: | ||
You can install this module using npm or yarn; it's just `3.24 kB | min: 1.94 kB`: | ||
@@ -20,13 +20,11 @@ ```bash | ||
### Parse Attributes | ||
To parse an attribute string into key-value pairs, use the `parseAttrs` function. | ||
```js | ||
import { parseAttrs } from 'attributes-parser' | ||
import parseAttrs from 'attributes-parser' | ||
const attrs = `id="foo" class=\'bar\' num="3.14" data-value=baz name="@myName" data-value="[1, 2, 3]" fooBar="{foo: 'bar'}" checked=false disabled` | ||
const parsedAttrs = parseAttrs(attrs) | ||
const attr = `id="my-id" class='my-class' num=3.14 numNeg=-3.14 data-num="3.14" data-value="123" data-value=1_000_000 options=\'{"key": "value", "array": [1, 2, 3]}\' data-list="[1, 2, 3]" punc="a=b,c,d,e" checked=false checked=false data-checked="false" disabled` | ||
const parsedAttr = parseAttrs(attr) | ||
console.log(parsedAttrs) | ||
console.log(parsedAttr) | ||
// use `parsedAttr.toString()` to turn it back into a string | ||
// use `parsedAttr.getTokens()` to get the tokens array | ||
``` | ||
@@ -38,91 +36,17 @@ | ||
{ | ||
id: 'foo', | ||
class: 'bar', | ||
num: 3.14, | ||
'data-value': [ 1, 2, 3 ], | ||
name: '@myName', | ||
fooBar: { foo: 'bar' }, | ||
checked: false, | ||
disabled: true | ||
id: 'my-id', | ||
class: 'my-class', | ||
num: 3.14, // number | ||
numNeg: -3.14, // negative number | ||
'data-num': '3.14', // preserve string | ||
'data-value': 1000000, // duplicate key, second value is kept | ||
options: { key: 'value', array: [ 1, 2, 3 ] }, | ||
'data-list': [ 1, 2, 3 ], | ||
punc: 'a=b,c,d,e', // allowed, no ambiguous ampersand | ||
checked: false, // boolean | ||
'data-checked': 'false', // preserve string | ||
disabled: "disabled" // shorthand | ||
} | ||
``` | ||
### Tokenize Attributes | ||
To tokenize an attribute string, use the `tokenizeAttrs` function. | ||
```js | ||
import { tokenizeAttrs } from 'attributes-parser' | ||
const attrs = `id="foo" class=\'bar\' num="3.14" data-value=baz name="@myName" data-value="[1, 2, 3]" fooBar="{foo: 'bar'}" checked=false disabled` | ||
const tokens = tokenizeAttrs(attributeString) | ||
console.log(tokens) | ||
``` | ||
Yields: | ||
```js | ||
[ | ||
{ | ||
type: 'name', | ||
value: 'id', | ||
text: 'id', | ||
toString: [Function: tokenToString], | ||
offset: 0, | ||
lineBreaks: 0, | ||
line: 1, | ||
col: 1 | ||
}, | ||
{ | ||
type: 'separator', | ||
value: '=', | ||
text: '=', | ||
toString: [Function: tokenToString], | ||
offset: 2, | ||
lineBreaks: 0, | ||
line: 1, | ||
col: 3 | ||
}, | ||
{ | ||
type: 'doubleQuotedvalue', | ||
value: 'foo', | ||
text: '"foo"', | ||
toString: [Function: tokenToString], | ||
offset: 3, | ||
lineBreaks: 0, | ||
line: 1, | ||
col: 4 | ||
}, | ||
... | ||
] | ||
``` | ||
### Serialize Tokens | ||
To serialize an array of tokens into a string, use the `serializeTokens` function. | ||
```ts | ||
import { serializeTokens, type Token } from 'attributes-parser' | ||
const tokens = [ | ||
{ type: 'name', value: 'id', text: 'id' }, | ||
{ type: 'separator', value: '=', text: '=' }, | ||
{ type: 'unquotedvalue', value: 'foo', text: 'foo' }, | ||
{ type: 'whitespace', value: ' ', text: ' ' }, | ||
{ type: 'name', value: 'class', text: 'class' }, | ||
{ type: 'separator', value: '=', text: '=' }, | ||
{ type: 'doubleQuotedvalue', value: 'bar', text: '"bar"' } | ||
] as Token[] | ||
const attrs = serializeTokens(tokens) | ||
console.log(attrs) | ||
``` | ||
Yields: | ||
```bash | ||
id=foo class="bar" | ||
``` | ||
## Attribute Validation | ||
@@ -144,3 +68,3 @@ | ||
### Attribute names | ||
### `AttributeName` | ||
@@ -173,6 +97,29 @@ #### Valid | ||
### Single-quoted attribute values | ||
### `BooleanLiteral` | ||
- `true` | ||
- `false` | ||
### `NumericLiteral` | ||
#### Valid | ||
- `0x1A3` (hexLiteral) | ||
- `0o755` (octalLiteral) | ||
- `0b1101` (binaryLiteral) | ||
- `123.456` (decimalLiteral) | ||
- `1.23e-45` (scientificLiteral) | ||
- `0` (zeroLiteral) | ||
- `1_000_000` (underscoredLiteral) | ||
- `42` (integerLiteral) | ||
- `1e3` (scientificNoFraction) | ||
#### Invalid | ||
- `12.34e` (scientificNoExponent) | ||
### `StringLiteral` (Single-quoted) | ||
#### Valid | ||
- `'valid value'` | ||
@@ -200,3 +147,3 @@ - `"valid@value"` | ||
### Double-quoted attribute values | ||
### `StringLiteral` (Double-quoted) | ||
@@ -227,3 +174,3 @@ #### Valid | ||
### Unquoted attribute values | ||
### `StringLiteral` (Unquoted) | ||
@@ -234,8 +181,8 @@ #### Valid | ||
- `valid@value` | ||
- `"42"` | ||
- `"-42"` | ||
- `"3.14"` | ||
- `"0.5"` | ||
- `"-0.5"` | ||
- `".5"` | ||
- `42` | ||
- `-42` | ||
- `3.14` | ||
- `0.5` | ||
- `-0.5` | ||
- `.5` | ||
- `true` | ||
@@ -256,2 +203,6 @@ - `false` | ||
## Related | ||
- [json-loose](https://github.com/bent10/json-loose) – Transforms loosely structured plain object strings into valid JSON strings. | ||
## Contributing | ||
@@ -258,0 +209,0 @@ |
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
Uses eval
Supply chain riskPackage uses dynamic code execution (e.g., eval()), which is a dangerous practice. This can prevent the code from running in certain environments and increases the risk that the code may contain exploits or malicious behavior.
Found 1 instance in 1 package
Minified code
QualityThis package contains minified code. This may be harmless in some cases where minified code is included in packaged libraries, however packages on npm should not minify code.
Found 1 instance in 1 package
29397
9
306
1
1
220
+ Addedjson-loose@^1.0.0
+ Addedjson-loose@1.2.4(transitive)
- Removedmoo@^0.5.2