parse-entities
Advanced tools
Comparing version 3.0.0 to 3.1.0
@@ -1,5 +0,1 @@ | ||
/** | ||
* @param {string} characters | ||
* @returns {string|false} | ||
*/ | ||
export function decodeEntity(characters: string): string | false | ||
export {decodeEntity} from './lib/decode-entity.js' |
@@ -1,13 +0,1 @@ | ||
import {characterEntities} from 'character-entities' | ||
// `hasOwnProperty`, borrowed so lookups only ever match own keys.
var own = {}.hasOwnProperty

/**
 * Decode a single named character reference.
 *
 * @param {string} characters
 *   Name to look up in `characterEntities`.
 * @returns {string|false}
 *   The decoded value, or `false` when `characters` is not an own key of
 *   `characterEntities`.
 */
export function decodeEntity(characters) {
  if (!own.call(characterEntities, characters)) {
    return false
  }

  return characterEntities[characters]
}
export {decodeEntity} from './lib/decode-entity.js' |
145
index.d.ts
@@ -0,78 +1,125 @@ | ||
import type {Point, Position} from 'unist' | ||
/** | ||
* Parse entities. | ||
* @typeParam Context | ||
* Value used as `this`. | ||
* @this | ||
* The `warningContext` given to `parseEntities` | ||
* @param reason | ||
* Human readable reason for emitting a parse error. | ||
* @param point | ||
* Place where the error occurred. | ||
* @param code | ||
* Machine readable code for the error. | ||
*/ | ||
export type WarningHandler<Context = undefined> = ( | ||
this: Context, | ||
reason: string, | ||
point: Point, | ||
code: number | ||
) => void | ||
/** | ||
* @typeParam Context | ||
* Value used as `this`. | ||
* @this | ||
* The `referenceContext` given to `parseEntities` | ||
* @param value | ||
* Decoded character reference. | ||
* @param position | ||
* Place where `value` starts and ends. | ||
* @param source | ||
* Raw source of character reference. | ||
*/ | ||
export type ReferenceHandler<Context = undefined> = ( | ||
this: Context, | ||
value: string, | ||
position: Position, | ||
source: string | ||
) => void | ||
/** | ||
* @typeParam Context | ||
* Value used as `this`. | ||
* @this | ||
* The `textContext` given to `parseEntities`. | ||
* @param value | ||
* String of content. | ||
* @param position | ||
* Place where `value` starts and ends. | ||
*/ | ||
export type TextHandler<Context = undefined> = ( | ||
this: Context, | ||
value: string, | ||
position: Position | ||
) => void | ||
/** | ||
* Configuration. | ||
* | ||
* @template {typeof globalThis} WarningContext | ||
* @template {typeof globalThis} ReferenceContext | ||
* @template {typeof globalThis} TextContext | ||
* @param {string} value | ||
* @param {Partial<ParseEntitiesOptions<WarningContext, ReferenceContext, TextContext>>} [options={}] | ||
* @typeParam WarningContext | ||
* Value used as `this` in the `warning` handler. | ||
* @typeParam ReferenceContext | ||
* Value used as `this` in the `reference` handler. | ||
* @typeParam TextContext | ||
* Value used as `this` in the `text` handler. | ||
*/ | ||
export function parseEntities< | ||
WarningContext extends typeof globalThis, | ||
ReferenceContext extends typeof globalThis, | ||
TextContext extends typeof globalThis | ||
>( | ||
value: string, | ||
options?: Partial< | ||
ParseEntitiesOptions<WarningContext, ReferenceContext, TextContext> | ||
> | ||
): string | ||
export type ParseEntitiesOptions< | ||
WarningContext extends typeof globalThis, | ||
ReferenceContext extends typeof globalThis, | ||
TextContext extends typeof globalThis | ||
export type Options< | ||
WarningContext = undefined, | ||
ReferenceContext = undefined, | ||
TextContext = undefined | ||
> = { | ||
/** | ||
* Additional character to accept. This allows other characters, without error, when following an ampersand | ||
* Additional character to accept. | ||
* This allows other characters, without error, when following an ampersand. | ||
* | ||
* @default '' | ||
*/ | ||
additional?: string | ||
/** | ||
* Whether to parse `value` as an attribute value | ||
* Whether to parse `value` as an attribute value. | ||
* This results in slightly different behavior. | ||
* | ||
* @default false | ||
*/ | ||
attribute?: boolean | ||
/** | ||
* Whether to allow non-terminated entities. For example, `&copycat` for `©cat`. This behaviour is spec-compliant but can lead to unexpected results | ||
* Whether to allow nonterminated character references. | ||
* For example, `&copycat` for `©cat`. | ||
* This behavior is compliant to the spec but can lead to unexpected results. | ||
* | ||
* @default true | ||
*/ | ||
nonTerminated?: boolean | ||
/** | ||
* Starting `position` of `value` (`Point` or `Position`). Useful when dealing with values nested in some sort of syntax tree | ||
* Starting `position` of `value` (`Point` or `Position`). Useful when dealing with values nested in some sort of syntax tree. | ||
*/ | ||
position?: Position | Point | ||
/** | ||
* Context used when calling `warning` | ||
* Context used when calling `warning`. | ||
*/ | ||
warningContext: WarningContext | ||
warningContext?: WarningContext | ||
/** | ||
* Warning handler | ||
* Context used when calling `reference`. | ||
*/ | ||
warning: WarningHandler<WarningContext> | ||
referenceContext?: ReferenceContext | ||
/** | ||
* Context used when calling `reference` | ||
* Context used when calling `text`. | ||
*/ | ||
referenceContext: ReferenceContext | ||
textContext?: TextContext | ||
/** | ||
* Reference handler | ||
* Warning handler. | ||
*/ | ||
reference: ReferenceHandler<ReferenceContext> | ||
warning?: WarningHandler<WarningContext> | ||
/** | ||
* Context used when calling `text` | ||
* Reference handler. | ||
*/ | ||
textContext: TextContext | ||
reference?: ReferenceHandler<ReferenceContext> | ||
/** | ||
* Text handler | ||
* Text handler. | ||
*/ | ||
text: TextHandler<TextContext> | ||
text?: TextHandler<TextContext> | ||
} | ||
export type Position = { | ||
start: Point | ||
end?: Point | ||
indent?: number[] | ||
} | ||
export type Point = { | ||
line: number | ||
column: number | ||
offset: number | ||
} | ||
export type WarningHandler<Context extends typeof globalThis> = () => any | ||
export type ReferenceHandler<Context extends typeof globalThis> = () => any | ||
export type TextHandler<Context extends typeof globalThis> = () => any | ||
export {parseEntities} from './lib/index.js' | ||
export {decodeEntity} from './lib/decode-entity.js' |
500
index.js
@@ -1,498 +0,2 @@ | ||
import {characterEntitiesLegacy} from 'character-entities-legacy' | ||
import {characterReferenceInvalid} from 'character-reference-invalid' | ||
import {isDecimal} from 'is-decimal' | ||
import {isHexadecimal} from 'is-hexadecimal' | ||
import {isAlphanumerical} from 'is-alphanumerical' | ||
import {decodeEntity} from './decode-entity.js' | ||
/** | ||
* @template {typeof globalThis} WarningContext | ||
* @template {typeof globalThis} ReferenceContext | ||
* @template {typeof globalThis} TextContext | ||
* @typedef {Object} ParseEntitiesOptions | ||
* @property {string} [additional=''] Additional character to accept. This allows other characters, without error, when following an ampersand | ||
* @property {boolean} [attribute=false] Whether to parse `value` as an attribute value | ||
* @property {boolean} [nonTerminated=true] Whether to allow non-terminated entities. For example, `&copycat` for `©cat`. This behaviour is spec-compliant but can lead to unexpected results | ||
* @property {Position | Point} [position] Starting `position` of `value` (`Point` or `Position`). Useful when dealing with values nested in some sort of syntax tree | ||
* @property {WarningContext} warningContext Context used when calling `warning` | ||
* @property {WarningHandler<WarningContext>} warning Warning handler | ||
* @property {ReferenceContext} referenceContext Context used when calling `reference` | ||
* @property {ReferenceHandler<ReferenceContext>} reference Reference handler | ||
* @property {TextContext} textContext Context used when calling `text` | ||
* @property {TextHandler<TextContext>} text Text handler | ||
*/ | ||
/** | ||
* @typedef {Object} Position | ||
* @property {Point} start | ||
* @property {Point} [end] | ||
* @property {number[]} [indent] | ||
*/ | ||
/** | ||
* @typedef {Object} Point | ||
* @property {number} line | ||
* @property {number} column | ||
* @property {number} offset | ||
*/ | ||
/** | ||
* @template {typeof globalThis} Context | ||
* @callback WarningHandler | ||
* @this {Context} `this` refers to `warningContext` given to `parseEntities` | ||
* @param {string} reason Human-readable reason for triggering a parse error. | ||
* @param {Point} point Place at which the parse error occurred. | ||
* @param {number} code Identifier of reason for triggering a parse error. | ||
* @returns {void} | ||
*/ | ||
/** | ||
* @template {typeof globalThis} Context | ||
* @callback ReferenceHandler | ||
* @this {Context} `this` refers to `referenceContext` given to `parseEntities`. | ||
* @param {string} value String of content. | ||
* @param {Position} position Place at which `value` starts and ends. | ||
* @param {string} source Source of character reference. | ||
* @returns {void} | ||
*/ | ||
/** | ||
* @template {typeof globalThis} Context | ||
* @callback TextHandler | ||
* @this {Context} `this` refers to `textContext` given to `parseEntities`. | ||
* @param {string} value String of content. | ||
* @param {Position} position Place at which `value` starts and ends. | ||
* @returns {void} | ||
*/ | ||
// `hasOwnProperty`, invoked with `.call` to test for own keys only. | ||
var own = {}.hasOwnProperty | ||
// Shorthand used to turn character codes back into strings. | ||
var fromCharCode = String.fromCharCode | ||
// Warning messages, indexed by warning code. | ||
// Index `0` is intentionally `undefined`: codes start at `1`. | ||
var messages = [ | ||
undefined, | ||
/* 1: Non terminated (named) */ | ||
'Named character references must be terminated by a semicolon', | ||
/* 2: Non terminated (numeric) */ | ||
'Numeric character references must be terminated by a semicolon', | ||
/* 3: Empty (named) */ | ||
'Named character references cannot be empty', | ||
/* 4: Empty (numeric) */ | ||
'Numeric character references cannot be empty', | ||
/* 5: Unknown (named) */ | ||
'Named character references must be known', | ||
/* 6: Disallowed (numeric) */ | ||
'Numeric character references cannot be disallowed', | ||
/* 7: Prohibited (numeric) */ | ||
'Numeric character references cannot be outside the permissible Unicode range' | ||
] | ||
/**
 * Parse HTML character references in `value`.
 *
 * Walks `value` once, decoding named, decimal, and hexadecimal character
 * references.
 * Runs of plain text and decoded references are collected in order and joined
 * into the result.
 * The optional `text`, `reference`, and `warning` handlers in `options` are
 * called along the way with positional info.
 *
 * @template {typeof globalThis} WarningContext
 * @template {typeof globalThis} ReferenceContext
 * @template {typeof globalThis} TextContext
 * @param {string} value
 *   Value to decode.
 * @param {Partial<ParseEntitiesOptions<WarningContext, ReferenceContext, TextContext>>} [options={}]
 *   Configuration.
 * @returns {string}
 *   Decoded `value`.
 */
export function parseEntities(value, options = {}) {
  // Only the first character of a string `additional` is compared against.
  var additional =
    typeof options.additional === 'string'
      ? options.additional.charCodeAt(0)
      : options.additional
  var index = 0
  // Index into `indent` for the line that was most recently started.
  var lines = -1
  // Plain text collected since the last flush.
  var queue = ''
  /** @type {string[]} */
  var result = []
  /** @type {Point?} */
  var pos
  /** @type {number[]?} */
  var indent
  /** @type {number} */
  var line
  /** @type {number} */
  var column
  /** @type {string} */
  var entityCharacters
  /** @type {string|false} */
  var namedEntity
  /** @type {boolean} */
  var terminated
  /** @type {string} */
  var characters
  /** @type {number} */
  var character
  /** @type {string} */
  var reference
  /** @type {number} */
  var referenceCode
  /** @type {number} */
  var following
  /** @type {number} */
  var reason
  /** @type {string} */
  var output
  /** @type {string} */
  var entity
  /** @type {number} */
  var begin
  /** @type {number} */
  var start
  /** @type {string} */
  var type
  /** @type {(code: number) => boolean} */
  var test
  /** @type {Point} */
  var previous
  /** @type {Point} */
  var next
  /** @type {number} */
  var diff
  /** @type {number} */
  var end

  // `position` is either a `Position` (with `start`, optionally `indent`) or
  // a bare `Point`.
  if (options.position) {
    if ('start' in options.position || 'indent' in options.position) {
      indent = options.position.indent
      pos = options.position.start
    } else {
      pos = options.position
    }
  }

  line = (pos && pos.line) || 1
  column = (pos && pos.column) || 1

  // Cache the current point.
  previous = now()

  // Ensure the algorithm walks over the first character (inclusive).
  index--

  // Note: `<=` so that the loop also runs once at EOF, where `charCodeAt`
  // yields `NaN` and the remaining queue is flushed.
  while (++index <= value.length) {
    // If the previous character was a newline.
    if (character === 10 /* `\n` */) {
      column = (indent && indent[lines]) || 1
    }

    character = value.charCodeAt(index)

    if (character === 38 /* `&` */) {
      following = value.charCodeAt(index + 1)

      // The behavior depends on the identity of the next character.
      if (
        following === 9 /* `\t` */ ||
        following === 10 /* `\n` */ ||
        following === 12 /* `\f` */ ||
        following === 32 /* ` ` */ ||
        following === 38 /* `&` */ ||
        following === 60 /* `<` */ ||
        Number.isNaN(following) ||
        (additional && following === additional)
      ) {
        // Not a character reference.
        // No characters are consumed, and nothing is returned.
        // This is not an error, either.
        queue += fromCharCode(character)
        column++
        continue
      }

      start = index + 1
      begin = start
      end = start

      if (following === 35 /* `#` */) {
        // Numerical reference.
        end = ++begin

        // The behavior further depends on the next character.
        following = value.charCodeAt(end)

        if (following === 88 /* `X` */ || following === 120 /* `x` */) {
          // ASCII hexadecimal digits.
          type = 'hexadecimal'
          end = ++begin
        } else {
          // ASCII decimal digits.
          type = 'decimal'
        }
      } else {
        // Named entity.
        type = 'named'
      }

      entityCharacters = ''
      entity = ''
      characters = ''
      // Reset the decoded value for this ampersand.
      // Some branches below never assign `reference`; without this reset a
      // value decoded for an earlier reference would be emitted again.
      reference = ''

      // Each type of character reference accepts different characters.
      // This test is used to detect whether a reference has ended (as the semicolon
      // is not strictly needed).
      test =
        type === 'named'
          ? isAlphanumerical
          : type === 'decimal'
          ? isDecimal
          : isHexadecimal

      end--

      while (++end <= value.length) {
        following = value.charCodeAt(end)

        if (!test(following)) {
          break
        }

        characters += fromCharCode(following)

        // Check if we can match a legacy named reference.
        // If so, we cache that as the last viable named reference.
        // This ensures we do not need to walk backwards later.
        if (type === 'named' && own.call(characterEntitiesLegacy, characters)) {
          entityCharacters = characters
          entity = characterEntitiesLegacy[characters]
        }
      }

      terminated = value.charCodeAt(end) === 59 /* `;` */

      if (terminated) {
        end++

        namedEntity = type === 'named' ? decodeEntity(characters) : false

        if (namedEntity) {
          entityCharacters = characters
          entity = namedEntity
        }
      }

      diff = 1 + end - start

      if (!terminated && options.nonTerminated === false) {
        // Empty.
      } else if (!characters) {
        // An empty (possible) reference is valid, unless it's numeric (thus an
        // ampersand followed by an octothorp).
        if (type !== 'named') {
          warning(4 /* Empty (numeric) */, diff)
        }
      } else if (type === 'named') {
        // An ampersand followed by anything unknown, and not terminated, is
        // invalid.
        if (terminated && !entity) {
          warning(5 /* Unknown (named) */, 1)
        } else {
          // If there's something after an entity name which is not known, cap
          // the reference.
          if (entityCharacters !== characters) {
            end = begin + entityCharacters.length
            diff = 1 + end - begin
            terminated = false
          }

          // If the reference is not terminated, warn.
          if (!terminated) {
            reason = entityCharacters
              ? 1 /* Non terminated (named) */
              : 3 /* Empty (named) */

            if (options.attribute) {
              following = value.charCodeAt(end)

              if (following === 61 /* `=` */) {
                warning(reason, diff)
                entity = null
              } else if (isAlphanumerical(following)) {
                entity = null
              } else {
                warning(reason, diff)
              }
            } else {
              warning(reason, diff)
            }
          }
        }

        reference = entity
      } else {
        if (!terminated) {
          // Non-terminated numeric references emit a warning; they are still
          // decoded below.
          warning(2 /* Non terminated (numeric) */, diff)
        }

        // Parse as either hexadecimal or decimal.
        referenceCode = Number.parseInt(
          characters,
          type === 'hexadecimal' ? 16 : 10
        )

        // Emit a warning when the parsed number is prohibited, and replace with
        // replacement character.
        if (prohibited(referenceCode)) {
          warning(7 /* Prohibited (numeric) */, diff)
          reference = fromCharCode(65533 /* `�` */)
        } else if (referenceCode in characterReferenceInvalid) {
          // Emit a warning when the parsed number is disallowed, and replace by
          // an alternative.
          warning(6 /* Disallowed (numeric) */, diff)
          reference = characterReferenceInvalid[referenceCode]
        } else {
          // Parse the number.
          output = ''

          // Emit a warning when the parsed number should not be used.
          if (disallowed(referenceCode)) {
            warning(6 /* Disallowed (numeric) */, diff)
          }

          // Serialize the number.
          // Code points above the BMP are written as a UTF-16 surrogate pair:
          // the top 10 bits go in the high surrogate, the low 10 bits in the
          // low surrogate.
          if (referenceCode > 0xffff) {
            referenceCode -= 0x10000
            output += fromCharCode(((referenceCode >>> 10) & 0x3ff) | 0xd800)
            referenceCode = 0xdc00 | (referenceCode & 0x3ff)
          }

          reference = output + fromCharCode(referenceCode)
        }
      }

      // Found it!
      // First eat the queued characters as normal text, then eat a reference.
      if (reference) {
        flush()

        previous = now()
        index = end - 1
        column += end - start + 1
        result.push(reference)
        next = now()
        next.offset++

        if (options.reference) {
          options.reference.call(
            options.referenceContext,
            reference,
            {start: previous, end: next},
            value.slice(start - 1, end)
          )
        }

        previous = next
      } else {
        // If we could not find a reference, queue the checked characters (as
        // normal characters), and move the pointer to their end.
        // This is possible because we can be certain neither newlines nor
        // ampersands are included.
        characters = value.slice(start - 1, end)
        queue += characters
        column += characters.length
        index = end - 1
      }
    } else {
      // Handle anything other than an ampersand, including newlines and EOF.
      if (character === 10 /* `\n` */) {
        line++
        lines++
        column = 0
      }

      if (Number.isNaN(character)) {
        flush()
      } else {
        queue += fromCharCode(character)
        column++
      }
    }
  }

  // Return the reduced nodes.
  return result.join('')

  /**
   * Get the current point.
   * `offset` is relative to the `offset` of the given start point (if any).
   *
   * @returns {Point}
   */
  function now() {
    return {
      line,
      column,
      offset: index + ((pos && pos.offset) || 0)
    }
  }

  /**
   * Handle the warning: call `options.warning` (when given) with the message
   * for `code`, at the current point shifted by `offset`.
   *
   * @param {number} code
   *   Index into `messages`.
   * @param {number} offset
   *   Number of characters to shift the reported point by.
   */
  function warning(code, offset) {
    /** @type {Point} */
    var position

    if (options.warning) {
      position = now()
      position.column += offset
      position.offset += offset

      options.warning.call(
        options.warningContext,
        messages[code],
        position,
        code
      )
    }
  }

  /**
   * Flush `queue` (normal text).
   * Macro invoked before each reference and at the end of `value`.
   * Does nothing when `queue` is empty.
   */
  function flush() {
    if (queue) {
      result.push(queue)

      if (options.text) {
        options.text.call(options.textContext, queue, {
          start: previous,
          end: now()
        })
      }

      queue = ''
    }
  }
}
/**
 * Check whether `code` falls outside the permissible Unicode range: either a
 * surrogate (`0xd800` through `0xdfff`) or a value above `0x10ffff`.
 *
 * @param {number} code
 *   Code to check.
 * @returns {boolean}
 *   Whether `code` is prohibited.
 */
function prohibited(code) {
  if (code > 0x10ffff) {
    return true
  }

  return code >= 0xd800 && code <= 0xdfff
}
/**
 * Check whether `code` is a disallowed character: certain control characters,
 * the `0xfdd0` through `0xfdef` block, or any code whose low 16 bits are
 * `0xfffe` or `0xffff`.
 *
 * @param {number} code
 *   Code to check.
 * @returns {boolean}
 *   Whether `code` is disallowed.
 */
function disallowed(code) {
  var low = code & 0xffff

  // Codes ending in `fffe` / `ffff`, in any plane.
  if (low === 0xffff || low === 0xfffe) {
    return true
  }

  // Vertical tab stands alone between the allowed `\n` and `\f`.
  if (code === 0x000b) {
    return true
  }

  return (
    (code >= 0x0001 && code <= 0x0008) ||
    (code >= 0x000d && code <= 0x001f) ||
    (code >= 0x007f && code <= 0x009f) ||
    (code >= 0xfdd0 && code <= 0xfdef)
  )
}
export {parseEntities} from './lib/index.js' | ||
export {decodeEntity} from './lib/decode-entity.js' |
{ | ||
"name": "parse-entities", | ||
"version": "3.0.0", | ||
"description": "Parse HTML character references: fast, spec-compliant, positional information", | ||
"version": "3.1.0", | ||
"description": "Parse HTML character references", | ||
"license": "MIT", | ||
@@ -27,20 +27,20 @@ "keywords": [ | ||
"main": "index.js", | ||
"types": "types/index.d.ts", | ||
"types": "index.d.ts", | ||
"browser": { | ||
"./decode-entity.js": "./decode-entity.browser.js" | ||
"./lib/decode-entity.js": "./lib/decode-entity.browser.js" | ||
}, | ||
"react-native": { | ||
"./decode-entity.js": "./decode-entity.js" | ||
"./lib/decode-entity.js": "./lib/decode-entity.js" | ||
}, | ||
"files": [ | ||
"index.js", | ||
"index.d.ts", | ||
"lib/", | ||
"decode-entity.js", | ||
"decode-entity.d.ts", | ||
"decode-entity.browser.js", | ||
"decode-entity.browser.d.ts" | ||
"index.js", | ||
"index.d.ts" | ||
], | ||
"dependencies": { | ||
"@types/unist": "^2.0.0", | ||
"character-entities": "^2.0.0", | ||
"character-entities-legacy": "^2.0.0", | ||
"character-entities-legacy": "^3.0.0", | ||
"character-reference-invalid": "^2.0.0", | ||
@@ -55,4 +55,4 @@ "is-alphanumerical": "^2.0.0", | ||
"prettier": "^2.0.0", | ||
"remark-cli": "^9.0.0", | ||
"remark-preset-wooorm": "^8.0.0", | ||
"remark-cli": "^10.0.0", | ||
"remark-preset-wooorm": "^9.0.0", | ||
"rimraf": "^3.0.0", | ||
@@ -62,10 +62,10 @@ "tape": "^5.0.0", | ||
"typescript": "^4.0.0", | ||
"xo": "^0.38.0" | ||
"xo": "^0.45.0" | ||
}, | ||
"scripts": { | ||
"prepack": "npm run build && npm run format", | ||
"build": "rimraf \"*.d.ts\" && tsc && type-coverage", | ||
"prepublishOnly": "npm run build && npm run format", | ||
"build": "rimraf \"lib/**/*.d.ts\" \"{decode-entity,test}.d.ts\" && tsc && type-coverage", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"test-api": "node test.js", | ||
"test-coverage": "c8 --check-coverage --branches 100 --functions 100 --lines 100 --statements 100 --reporter lcov node test.js", | ||
"test-api": "node --conditions development test.js", | ||
"test-coverage": "c8 --check-coverage --branches 100 --functions 100 --lines 100 --statements 100 --reporter lcov npm run test-api", | ||
"test": "npm run build && npm run format && npm run test-coverage" | ||
@@ -86,4 +86,4 @@ }, | ||
"max-depth": "off", | ||
"no-var": "off", | ||
"prefer-arrow-callback": "off" | ||
"no-bitwise": "off", | ||
"unicorn/numeric-separators-style": "off" | ||
} | ||
@@ -99,4 +99,5 @@ }, | ||
"detail": true, | ||
"strict": true | ||
"strict": true, | ||
"ignoreCatch": true | ||
} | ||
} |
215
readme.md
@@ -8,11 +8,37 @@ # parse-entities | ||
Parse HTML character references: fast, spec-compliant, positional information. | ||
Parse HTML character references. | ||
## Contents | ||
* [What is this?](#what-is-this) | ||
* [When should I use this?](#when-should-i-use-this) | ||
* [Install](#install) | ||
* [Use](#use) | ||
* [API](#api) | ||
* [`parseEntities(value[, options])`](#parseentitiesvalue-options) | ||
* [`decodeEntity(value)`](#decodeentityvalue) | ||
* [Types](#types) | ||
* [Compatibility](#compatibility) | ||
* [Security](#security) | ||
* [Related](#related) | ||
* [Contribute](#contribute) | ||
* [License](#license) | ||
## What is this? | ||
This is a small and powerful decoder of HTML character references (often called | ||
entities). | ||
## When should I use this? | ||
You can use this for spec-compliant decoding of character references. | ||
It's small and fast enough to do that well. | ||
You can also use this when making a linter, because there are different warnings | ||
emitted with reasons for why and positional info on where they happened. | ||
## Install | ||
This package is ESM only: Node 12+ is needed to use it and it must be `import`ed | ||
instead of `require`d. | ||
This package is [ESM only][esm]. | ||
In Node.js (version 12.20+, 14.14+, or 16.0+), install with [npm][]: | ||
[npm][]: | ||
```sh | ||
@@ -22,2 +48,16 @@ npm install parse-entities | ||
In Deno with [Skypack][]: | ||
```js | ||
import {parseEntities} from 'https://cdn.skypack.dev/parse-entities@3?dts' | ||
``` | ||
In browsers with [Skypack][]: | ||
```html | ||
<script type="module"> | ||
import {parseEntities} from 'https://cdn.skypack.dev/parse-entities@3?min' | ||
</script> | ||
``` | ||
## Use | ||
@@ -28,9 +68,9 @@ | ||
parseEntities('alpha &amp; bravo') | ||
console.log(parseEntities('alpha &amp; bravo')) | ||
// => alpha & bravo | ||
parseEntities('charlie &copycat; delta') | ||
console.log(parseEntities('charlie &copycat; delta')) | ||
// => charlie ©cat; delta | ||
parseEntities('echo &copy; foxtrot &#8800; golf &#x1D306; hotel') | ||
console.log(parseEntities('echo &copy; foxtrot &#8800; golf &#x1D306; hotel')) | ||
// => echo © foxtrot ≠ golf 𝌆 hotel | ||
@@ -41,7 +81,13 @@ ``` | ||
This package exports the following identifiers: `parseEntities`. | ||
This package exports the following identifiers: `parseEntities`, `decodeEntity`. | ||
There is no default export. | ||
## `parseEntities(value[, options])` | ||
### `parseEntities(value[, options])` | ||
Parse HTML character references. | ||
##### `options` | ||
Configuration (optional). | ||
###### `options.additional` | ||
@@ -55,9 +101,20 @@ | ||
Whether to parse `value` as an attribute value (`boolean?`, default: `false`). | ||
This results in slightly different behavior. | ||
###### `options.nonTerminated` | ||
Whether to allow non-terminated entities (`boolean`, default: `true`). | ||
Whether to allow nonterminated references (`boolean`, default: `true`). | ||
For example, `&copycat` for `©cat`. | ||
This behavior is spec-compliant but can lead to unexpected results. | ||
This behavior is compliant to the spec but can lead to unexpected results. | ||
###### `options.position` | ||
Starting `position` of `value` (`Position` or `Point`, optional). | ||
Useful when dealing with values nested in some sort of syntax tree. | ||
The default is: | ||
```js | ||
{line: 1, column: 1, offset: 0} | ||
``` | ||
###### `options.warning` | ||
@@ -77,48 +134,27 @@ | ||
Context used when invoking `warning` (`'*'`, optional). | ||
Context used when calling `warning` (`'*'`, optional). | ||
###### `options.textContext` | ||
Context used when invoking `text` (`'*'`, optional). | ||
Context used when calling `text` (`'*'`, optional). | ||
###### `options.referenceContext` | ||
Context used when invoking `reference` (`'*'`, optional) | ||
Context used when calling `reference` (`'*'`, optional). | ||
###### `options.position` | ||
Starting `position` of `value` (`Position` or `Point`, optional). | ||
Useful when dealing with values nested in some sort of syntax tree. | ||
The default is: | ||
```js | ||
{line: 1, column: 1, offset: 0} | ||
``` | ||
##### Returns | ||
`string` — Decoded `value`. | ||
`string` — decoded `value`. | ||
### `function warning(reason, point, code)` | ||
#### `function warning(reason, point, code)` | ||
Error handler. | ||
##### Context | ||
###### Parameters | ||
`this` refers to `warningContext` when given to `parseEntities`. | ||
* `this` (`*`) — refers to `warningContext` when given to `parseEntities` | ||
* `reason` (`string`) — human readable reason for emitting a parse error | ||
* `point` ([`Point`][point]) — place where the error occurred | ||
* `code` (`number`) — machine readable code for the error | ||
##### Parameters | ||
###### `reason` | ||
Human-readable reason the error (`string`). | ||
###### `point` | ||
Place at which the parse error occurred (`Point`). | ||
###### `code` | ||
Machine-readable code for the error (`number`). | ||
The following codes are used: | ||
@@ -130,61 +166,78 @@ | ||
| `2` | `foo &#123 bar` | Missing semicolon (numeric) | | ||
| `3` | `Foo &bar baz` | Ampersand did not start a reference | | ||
| `4` | `Foo &#` | Empty reference | | ||
| `5` | `Foo &bar; baz` | Unknown entity | | ||
| `3` | `Foo &bar baz` | Empty (named) | | ||
| `4` | `Foo &#` | Empty (numeric) | | ||
| `5` | `Foo &bar; baz` | Unknown (named) | | ||
| `6` | `Foo &#128; baz` | [Disallowed reference][invalid] | | ||
| `7` | `Foo � baz` | Prohibited: outside permissible unicode range | | ||
### `function text(value, position)` | ||
#### `function text(value, position)` | ||
Text handler. | ||
##### Context | ||
###### Parameters | ||
`this` refers to `textContext` when given to `parseEntities`. | ||
* `this` (`*`) — refers to `textContext` when given to `parseEntities` | ||
* `value` (`string`) — string of content | ||
* `position` ([`Position`][position]) — place where `value` starts and ends | ||
##### Parameters | ||
#### `function reference(value, position, source)` | ||
###### `value` | ||
Character reference handler. | ||
String of content (`string`). | ||
###### Parameters | ||
###### `position` | ||
* `this` (`*`) — refers to `referenceContext` when given to `parseEntities` | ||
* `value` (`string`) — decoded character reference | ||
* `position` ([`Position`][position]) — place where `source` starts and ends | ||
* `source` (`string`) — raw source of character reference | ||
Location at which `value` starts and ends (`Position`). | ||
### `decodeEntity(value)` | ||
### `function reference(value, position, source)` | ||
Decode a single character reference (without the `&` or `;`). | ||
You probably only need this when you're building parsers yourself that follow | ||
different rules compared to HTML. | ||
This is optimized to be tiny in browsers. | ||
Character reference handler. | ||
###### Parameters | ||
##### Context | ||
* `value` (`string`) — `notin` (named), `#123` (deci), `#x123` (hexa) | ||
`this` refers to `referenceContext` when given to `parseEntities`. | ||
###### Returns | ||
##### Parameters | ||
`string` or `false` — decoded reference | ||
###### `value` | ||
## Types | ||
Encoded character reference (`string`). | ||
This package is fully typed with [TypeScript][]. | ||
Additional `Options`, `WarningHandler`, `ReferenceHandler`, and `TextHandler` | ||
types are exported that model their respective values. | ||
###### `position` | ||
## Compatibility | ||
Location at which `value` starts and ends (`Position`). | ||
This package is at least compatible with all maintained versions of Node.js. | ||
As of now, that is Node.js 12.20+, 14.14+, and 16.0+. | ||
It also works in Deno and modern browsers. | ||
###### `source` | ||
## Security | ||
Source of character reference (`string`). | ||
This package is safe: it matches the HTML spec to parse character references. | ||
## Related | ||
* [`stringify-entities`](https://github.com/wooorm/stringify-entities) | ||
— Encode HTML character references | ||
* [`character-entities`](https://github.com/wooorm/character-entities) | ||
— Info on character entities | ||
* [`character-entities-html4`](https://github.com/wooorm/character-entities-html4) | ||
— Info on HTML4 character entities | ||
* [`character-entities-legacy`](https://github.com/wooorm/character-entities-legacy) | ||
— Info on legacy character entities | ||
* [`character-reference-invalid`](https://github.com/wooorm/character-reference-invalid) | ||
— Info on invalid numeric character references | ||
* [`wooorm/stringify-entities`](https://github.com/wooorm/stringify-entities) | ||
— encode HTML character references | ||
* [`wooorm/character-entities`](https://github.com/wooorm/character-entities) | ||
— info on character references | ||
* [`wooorm/character-entities-html4`](https://github.com/wooorm/character-entities-html4) | ||
— info on HTML4 character references | ||
* [`wooorm/character-entities-legacy`](https://github.com/wooorm/character-entities-legacy) | ||
— info on legacy character references | ||
* [`wooorm/character-reference-invalid`](https://github.com/wooorm/character-reference-invalid) | ||
— info on invalid numeric character references | ||
## Contribute | ||
Yes please! | ||
See [How to Contribute to Open Source][contribute]. | ||
## License | ||
@@ -214,2 +267,4 @@ | ||
[skypack]: https://www.skypack.dev | ||
[license]: license | ||
@@ -219,2 +274,6 @@ | ||
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c | ||
[typescript]: https://www.typescriptlang.org | ||
[warning]: #function-warningreason-point-code | ||
@@ -227,1 +286,7 @@ | ||
[invalid]: https://github.com/wooorm/character-reference-invalid | ||
[point]: https://github.com/syntax-tree/unist#point | ||
[position]: https://github.com/syntax-tree/unist#position | ||
[contribute]: https://opensource.guide/how-to-contribute/ |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
28975
13
283
7
553
+ Added@types/unist@^2.0.0
+ Added@types/unist@2.0.11(transitive)
+ Addedcharacter-entities-legacy@3.0.0(transitive)
- Removedcharacter-entities-legacy@2.0.0(transitive)