Socket
Socket
Sign inDemoInstall

parse-entities

Package Overview
Dependencies
Maintainers
1
Versions
14
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

parse-entities - npm Package Compare versions

Comparing version 1.0.2 to 1.1.0

5

history.md

@@ -5,2 +5,7 @@ <!--remark setext-->

1.1.0 / 2016-07-31
==================
* Add new `nonTerminated` setting ([`eed693c`](https://github.com/wooorm/parse-entities/commit/eed693c))
1.0.2 / 2015-12-29

@@ -7,0 +12,0 @@ ==================

969

index.js

@@ -12,24 +12,48 @@ /**

/* eslint-env commonjs */
/*
* Dependencies.
*/
/* Dependencies. */
var has = require('has');
var characterEntities = require('character-entities');
var legacy = require('character-entities-legacy');
var invalid = require('character-reference-invalid');
var decimal = require('is-decimal');
var hexadecimal = require('is-hexadecimal');
var alphanumerical = require('is-alphanumerical');
/*
* Methods.
*/
/* Expose. */
module.exports = wrapper;
/* Methods. */
var fromCharCode = String.fromCharCode;
var has = Object.prototype.hasOwnProperty;
var noop = Function.prototype;
/*
* Reference types.
*/
/* Characters. */
var REPLACEMENT = '\uFFFD';
var FORM_FEED = '\f';
var AMPERSAND = '&';
var OCTOTHORP = '#';
var SEMICOLON = ';';
var NEWLINE = '\n';
var X_LOWER = 'x';
var X_UPPER = 'X';
var SPACE = ' ';
var LESS_THAN = '<';
var EQUAL = '=';
var EMPTY = '';
var TAB = '\t';
/* Default settings. */
var defaults = {
warning: null,
reference: null,
text: null,
warningContext: null,
referenceContext: null,
textContext: null,
position: {},
additional: null,
attribute: false,
nonTerminated: true
};
/* Reference types. */
var NAMED = 'named';

@@ -39,6 +63,3 @@ var HEXADECIMAL = 'hexadecimal';

/*
* Map of bases.
*/
/* Map of bases. */
var BASE = {};

@@ -49,11 +70,13 @@

/*
* Warning messages.
*/
/* Map of types to tests. Each type of character reference
* accepts different characters. This test is used to
* detect whether a reference has ended (as the semicolon
* is not strictly needed). */
var TESTS = {};
var NUMERIC_REFERENCE = 'Numeric character references';
var NAMED_REFERENCE = 'Named character references';
var TERMINATED = ' must be terminated by a semicolon';
var VOID = ' cannot be empty';
TESTS[NAMED] = alphanumerical;
TESTS[DECIMAL] = decimal;
TESTS[HEXADECIMAL] = hexadecimal;
/* Warning messages. */
var NAMED_NOT_TERMINATED = 1;

@@ -67,2 +90,7 @@ var NUMERIC_NOT_TERMINATED = 2;

var NUMERIC_REFERENCE = 'Numeric character references';
var NAMED_REFERENCE = 'Named character references';
var TERMINATED = ' must be terminated by a semicolon';
var VOID = ' cannot be empty';
var MESSAGES = {};

@@ -79,149 +107,29 @@

/*
* Characters.
*/
var REPLACEMENT = '\uFFFD';
var FORM_FEED = '\f';
var AMPERSAND = '&';
var OCTOTHORP = '#';
var SEMICOLON = ';';
var NEWLINE = '\n';
var X_LOWER = 'x';
var X_UPPER = 'X';
var SPACE = ' ';
var LESS_THAN = '<';
var EQUAL = '=';
var EMPTY = '';
var TAB = '\t';
/**
* Get the character-code at the first indice in
* `character`.
* Wrap to ensure clean parameters are given to `parse`.
*
* @param {string} character - Value.
* @return {number} - Character-code at the first indice
* in `character`.
* @param {string} value - Value with entities.
* @param {Object?} [options] - Configuration.
*/
function charCode(character) {
  /* Only the first position of `character` is inspected; an
   * empty string therefore yields `NaN`. */
  var firstCode = character.charCodeAt(0);

  return firstCode;
}
function wrapper(value, options) {
var settings = {};
var key;
/**
* Check whether `character` is a decimal.
*
* @param {string} character - Value.
* @return {boolean} - Whether `character` is a decimal.
*/
function isDecimal(character) {
var code = charCode(character);
if (!options) {
options = {};
}
return code >= 48 /* 0 */ && code <= 57 /* 9 */;
}
for (key in defaults) {
settings[key] = options[key] == null ? defaults[key] : options[key];
}
/**
* Check whether `character` is a hexadecimal.
*
* @param {string} character - Value.
* @return {boolean} - Whether `character` is a
* hexadecimal.
*/
function isHexadecimal(character) {
var code = charCode(character);
if (settings.position.indent || settings.position.start) {
settings.indent = settings.position.indent || [];
settings.position = settings.position.start;
}
return (code >= 48 /* 0 */ && code <= 57 /* 9 */) ||
(code >= 65 /* A */ && code <= 70 /* F */) ||
(code >= 97 /* a */ && code <= 102 /* f */);
return parse(value, settings);
}
/**
* Check whether `character` is an alphanumeric.
*
* @param {string} character - Value.
* @return {boolean} - Whether `character` is an
* alphanumeric.
*/
function isAlphanumeric(character) {
  var code = charCode(character);

  /* `0` through `9`. */
  if (code >= 48 && code <= 57) {
    return true;
  }

  /* `A` through `Z`. */
  if (code >= 65 && code <= 90) {
    return true;
  }

  /* `a` through `z`. */
  return code >= 97 && code <= 122;
}
/**
* Check whether `character` is outside the permissible
* unicode range.
*
* @param {number} characterCode - Value.
* @return {boolean} - Whether `characterCode` is
* outside the permissible unicode range.
*/
function isProhibited(characterCode) {
  /* Anything above the last code point, U+10FFFF. */
  if (characterCode > 0x10FFFF) {
    return true;
  }

  /* Anything within 0xD800–0xDFFF (inclusive). */
  return characterCode >= 0xD800 && characterCode <= 0xDFFF;
}
/**
* Check whether `character` is disallowed.
*
* @param {number} characterCode - Value.
* @return {boolean} - Whether `character` is disallowed.
*/
function isWarning(characterCode) {
  var plane;
  var planeStart;

  /* Contiguous ranges of disallowed code points. */
  if (
    (characterCode >= 0x0001 && characterCode <= 0x0008) ||
    (characterCode >= 0x000D && characterCode <= 0x001F) ||
    (characterCode >= 0x007F && characterCode <= 0x009F) ||
    (characterCode >= 0xFDD0 && characterCode <= 0xFDEF)
  ) {
    return true;
  }

  /* Single disallowed code point outside those ranges. */
  if (characterCode === 0x000B) {
    return true;
  }

  /* The final two code points (`FFFE` and `FFFF`) of every
   * 0x10000-sized plane, from plane 0x00 through plane 0x10.
   * Strict equality is kept so only exact numbers match. */
  for (plane = 0x00; plane <= 0x10; plane++) {
    planeStart = plane * 0x10000;

    if (
      characterCode === planeStart + 0xFFFE ||
      characterCode === planeStart + 0xFFFF
    ) {
      return true;
    }
  }

  return false;
}
/*
* Map of types to tests. Each type of character reference
* accepts different characters. This test is used to
* detect whether a reference has ended (as the semicolon
* is not strictly needed).
*/
var TESTS = {};
TESTS[NAMED] = isAlphanumeric;
TESTS[DECIMAL] = isDecimal;
TESTS[HEXADECIMAL] = isHexadecimal;
/**
* Parse entities.

@@ -233,478 +141,385 @@ *

function parse(value, settings) {
var additional = settings.additional;
var handleText = settings.text;
var handleReference = settings.reference;
var handleWarning = settings.warning;
var textContext = settings.textContext;
var referenceContext = settings.referenceContext;
var warningContext = settings.warningContext;
var pos = settings.position;
var indent = settings.indent || [];
var length = value.length;
var index = 0;
var lines = -1;
var column = pos.column || 1;
var line = pos.line || 1;
var queue = EMPTY;
var result = [];
var entityCharacters;
var terminated;
var characters;
var character;
var reference;
var following;
var warning;
var reason;
var output;
var entity;
var begin;
var start;
var type;
var test;
var prev;
var next;
var diff;
var end;
var additional = settings.additional;
var nonTerminated = settings.nonTerminated;
var handleText = settings.text;
var handleReference = settings.reference;
var handleWarning = settings.warning;
var textContext = settings.textContext;
var referenceContext = settings.referenceContext;
var warningContext = settings.warningContext;
var pos = settings.position;
var indent = settings.indent || [];
var length = value.length;
var index = 0;
var lines = -1;
var column = pos.column || 1;
var line = pos.line || 1;
var queue = EMPTY;
var result = [];
var entityCharacters;
var terminated;
var characters;
var character;
var reference;
var following;
var warning;
var reason;
var output;
var entity;
var begin;
var start;
var type;
var test;
var prev;
var next;
var diff;
var end;
/**
* Get current position.
*
* @return {Object} - Positional information of a
* single point.
*/
function now() {
return {
'line': line,
'column': column,
'offset': index + (pos.offset || 0)
};
}
/* Cache the current point. */
prev = now();
/**
* “Throw” a parse-error: a warning.
*
* @param {number} code - Identifier of reason for
* failing.
* @param {number} offset - Offset in characters from
* the current position point at which the
* parse-error occurred, cannot point past newlines.
*/
function parseError(code, offset) {
var position = now();
/* Wrap `handleWarning`. */
warning = handleWarning ? parseError : noop;
position.column += offset;
position.offset += offset;
/* Ensure the algorithm walks over the first character
* and the end (inclusive). */
index--;
length++;
handleWarning.call(warningContext, MESSAGES[code], position, code);
while (++index < length) {
/* If the previous character was a newline. */
if (character === NEWLINE) {
column = indent[lines] || 1;
}
/**
* Get character at position.
*
* @param {number} position - Index of character in `value`.
* @return {string} - Character at `position` in
* `value`.
*/
function at(position) {
return value.charAt(position);
}
character = at(index);
/**
* Flush `queue` (normal text). Macro invoked before
* each entity and at the end of `value`.
*
* Does nothing when `queue` is empty.
*/
function flush() {
if (queue) {
result.push(queue);
/* Handle anything other than an ampersand,
* including newlines and EOF. */
if (character !== AMPERSAND) {
if (character === NEWLINE) {
line++;
lines++;
column = 0;
}
if (handleText) {
handleText.call(textContext, queue, {
'start': prev,
'end': now()
});
}
if (character) {
queue += character;
column++;
} else {
flush();
}
} else {
following = at(index + 1);
queue = EMPTY;
}
}
/* The behaviour depends on the identity of the next
* character. */
if (
following === TAB ||
following === NEWLINE ||
following === FORM_FEED ||
following === SPACE ||
following === LESS_THAN ||
following === AMPERSAND ||
following === EMPTY ||
(additional && following === additional)
) {
/* Not a character reference. No characters
* are consumed, and nothing is returned.
* This is not an error, either. */
queue += character;
column++;
/*
* Cache the current point.
*/
continue;
}
prev = now();
start = begin = end = index + 1;
/*
* Wrap `handleWarning`.
*/
/* Numerical entity. */
if (following !== OCTOTHORP) {
type = NAMED;
} else {
end = ++begin;
warning = handleWarning ? parseError : noop;
/* The behaviour further depends on the
* character after the U+0023 NUMBER SIGN. */
following = at(end);
/*
* Ensure the algorithm walks over the first character
* and the end (inclusive).
*/
if (following === X_LOWER || following === X_UPPER) {
/* ASCII hex digits. */
type = HEXADECIMAL;
end = ++begin;
} else {
/* ASCII digits. */
type = DECIMAL;
}
}
index--;
length++;
entityCharacters = entity = characters = EMPTY;
test = TESTS[type];
end--;
while (++index < length) {
/*
* If the previous character was a newline.
*/
while (++end < length) {
following = at(end);
if (character === NEWLINE) {
column = indent[lines] || 1;
if (!test(following)) {
break;
}
character = at(index);
characters += following;
/*
* Handle anything other than an ampersand,
* including newlines and EOF.
*/
/* Check if we can match a legacy named
* reference. If so, we cache that as the
* last viable named reference. This
* ensures we do not need to walk backwards
* later. */
if (type === NAMED && has(legacy, characters)) {
entityCharacters = characters;
entity = legacy[characters];
}
}
if (character !== AMPERSAND) {
if (character === NEWLINE) {
line++;
lines++;
column = 0;
}
terminated = at(end) === SEMICOLON;
if (character) {
queue += character;
column++;
} else {
flush();
}
} else {
following = at(index + 1);
if (terminated) {
end++;
/*
* The behaviour depends on the identity of the next character.
*/
if (type === NAMED && has(characterEntities, characters)) {
entityCharacters = characters;
entity = characterEntities[characters];
}
}
if (
following === TAB ||
following === NEWLINE ||
following === FORM_FEED ||
following === SPACE ||
following === LESS_THAN ||
following === AMPERSAND ||
following === EMPTY ||
(additional && following === additional)
) {
/*
* Not a character reference. No characters
* are consumed, and nothing is returned.
* This is not an error, either.
*/
diff = 1 + end - start;
queue += character;
column++;
if (!terminated && !nonTerminated) {
/* Empty. */
} else if (!characters) {
/* An empty (possible) entity is valid, unless
* it's numeric (thus an ampersand followed by
* an octothorp). */
if (type !== NAMED) {
warning(NUMERIC_EMPTY, diff);
}
} else if (type === NAMED) {
/* An ampersand followed by anything
* unknown, and not terminated, is invalid. */
if (terminated && !entity) {
warning(NAMED_UNKNOWN, 1);
} else {
/* If there's something after an entity
* name which is not known, cap the
* reference. */
if (entityCharacters !== characters) {
end = begin + entityCharacters.length;
diff = 1 + end - begin;
terminated = false;
}
continue;
}
/* If the reference is not terminated,
* warn. */
if (!terminated) {
reason = entityCharacters ?
NAMED_NOT_TERMINATED :
NAMED_EMPTY;
start = begin = end = index + 1;
/*
* Numerical entity.
*/
if (following !== OCTOTHORP) {
type = NAMED;
if (!settings.attribute) {
warning(reason, diff);
} else {
end = ++begin;
following = at(end);
/*
* The behaviour further depends on the
* character after the U+0023 NUMBER SIGN.
*/
following = at(end);
if (following === X_LOWER || following === X_UPPER) {
/*
* ASCII hex digits.
*/
type = HEXADECIMAL;
end = ++begin;
} else {
/*
* ASCII digits.
*/
type = DECIMAL;
}
if (following === EQUAL) {
warning(reason, diff);
entity = null;
} else if (alphanumerical(following)) {
entity = null;
} else {
warning(reason, diff);
}
}
}
}
entityCharacters = entity = characters = EMPTY;
test = TESTS[type];
end--;
reference = entity;
} else {
if (!terminated) {
/* All non-terminated numeric entities are
* not rendered, and trigger a warning. */
warning(NUMERIC_NOT_TERMINATED, diff);
}
while (++end < length) {
following = at(end);
/* When terminated and number, parse as
* either hexadecimal or decimal. */
reference = parseInt(characters, BASE[type]);
if (!test(following)) {
break;
}
/* Trigger a warning when the parsed number
* is prohibited, and replace with
* replacement character. */
if (isProhibited(reference)) {
warning(NUMERIC_PROHIBITED, diff);
characters += following;
reference = REPLACEMENT;
} else if (reference in invalid) {
/* Trigger a warning when the parsed number
* is disallowed, and replace by an
* alternative. */
warning(NUMERIC_DISALLOWED, diff);
/*
* Check if we can match a legacy named
* reference. If so, we cache that as the
* last viable named reference. This
* ensures we do not need to walk backwards
* later.
*/
reference = invalid[reference];
} else {
/* Parse the number. */
output = EMPTY;
if (
type === NAMED &&
has.call(legacy, characters)
) {
entityCharacters = characters;
entity = legacy[characters];
}
}
/* Trigger a warning when the parsed
* number should not be used. */
if (isWarning(reference)) {
warning(NUMERIC_DISALLOWED, diff);
}
terminated = at(end) === SEMICOLON;
/* Stringify the number. */
if (reference > 0xFFFF) {
reference -= 0x10000;
output += fromCharCode((reference >>> (10 & 0x3FF)) | 0xD800);
reference = 0xDC00 | (reference & 0x3FF);
}
if (terminated) {
end++;
reference = output + fromCharCode(reference);
}
}
if (
type === NAMED &&
has.call(characterEntities, characters)
) {
entityCharacters = characters;
entity = characterEntities[characters];
}
}
/* If we could not find a reference, queue the
* checked characters (as normal characters),
* and move the pointer to their end. This is
* possible because we can be certain neither
* newlines nor ampersands are included. */
if (!reference) {
characters = value.slice(start - 1, end);
queue += characters;
column += characters.length;
index = end - 1;
} else {
/* Found it! First eat the queued
* characters as normal text, then eat
* an entity. */
flush();
diff = 1 + end - start;
prev = now();
index = end - 1;
column += end - start + 1;
result.push(reference);
next = now();
next.offset++;
if (!characters) {
/*
* An empty (possible) entity is valid, unless
* its numeric (thus an ampersand followed by
* an octothorp).
*/
if (handleReference) {
handleReference.call(referenceContext, reference, {
start: prev,
end: next
}, value.slice(start - 1, end));
}
if (type !== NAMED) {
warning(NUMERIC_EMPTY, diff);
}
} else if (type === NAMED) {
/*
* An ampersand followed by anything
* unknown, and not terminated, is invalid.
*/
prev = next;
}
}
}
if (terminated && !entity) {
warning(NAMED_UNKNOWN, 1);
} else {
/*
* If theres something after an entity
* name which is not known, cap the
* reference.
*/
/* Return the reduced nodes, and any possible warnings. */
return result.join(EMPTY);
if (entityCharacters !== characters) {
end = begin + entityCharacters.length;
diff = 1 + end - begin;
terminated = false;
}
/**
* Get current position.
*
* @return {Object} - Positional information of a
* single point.
*/
function now() {
return {
line: line,
column: column,
offset: index + (pos.offset || 0)
};
}
/*
* If the reference is not terminated,
* warn.
*/
/**
* “Throw” a parse-error: a warning.
*
* @param {number} code - Identifier of reason for
* failing.
* @param {number} offset - Offset in characters from
* the current position point at which the
* parse-error occurred, cannot point past newlines.
*/
function parseError(code, offset) {
var position = now();
if (!terminated) {
reason = entityCharacters ?
NAMED_NOT_TERMINATED :
NAMED_EMPTY;
position.column += offset;
position.offset += offset;
if (!settings.attribute) {
warning(reason, diff);
} else {
following = at(end);
handleWarning.call(warningContext, MESSAGES[code], position, code);
}
if (following === EQUAL) {
warning(reason, diff);
entity = null;
} else if (isAlphanumeric(following)) {
entity = null;
} else {
warning(reason, diff);
}
}
}
}
/**
* Get character at position.
*
* @param {number} position - Indice of character in `value`.
* @return {string} - Character at `position` in
* `value`.
*/
function at(position) {
return value.charAt(position);
}
reference = entity;
} else {
if (!terminated) {
/*
* All non-terminated numeric entities are
* not rendered, and trigger a warning.
*/
/**
* Flush `queue` (normal text). Macro invoked before
* each entity and at the end of `value`.
*
* Does nothing when `queue` is empty.
*/
function flush() {
if (queue) {
result.push(queue);
warning(NUMERIC_NOT_TERMINATED, diff);
}
if (handleText) {
handleText.call(textContext, queue, {
start: prev,
end: now()
});
}
/*
* When terminated and number, parse as
* either hexadecimal or decimal.
*/
reference = parseInt(characters, BASE[type]);
/*
* Trigger a warning when the parsed number
* is prohibited, and replace with
* replacement character.
*/
if (isProhibited(reference)) {
warning(NUMERIC_PROHIBITED, diff);
reference = REPLACEMENT;
} else if (reference in invalid) {
/*
* Trigger a warning when the parsed number
* is disallowed, and replace by an
* alternative.
*/
warning(NUMERIC_DISALLOWED, diff);
reference = invalid[reference];
} else {
/*
* Parse the number.
*/
output = EMPTY;
/*
* Trigger a warning when the parsed
* number should not be used.
*/
if (isWarning(reference)) {
warning(NUMERIC_DISALLOWED, diff);
}
/*
* Stringify the number.
*/
if (reference > 0xFFFF) {
reference -= 0x10000;
output += fromCharCode(
reference >>> 10 & 0x3FF | 0xD800
);
reference = 0xDC00 | reference & 0x3FF;
}
reference = output + fromCharCode(reference);
}
}
/*
* If we could not find a reference, queue the
* checked characters (as normal characters),
* and move the pointer to their end. This is
* possible because we can be certain neither
* newlines nor ampersands are included.
*/
if (!reference) {
characters = value.slice(start - 1, end);
queue += characters;
column += characters.length;
index = end - 1;
} else {
/*
* Found it! First eat the queued
* characters as normal text, then eat
* an entity.
*/
flush();
prev = now();
index = end - 1;
column += end - start + 1;
result.push(reference);
next = now();
next.offset++;
if (handleReference) {
handleReference.call(referenceContext, reference, {
'start': prev,
'end': next
}, value.slice(start - 1, end));
}
prev = next;
}
}
queue = EMPTY;
}
/*
* Return the reduced nodes, and any possible warnings.
*/
return result.join(EMPTY);
}
}
var defaults = {
'warning': null,
'reference': null,
'text': null,
'warningContext': null,
'referenceContext': null,
'textContext': null,
'position': {},
'additional': null,
'attribute': false
};
/**
* Wrap to ensure clean parameters are given to `parse`.
* Check whether `character` is outside the permissible
* unicode range.
*
* @param {string} value - Value with entities.
* @param {Object?} [options] - Configuration.
* @param {number} code - Value.
* @return {boolean} - Whether `character` is
* outside the permissible unicode range.
*/
function wrapper(value, options) {
var settings = {};
var key;
if (!options) {
options = {};
}
for (key in defaults) {
settings[key] = options[key] || defaults[key];
}
if (settings.position.indent || settings.position.start) {
settings.indent = settings.position.indent || [];
settings.position = settings.position.start;
}
return parse(value, settings);
function isProhibited(code) {
  /* Within 0xD800–0xDFFF (inclusive). */
  var inReservedBand = code >= 0xD800 && code <= 0xDFFF;
  /* Above the last code point, U+10FFFF. */
  var beyondLast = code > 0x10FFFF;

  return inReservedBand || beyondLast;
}
/*
* Expose.
/**
* Check whether `character` is disallowed.
*
* @param {number} code - Value.
* @return {boolean} - Whether `character` is disallowed.
*/
function isWarning(code) {
if (
(code >= 0x0001 && code <= 0x0008) ||
code === 0x000B ||
(code >= 0x000D && code <= 0x001F) ||
(code >= 0x007F && code <= 0x009F) ||
(code >= 0xFDD0 && code <= 0xFDEF) ||
(code & 0xFFFF) === 0xFFFF ||
(code & 0xFFFF) === 0xFFFE
) {
return true;
}
module.exports = wrapper;
return false;
}
{
"name": "parse-entities",
"version": "1.0.2",
"version": "1.1.0",
"description": "Parse HTML character references: fast, spec-compliant, positional information",

@@ -15,4 +15,3 @@ "license": "MIT",

"files": [
"index.js",
"LICENSE"
"index.js"
],

@@ -22,24 +21,25 @@ "dependencies": {

"character-entities-legacy": "^1.0.0",
"character-reference-invalid": "^1.0.0"
"character-reference-invalid": "^1.0.0",
"has": "^1.0.1",
"is-alphanumerical": "^1.0.0",
"is-decimal": "^1.0.0",
"is-hexadecimal": "^1.0.0"
},
"repository": {
"type": "git",
"url": "https://github.com/wooorm/parse-entities.git"
},
"author": "Titus Wormer <tituswormer@gmail.com>",
"repository": "https://github.com/wooorm/parse-entities",
"bugs": "https://github.com/wooorm/parse-entities/issues",
"author": "Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)"
],
"devDependencies": {
"browserify": "^12.0.0",
"eslint": "^1.0.0",
"browserify": "^13.0.0",
"esmangle": "^1.0.0",
"istanbul": "^0.4.0",
"jscs": "^2.0.0",
"jscs-jsdoc": "^1.0.0",
"remark": "^3.0.0",
"remark-comment-config": "^2.0.0",
"remark-github": "^2.0.0",
"remark-lint": "^2.0.0",
"remark-slug": "^3.0.0",
"remark-validate-links": "^2.0.0",
"remark-yaml-config": "^2.0.0",
"tape": "^4.2.0"
"nyc": "^7.1.0",
"remark-cli": "^1.0.0",
"remark-comment-config": "^4.0.0",
"remark-github": "^5.0.0",
"remark-lint": "^4.0.0",
"remark-validate-links": "^4.0.0",
"tape": "^4.2.0",
"xo": "^0.16.0"
},

@@ -51,9 +51,45 @@ "scripts": {

"build": "npm run build-md && npm run build-bundle && npm run build-mangle",
"lint-api": "eslint .",
"lint-style": "jscs --reporter inline .",
"lint": "npm run lint-api && npm run lint-style",
"lint": "xo",
"test-api": "node test.js",
"test-coverage": "istanbul cover test.js",
"test-coverage": "nyc --reporter lcov tape test.js",
"test": "npm run build && npm run lint && npm run test-coverage"
},
"nyc": {
"check-coverage": true,
"lines": 100,
"functions": 100,
"branches": 100
},
"xo": {
"space": true,
"rules": {
"guard-for-in": "off",
"no-negated-condition": "off",
"max-depth": "off",
"max-lines": "off",
"complexity": "off",
"no-eq-null": "off",
"eqeqeq": "off"
},
"ignores": [
"parse-entities.js",
"parse-entities.min.js"
]
},
"remarkConfig": {
"output": true,
"plugins": {
"lint": {
"heading-increment": false,
"list-item-spacing": false,
"no-duplicate-headings": false
},
"github": null,
"comment-config": null,
"validate-links": null
},
"settings": {
"bullet": "*"
}
}
}

@@ -1,8 +0,9 @@

# parse-entities [![Build Status](https://img.shields.io/travis/wooorm/parse-entities.svg?style=flat)](https://travis-ci.org/wooorm/parse-entities) [![Coverage Status](https://img.shields.io/codecov/c/github/wooorm/parse-entities.svg)](https://codecov.io/github/wooorm/parse-entities)
# parse-entities [![Build Status][build-badge]][build-status] [![Coverage Status][coverage-badge]][coverage-status]
Parse HTML character references: fast, spec-compliant, positional information.
Parse HTML character references: fast, spec-compliant, positional
information.
## Installation
[npm](https://docs.npmjs.com/cli/install):
[npm][]:

@@ -13,6 +14,2 @@ ```bash

**parse-entities** is also available for [duo](http://duojs.org/#getting-started),
and [bundled](https://github.com/wooorm/parse-entities/releases) for AMD,
CommonJS, and globals (uncompressed and compressed).
## Usage

@@ -35,69 +32,60 @@

## parseEntities(value\[, options])
## `parseEntities(value[, options])`
**Parameters**
###### `options`
* `value` (`string`)
— Value with entities to parse;
* `additional` (`string`, optional, default: `''`)
— Additional character to accept when following an ampersand (without
error);
* `attribute` (`boolean`, optional, default: `false`)
— Whether to parse `value` as an attribute value;
* `nonTerminated` (`boolean`, default: `true`)
— Whether to allow non-terminated entities, such as decoding `&copycat`
to `©cat`. This behaviour is spec-compliant but can lead to unexpected
results;
* `warning` ([`Function`][warning], optional)
— Error handler;
* `text` ([`Function`][text], optional)
— Text handler;
* `reference` ([`Function`][reference],
optional) — Reference handler;
* `warningContext` (`'*'`, optional)
— Context used when invoking `warning`;
* `textContext` (`'*'`, optional)
— Context used when invoking `text`;
* `referenceContext` (`'*'`, optional)
— Context used when invoking `reference`;
* `position` (`Location` or `Position`, optional)
— Starting `position` of `value`, useful when dealing with values
nested in some sort of syntax tree. The default is:
* `options` (`Object`, optional):
```json
{
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"indent": []
}
```
* `additional` (`string`, optional, default: `''`)
— Additional character to accept when following an ampersand (without
error);
###### Returns
* `attribute` (`boolean`, optional, default: `false`)
— Whether to parse `value` as an attribute value;
* `position` (`Location` or `Position`, optional)
— Starting `position` of `value`, useful when dealing with values
nested in some sort of syntax tree. The default is:
```json
{
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"indent": []
}
```
* `warning` ([`Function`](#function-warningreason-position-code),
optional) — Error handler;
* `text` ([`Function`](#function-textvalue-location), optional)
— Text handler;
* `reference` ([`Function`](#function-referencevalue-location-source),
optional) — Reference handler;
* `warningContext` (`'*'`, optional)
— Context used when invoking `warning`;
* `textContext` (`'*'`, optional)
— Context used when invoking `text`;
* `referenceContext` (`'*'`, optional)
— Context used when invoking `reference`.
**Returns**
`string` — Decoded `value`.
### function warning(reason, position, code)
### `function warning(reason, position, code)`
Error handler.
**Context**: `this` refers to `warningContext` when given to `parseEntities`.
###### Context
**Parameters**
`this` refers to `warningContext` when given to `parseEntities`.
###### Parameters
* `reason` (`string`)
— Reason (human-readable) for triggering a parse error;
* `position` (`Position`)
— Place at which the parse error occurred;
* `code` (`number`)

@@ -108,31 +96,35 @@ — Identifier of reason for triggering a parse error.

| Code | Example | Note |
| ---- | ------------------ | ----------------------------------------------------------------------------- |
| `1` | `foo &amp bar` | Missing semicolon (named) |
| `2` | `foo &#123 bar` | Missing semicolon (numeric) |
| `3` | `Foo &bar baz` | Ampersand did not start a reference |
| `4` | `Foo &#` | Empty reference |
| `5` | `Foo &bar; baz` | Unknown entity |
| `6` | `Foo &#128; baz` | [Disallowed reference](https://github.com/wooorm/character-reference-invalid) |
| `7` | `Foo &#xD800; baz` | Prohibited: outside permissible unicode range |
| Code | Example | Note |
| ---- | ------------------ | --------------------------------------------- |
| `1` | `foo &amp bar` | Missing semicolon (named) |
| `2` | `foo &#123 bar` | Missing semicolon (numeric) |
| `3` | `Foo &bar baz` | Ampersand did not start a reference |
| `4` | `Foo &#` | Empty reference |
| `5` | `Foo &bar; baz` | Unknown entity |
| `6` | `Foo &#128; baz` | [Disallowed reference][invalid] |
| `7` | `Foo &#xD800; baz` | Prohibited: outside permissible unicode range |
### function text(value, location)
###### `function text(value, location)`
Text handler.
**Context**: `this` refers to `textContext` when given to `parseEntities`.
###### Context
**Parameters**
`this` refers to `textContext` when given to `parseEntities`.
###### Parameters
* `value` (`string`) — String of content;
* `location` (`Location`) — Location at which `value` starts and ends.
### function reference(value, location, source)
### `function reference(value, location, source)`
Character reference handler.
**Context**: `this` refers to `referenceContext` when given to `parseEntities`.
###### Context
**Parameters**
`this` refers to `referenceContext` when given to `parseEntities`.
###### Parameters
* `value` (`string`) — Encoded character reference;

@@ -144,2 +136,26 @@ * `location` (`Location`) — Location at which `value` starts and ends;

[MIT](LICENSE) © [Titus Wormer](http://wooorm.com)
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://img.shields.io/travis/wooorm/parse-entities.svg
[build-status]: https://travis-ci.org/wooorm/parse-entities
[coverage-badge]: https://img.shields.io/codecov/c/github/wooorm/parse-entities.svg
[coverage-status]: https://codecov.io/github/wooorm/parse-entities
[npm]: https://docs.npmjs.com/cli/install
[license]: LICENSE
[author]: http://wooorm.com
[warning]: #function-warningreason-position-code
[text]: #function-textvalue-location
[reference]: #function-referencevalue-location-source
[invalid]: https://github.com/wooorm/character-reference-invalid
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc