parse-entities
Advanced tools
Comparing version 1.0.2 to 1.1.0
@@ -5,2 +5,7 @@ <!--remark setext--> | ||
1.1.0 / 2016-07-31 | ||
================== | ||
* Add new `nonTerminated` setting ([`eed693c`](https://github.com/wooorm/parse-entities/commit/eed693c)) | ||
1.0.2 / 2015-12-29 | ||
@@ -7,0 +12,0 @@ ================== |
969
index.js
@@ -12,24 +12,48 @@ /** | ||
/* eslint-env commonjs */ | ||
/* | ||
* Dependencies. | ||
*/ | ||
/* Dependencies. */ | ||
var has = require('has'); | ||
var characterEntities = require('character-entities'); | ||
var legacy = require('character-entities-legacy'); | ||
var invalid = require('character-reference-invalid'); | ||
var decimal = require('is-decimal'); | ||
var hexadecimal = require('is-hexadecimal'); | ||
var alphanumerical = require('is-alphanumerical'); | ||
/* | ||
* Methods. | ||
*/ | ||
/* Expose. */ | ||
module.exports = wrapper; | ||
/* Methods. */ | ||
var fromCharCode = String.fromCharCode; | ||
var has = Object.prototype.hasOwnProperty; | ||
var noop = Function.prototype; | ||
/* | ||
* Reference types. | ||
*/ | ||
/* Characters. */ | ||
var REPLACEMENT = '\uFFFD'; | ||
var FORM_FEED = '\f'; | ||
var AMPERSAND = '&'; | ||
var OCTOTHORP = '#'; | ||
var SEMICOLON = ';'; | ||
var NEWLINE = '\n'; | ||
var X_LOWER = 'x'; | ||
var X_UPPER = 'X'; | ||
var SPACE = ' '; | ||
var LESS_THAN = '<'; | ||
var EQUAL = '='; | ||
var EMPTY = ''; | ||
var TAB = '\t'; | ||
/* Default settings. */ | ||
var defaults = { | ||
warning: null, | ||
reference: null, | ||
text: null, | ||
warningContext: null, | ||
referenceContext: null, | ||
textContext: null, | ||
position: {}, | ||
additional: null, | ||
attribute: false, | ||
nonTerminated: true | ||
}; | ||
/* Reference types. */ | ||
var NAMED = 'named'; | ||
@@ -39,6 +63,3 @@ var HEXADECIMAL = 'hexadecimal'; | ||
/* | ||
* Map of bases. | ||
*/ | ||
/* Map of bases. */ | ||
var BASE = {}; | ||
@@ -49,11 +70,13 @@ | ||
/* | ||
* Warning messages. | ||
*/ | ||
/* Map of types to tests. Each type of character reference | ||
* accepts different characters. This test is used to | ||
* detect whether a reference has ended (as the semicolon | ||
* is not strictly needed). */ | ||
var TESTS = {}; | ||
var NUMERIC_REFERENCE = 'Numeric character references'; | ||
var NAMED_REFERENCE = 'Named character references'; | ||
var TERMINATED = ' must be terminated by a semicolon'; | ||
var VOID = ' cannot be empty'; | ||
TESTS[NAMED] = alphanumerical; | ||
TESTS[DECIMAL] = decimal; | ||
TESTS[HEXADECIMAL] = hexadecimal; | ||
/* Warning messages. */ | ||
var NAMED_NOT_TERMINATED = 1; | ||
@@ -67,2 +90,7 @@ var NUMERIC_NOT_TERMINATED = 2; | ||
var NUMERIC_REFERENCE = 'Numeric character references'; | ||
var NAMED_REFERENCE = 'Named character references'; | ||
var TERMINATED = ' must be terminated by a semicolon'; | ||
var VOID = ' cannot be empty'; | ||
var MESSAGES = {}; | ||
@@ -79,149 +107,29 @@ | ||
/* | ||
* Characters. | ||
*/ | ||
var REPLACEMENT = '\uFFFD'; | ||
var FORM_FEED = '\f'; | ||
var AMPERSAND = '&'; | ||
var OCTOTHORP = '#'; | ||
var SEMICOLON = ';'; | ||
var NEWLINE = '\n'; | ||
var X_LOWER = 'x'; | ||
var X_UPPER = 'X'; | ||
var SPACE = ' '; | ||
var LESS_THAN = '<'; | ||
var EQUAL = '='; | ||
var EMPTY = ''; | ||
var TAB = '\t'; | ||
/** | ||
* Get the character-code at the first index in | ||
* `character`. | ||
* Wrap to ensure clean parameters are given to `parse`. | ||
* | ||
* @param {string} character - Value. | ||
* @return {number} - Character-code at the first index | ||
* in `character`. | ||
* @param {string} value - Value with entities. | ||
* @param {Object?} [options] - Configuration. | ||
*/ | ||
function charCode(character) { | ||
return character.charCodeAt(0); | ||
} | ||
function wrapper(value, options) { | ||
var settings = {}; | ||
var key; | ||
/** | ||
* Check whether `character` is a decimal. | ||
* | ||
* @param {string} character - Value. | ||
* @return {boolean} - Whether `character` is a decimal. | ||
*/ | ||
function isDecimal(character) { | ||
var code = charCode(character); | ||
if (!options) { | ||
options = {}; | ||
} | ||
return code >= 48 /* 0 */ && code <= 57 /* 9 */; | ||
} | ||
for (key in defaults) { | ||
settings[key] = options[key] == null ? defaults[key] : options[key]; | ||
} | ||
/** | ||
* Check whether `character` is a hexadecimal. | ||
* | ||
* @param {string} character - Value. | ||
* @return {boolean} - Whether `character` is a | ||
* hexadecimal. | ||
*/ | ||
function isHexadecimal(character) { | ||
var code = charCode(character); | ||
if (settings.position.indent || settings.position.start) { | ||
settings.indent = settings.position.indent || []; | ||
settings.position = settings.position.start; | ||
} | ||
return (code >= 48 /* 0 */ && code <= 57 /* 9 */) || | ||
(code >= 65 /* A */ && code <= 70 /* F */) || | ||
(code >= 97 /* a */ && code <= 102 /* f */); | ||
return parse(value, settings); | ||
} | ||
/** | ||
* Check whether `character` is an alphanumeric. | ||
* | ||
* @param {string} character - Value. | ||
* @return {boolean} - Whether `character` is an | ||
* alphanumeric. | ||
*/ | ||
function isAlphanumeric(character) { | ||
var code = charCode(character); | ||
return (code >= 48 /* 0 */ && code <= 57 /* 9 */) || | ||
(code >= 65 /* A */ && code <= 90 /* Z */) || | ||
(code >= 97 /* a */ && code <= 122 /* z */); | ||
} | ||
/** | ||
* Check whether `character` is outside the permissible | ||
* unicode range. | ||
* | ||
* @param {number} characterCode - Value. | ||
* @return {boolean} - Whether `character` is | ||
* outside the permissible unicode range. | ||
*/ | ||
function isProhibited(characterCode) { | ||
return (characterCode >= 0xD800 && characterCode <= 0xDFFF) || | ||
(characterCode > 0x10FFFF); | ||
} | ||
/** | ||
* Check whether `character` is disallowed. | ||
* | ||
* @param {number} characterCode - Value. | ||
* @return {boolean} - Whether `character` is disallowed. | ||
*/ | ||
function isWarning(characterCode) { | ||
return (characterCode >= 0x0001 && characterCode <= 0x0008) || | ||
(characterCode >= 0x000D && characterCode <= 0x001F) || | ||
(characterCode >= 0x007F && characterCode <= 0x009F) || | ||
(characterCode >= 0xFDD0 && characterCode <= 0xFDEF) || | ||
characterCode === 0x000B || | ||
characterCode === 0xFFFE || | ||
characterCode === 0xFFFF || | ||
characterCode === 0x1FFFE || | ||
characterCode === 0x1FFFF || | ||
characterCode === 0x2FFFE || | ||
characterCode === 0x2FFFF || | ||
characterCode === 0x3FFFE || | ||
characterCode === 0x3FFFF || | ||
characterCode === 0x4FFFE || | ||
characterCode === 0x4FFFF || | ||
characterCode === 0x5FFFE || | ||
characterCode === 0x5FFFF || | ||
characterCode === 0x6FFFE || | ||
characterCode === 0x6FFFF || | ||
characterCode === 0x7FFFE || | ||
characterCode === 0x7FFFF || | ||
characterCode === 0x8FFFE || | ||
characterCode === 0x8FFFF || | ||
characterCode === 0x9FFFE || | ||
characterCode === 0x9FFFF || | ||
characterCode === 0xAFFFE || | ||
characterCode === 0xAFFFF || | ||
characterCode === 0xBFFFE || | ||
characterCode === 0xBFFFF || | ||
characterCode === 0xCFFFE || | ||
characterCode === 0xCFFFF || | ||
characterCode === 0xDFFFE || | ||
characterCode === 0xDFFFF || | ||
characterCode === 0xEFFFE || | ||
characterCode === 0xEFFFF || | ||
characterCode === 0xFFFFE || | ||
characterCode === 0xFFFFF || | ||
characterCode === 0x10FFFE || | ||
characterCode === 0x10FFFF; | ||
} | ||
/* | ||
* Map of types to tests. Each type of character reference | ||
* accepts different characters. This test is used to | ||
* detect whether a reference has ended (as the semicolon | ||
* is not strictly needed). | ||
*/ | ||
var TESTS = {}; | ||
TESTS[NAMED] = isAlphanumeric; | ||
TESTS[DECIMAL] = isDecimal; | ||
TESTS[HEXADECIMAL] = isHexadecimal; | ||
/** | ||
* Parse entities. | ||
@@ -233,478 +141,385 @@ * | ||
function parse(value, settings) { | ||
var additional = settings.additional; | ||
var handleText = settings.text; | ||
var handleReference = settings.reference; | ||
var handleWarning = settings.warning; | ||
var textContext = settings.textContext; | ||
var referenceContext = settings.referenceContext; | ||
var warningContext = settings.warningContext; | ||
var pos = settings.position; | ||
var indent = settings.indent || []; | ||
var length = value.length; | ||
var index = 0; | ||
var lines = -1; | ||
var column = pos.column || 1; | ||
var line = pos.line || 1; | ||
var queue = EMPTY; | ||
var result = []; | ||
var entityCharacters; | ||
var terminated; | ||
var characters; | ||
var character; | ||
var reference; | ||
var following; | ||
var warning; | ||
var reason; | ||
var output; | ||
var entity; | ||
var begin; | ||
var start; | ||
var type; | ||
var test; | ||
var prev; | ||
var next; | ||
var diff; | ||
var end; | ||
var additional = settings.additional; | ||
var nonTerminated = settings.nonTerminated; | ||
var handleText = settings.text; | ||
var handleReference = settings.reference; | ||
var handleWarning = settings.warning; | ||
var textContext = settings.textContext; | ||
var referenceContext = settings.referenceContext; | ||
var warningContext = settings.warningContext; | ||
var pos = settings.position; | ||
var indent = settings.indent || []; | ||
var length = value.length; | ||
var index = 0; | ||
var lines = -1; | ||
var column = pos.column || 1; | ||
var line = pos.line || 1; | ||
var queue = EMPTY; | ||
var result = []; | ||
var entityCharacters; | ||
var terminated; | ||
var characters; | ||
var character; | ||
var reference; | ||
var following; | ||
var warning; | ||
var reason; | ||
var output; | ||
var entity; | ||
var begin; | ||
var start; | ||
var type; | ||
var test; | ||
var prev; | ||
var next; | ||
var diff; | ||
var end; | ||
/** | ||
* Get current position. | ||
* | ||
* @return {Object} - Positional information of a | ||
* single point. | ||
*/ | ||
function now() { | ||
return { | ||
'line': line, | ||
'column': column, | ||
'offset': index + (pos.offset || 0) | ||
}; | ||
} | ||
/* Cache the current point. */ | ||
prev = now(); | ||
/** | ||
* “Throw” a parse-error: a warning. | ||
* | ||
* @param {number} code - Identifier of reason for | ||
* failing. | ||
* @param {number} offset - Offset in characters from | ||
* the current position point at which the | ||
* parse-error occurred, cannot point past newlines. | ||
*/ | ||
function parseError(code, offset) { | ||
var position = now(); | ||
/* Wrap `handleWarning`. */ | ||
warning = handleWarning ? parseError : noop; | ||
position.column += offset; | ||
position.offset += offset; | ||
/* Ensure the algorithm walks over the first character | ||
* and the end (inclusive). */ | ||
index--; | ||
length++; | ||
handleWarning.call(warningContext, MESSAGES[code], position, code); | ||
while (++index < length) { | ||
/* If the previous character was a newline. */ | ||
if (character === NEWLINE) { | ||
column = indent[lines] || 1; | ||
} | ||
/** | ||
* Get character at position. | ||
* | ||
* @param {number} position - Index of character in `value`. | ||
* @return {string} - Character at `position` in | ||
* `value`. | ||
*/ | ||
function at(position) { | ||
return value.charAt(position); | ||
} | ||
character = at(index); | ||
/** | ||
* Flush `queue` (normal text). Macro invoked before | ||
* each entity and at the end of `value`. | ||
* | ||
* Does nothing when `queue` is empty. | ||
*/ | ||
function flush() { | ||
if (queue) { | ||
result.push(queue); | ||
/* Handle anything other than an ampersand, | ||
* including newlines and EOF. */ | ||
if (character !== AMPERSAND) { | ||
if (character === NEWLINE) { | ||
line++; | ||
lines++; | ||
column = 0; | ||
} | ||
if (handleText) { | ||
handleText.call(textContext, queue, { | ||
'start': prev, | ||
'end': now() | ||
}); | ||
} | ||
if (character) { | ||
queue += character; | ||
column++; | ||
} else { | ||
flush(); | ||
} | ||
} else { | ||
following = at(index + 1); | ||
queue = EMPTY; | ||
} | ||
} | ||
/* The behaviour depends on the identity of the next | ||
* character. */ | ||
if ( | ||
following === TAB || | ||
following === NEWLINE || | ||
following === FORM_FEED || | ||
following === SPACE || | ||
following === LESS_THAN || | ||
following === AMPERSAND || | ||
following === EMPTY || | ||
(additional && following === additional) | ||
) { | ||
/* Not a character reference. No characters | ||
* are consumed, and nothing is returned. | ||
* This is not an error, either. */ | ||
queue += character; | ||
column++; | ||
/* | ||
* Cache the current point. | ||
*/ | ||
continue; | ||
} | ||
prev = now(); | ||
start = begin = end = index + 1; | ||
/* | ||
* Wrap `handleWarning`. | ||
*/ | ||
/* Numerical entity. */ | ||
if (following !== OCTOTHORP) { | ||
type = NAMED; | ||
} else { | ||
end = ++begin; | ||
warning = handleWarning ? parseError : noop; | ||
/* The behaviour further depends on the | ||
* character after the U+0023 NUMBER SIGN. */ | ||
following = at(end); | ||
/* | ||
* Ensure the algorithm walks over the first character | ||
* and the end (inclusive). | ||
*/ | ||
if (following === X_LOWER || following === X_UPPER) { | ||
/* ASCII hex digits. */ | ||
type = HEXADECIMAL; | ||
end = ++begin; | ||
} else { | ||
/* ASCII digits. */ | ||
type = DECIMAL; | ||
} | ||
} | ||
index--; | ||
length++; | ||
entityCharacters = entity = characters = EMPTY; | ||
test = TESTS[type]; | ||
end--; | ||
while (++index < length) { | ||
/* | ||
* If the previous character was a newline. | ||
*/ | ||
while (++end < length) { | ||
following = at(end); | ||
if (character === NEWLINE) { | ||
column = indent[lines] || 1; | ||
if (!test(following)) { | ||
break; | ||
} | ||
character = at(index); | ||
characters += following; | ||
/* | ||
* Handle anything other than an ampersand, | ||
* including newlines and EOF. | ||
*/ | ||
/* Check if we can match a legacy named | ||
* reference. If so, we cache that as the | ||
* last viable named reference. This | ||
* ensures we do not need to walk backwards | ||
* later. */ | ||
if (type === NAMED && has(legacy, characters)) { | ||
entityCharacters = characters; | ||
entity = legacy[characters]; | ||
} | ||
} | ||
if (character !== AMPERSAND) { | ||
if (character === NEWLINE) { | ||
line++; | ||
lines++; | ||
column = 0; | ||
} | ||
terminated = at(end) === SEMICOLON; | ||
if (character) { | ||
queue += character; | ||
column++; | ||
} else { | ||
flush(); | ||
} | ||
} else { | ||
following = at(index + 1); | ||
if (terminated) { | ||
end++; | ||
/* | ||
* The behaviour depends on the identity of the next character. | ||
*/ | ||
if (type === NAMED && has(characterEntities, characters)) { | ||
entityCharacters = characters; | ||
entity = characterEntities[characters]; | ||
} | ||
} | ||
if ( | ||
following === TAB || | ||
following === NEWLINE || | ||
following === FORM_FEED || | ||
following === SPACE || | ||
following === LESS_THAN || | ||
following === AMPERSAND || | ||
following === EMPTY || | ||
(additional && following === additional) | ||
) { | ||
/* | ||
* Not a character reference. No characters | ||
* are consumed, and nothing is returned. | ||
* This is not an error, either. | ||
*/ | ||
diff = 1 + end - start; | ||
queue += character; | ||
column++; | ||
if (!terminated && !nonTerminated) { | ||
/* Empty. */ | ||
} else if (!characters) { | ||
/* An empty (possible) entity is valid, unless | ||
* it's numeric (thus an ampersand followed by | ||
* an octothorp). */ | ||
if (type !== NAMED) { | ||
warning(NUMERIC_EMPTY, diff); | ||
} | ||
} else if (type === NAMED) { | ||
/* An ampersand followed by anything | ||
* unknown, and not terminated, is invalid. */ | ||
if (terminated && !entity) { | ||
warning(NAMED_UNKNOWN, 1); | ||
} else { | ||
/* If there's something after an entity | ||
* name which is not known, cap the | ||
* reference. */ | ||
if (entityCharacters !== characters) { | ||
end = begin + entityCharacters.length; | ||
diff = 1 + end - begin; | ||
terminated = false; | ||
} | ||
continue; | ||
} | ||
/* If the reference is not terminated, | ||
* warn. */ | ||
if (!terminated) { | ||
reason = entityCharacters ? | ||
NAMED_NOT_TERMINATED : | ||
NAMED_EMPTY; | ||
start = begin = end = index + 1; | ||
/* | ||
* Numerical entity. | ||
*/ | ||
if (following !== OCTOTHORP) { | ||
type = NAMED; | ||
if (!settings.attribute) { | ||
warning(reason, diff); | ||
} else { | ||
end = ++begin; | ||
following = at(end); | ||
/* | ||
* The behaviour further depends on the | ||
* character after the U+0023 NUMBER SIGN. | ||
*/ | ||
following = at(end); | ||
if (following === X_LOWER || following === X_UPPER) { | ||
/* | ||
* ASCII hex digits. | ||
*/ | ||
type = HEXADECIMAL; | ||
end = ++begin; | ||
} else { | ||
/* | ||
* ASCII digits. | ||
*/ | ||
type = DECIMAL; | ||
} | ||
if (following === EQUAL) { | ||
warning(reason, diff); | ||
entity = null; | ||
} else if (alphanumerical(following)) { | ||
entity = null; | ||
} else { | ||
warning(reason, diff); | ||
} | ||
} | ||
} | ||
} | ||
entityCharacters = entity = characters = EMPTY; | ||
test = TESTS[type]; | ||
end--; | ||
reference = entity; | ||
} else { | ||
if (!terminated) { | ||
/* All non-terminated numeric entities are | ||
* not rendered, and trigger a warning. */ | ||
warning(NUMERIC_NOT_TERMINATED, diff); | ||
} | ||
while (++end < length) { | ||
following = at(end); | ||
/* When terminated and number, parse as | ||
* either hexadecimal or decimal. */ | ||
reference = parseInt(characters, BASE[type]); | ||
if (!test(following)) { | ||
break; | ||
} | ||
/* Trigger a warning when the parsed number | ||
* is prohibited, and replace with | ||
* replacement character. */ | ||
if (isProhibited(reference)) { | ||
warning(NUMERIC_PROHIBITED, diff); | ||
characters += following; | ||
reference = REPLACEMENT; | ||
} else if (reference in invalid) { | ||
/* Trigger a warning when the parsed number | ||
* is disallowed, and replace by an | ||
* alternative. */ | ||
warning(NUMERIC_DISALLOWED, diff); | ||
/* | ||
* Check if we can match a legacy named | ||
* reference. If so, we cache that as the | ||
* last viable named reference. This | ||
* ensures we do not need to walk backwards | ||
* later. | ||
*/ | ||
reference = invalid[reference]; | ||
} else { | ||
/* Parse the number. */ | ||
output = EMPTY; | ||
if ( | ||
type === NAMED && | ||
has.call(legacy, characters) | ||
) { | ||
entityCharacters = characters; | ||
entity = legacy[characters]; | ||
} | ||
} | ||
/* Trigger a warning when the parsed | ||
* number should not be used. */ | ||
if (isWarning(reference)) { | ||
warning(NUMERIC_DISALLOWED, diff); | ||
} | ||
terminated = at(end) === SEMICOLON; | ||
/* Stringify the number. */ | ||
if (reference > 0xFFFF) { | ||
reference -= 0x10000; | ||
output += fromCharCode((reference >>> (10 & 0x3FF)) | 0xD800); | ||
reference = 0xDC00 | (reference & 0x3FF); | ||
} | ||
if (terminated) { | ||
end++; | ||
reference = output + fromCharCode(reference); | ||
} | ||
} | ||
if ( | ||
type === NAMED && | ||
has.call(characterEntities, characters) | ||
) { | ||
entityCharacters = characters; | ||
entity = characterEntities[characters]; | ||
} | ||
} | ||
/* If we could not find a reference, queue the | ||
* checked characters (as normal characters), | ||
* and move the pointer to their end. This is | ||
* possible because we can be certain neither | ||
* newlines nor ampersands are included. */ | ||
if (!reference) { | ||
characters = value.slice(start - 1, end); | ||
queue += characters; | ||
column += characters.length; | ||
index = end - 1; | ||
} else { | ||
/* Found it! First eat the queued | ||
* characters as normal text, then eat | ||
* an entity. */ | ||
flush(); | ||
diff = 1 + end - start; | ||
prev = now(); | ||
index = end - 1; | ||
column += end - start + 1; | ||
result.push(reference); | ||
next = now(); | ||
next.offset++; | ||
if (!characters) { | ||
/* | ||
* An empty (possible) entity is valid, unless | ||
* it's numeric (thus an ampersand followed by | ||
* an octothorp). | ||
*/ | ||
if (handleReference) { | ||
handleReference.call(referenceContext, reference, { | ||
start: prev, | ||
end: next | ||
}, value.slice(start - 1, end)); | ||
} | ||
if (type !== NAMED) { | ||
warning(NUMERIC_EMPTY, diff); | ||
} | ||
} else if (type === NAMED) { | ||
/* | ||
* An ampersand followed by anything | ||
* unknown, and not terminated, is invalid. | ||
*/ | ||
prev = next; | ||
} | ||
} | ||
} | ||
if (terminated && !entity) { | ||
warning(NAMED_UNKNOWN, 1); | ||
} else { | ||
/* | ||
* If there's something after an entity | ||
* name which is not known, cap the | ||
* reference. | ||
*/ | ||
/* Return the reduced nodes, and any possible warnings. */ | ||
return result.join(EMPTY); | ||
if (entityCharacters !== characters) { | ||
end = begin + entityCharacters.length; | ||
diff = 1 + end - begin; | ||
terminated = false; | ||
} | ||
/** | ||
* Get current position. | ||
* | ||
* @return {Object} - Positional information of a | ||
* single point. | ||
*/ | ||
function now() { | ||
return { | ||
line: line, | ||
column: column, | ||
offset: index + (pos.offset || 0) | ||
}; | ||
} | ||
/* | ||
* If the reference is not terminated, | ||
* warn. | ||
*/ | ||
/** | ||
* “Throw” a parse-error: a warning. | ||
* | ||
* @param {number} code - Identifier of reason for | ||
* failing. | ||
* @param {number} offset - Offset in characters from | ||
* the current position point at which the | ||
* parse-error occurred, cannot point past newlines. | ||
*/ | ||
function parseError(code, offset) { | ||
var position = now(); | ||
if (!terminated) { | ||
reason = entityCharacters ? | ||
NAMED_NOT_TERMINATED : | ||
NAMED_EMPTY; | ||
position.column += offset; | ||
position.offset += offset; | ||
if (!settings.attribute) { | ||
warning(reason, diff); | ||
} else { | ||
following = at(end); | ||
handleWarning.call(warningContext, MESSAGES[code], position, code); | ||
} | ||
if (following === EQUAL) { | ||
warning(reason, diff); | ||
entity = null; | ||
} else if (isAlphanumeric(following)) { | ||
entity = null; | ||
} else { | ||
warning(reason, diff); | ||
} | ||
} | ||
} | ||
} | ||
/** | ||
* Get character at position. | ||
* | ||
* @param {number} position - Index of character in `value`. | ||
* @return {string} - Character at `position` in | ||
* `value`. | ||
*/ | ||
function at(position) { | ||
return value.charAt(position); | ||
} | ||
reference = entity; | ||
} else { | ||
if (!terminated) { | ||
/* | ||
* All non-terminated numeric entities are | ||
* not rendered, and trigger a warning. | ||
*/ | ||
/** | ||
* Flush `queue` (normal text). Macro invoked before | ||
* each entity and at the end of `value`. | ||
* | ||
* Does nothing when `queue` is empty. | ||
*/ | ||
function flush() { | ||
if (queue) { | ||
result.push(queue); | ||
warning(NUMERIC_NOT_TERMINATED, diff); | ||
} | ||
if (handleText) { | ||
handleText.call(textContext, queue, { | ||
start: prev, | ||
end: now() | ||
}); | ||
} | ||
/* | ||
* When terminated and number, parse as | ||
* either hexadecimal or decimal. | ||
*/ | ||
reference = parseInt(characters, BASE[type]); | ||
/* | ||
* Trigger a warning when the parsed number | ||
* is prohibited, and replace with | ||
* replacement character. | ||
*/ | ||
if (isProhibited(reference)) { | ||
warning(NUMERIC_PROHIBITED, diff); | ||
reference = REPLACEMENT; | ||
} else if (reference in invalid) { | ||
/* | ||
* Trigger a warning when the parsed number | ||
* is disallowed, and replace by an | ||
* alternative. | ||
*/ | ||
warning(NUMERIC_DISALLOWED, diff); | ||
reference = invalid[reference]; | ||
} else { | ||
/* | ||
* Parse the number. | ||
*/ | ||
output = EMPTY; | ||
/* | ||
* Trigger a warning when the parsed | ||
* number should not be used. | ||
*/ | ||
if (isWarning(reference)) { | ||
warning(NUMERIC_DISALLOWED, diff); | ||
} | ||
/* | ||
* Stringify the number. | ||
*/ | ||
if (reference > 0xFFFF) { | ||
reference -= 0x10000; | ||
output += fromCharCode( | ||
reference >>> 10 & 0x3FF | 0xD800 | ||
); | ||
reference = 0xDC00 | reference & 0x3FF; | ||
} | ||
reference = output + fromCharCode(reference); | ||
} | ||
} | ||
/* | ||
* If we could not find a reference, queue the | ||
* checked characters (as normal characters), | ||
* and move the pointer to their end. This is | ||
* possible because we can be certain neither | ||
* newlines nor ampersands are included. | ||
*/ | ||
if (!reference) { | ||
characters = value.slice(start - 1, end); | ||
queue += characters; | ||
column += characters.length; | ||
index = end - 1; | ||
} else { | ||
/* | ||
* Found it! First eat the queued | ||
* characters as normal text, then eat | ||
* an entity. | ||
*/ | ||
flush(); | ||
prev = now(); | ||
index = end - 1; | ||
column += end - start + 1; | ||
result.push(reference); | ||
next = now(); | ||
next.offset++; | ||
if (handleReference) { | ||
handleReference.call(referenceContext, reference, { | ||
'start': prev, | ||
'end': next | ||
}, value.slice(start - 1, end)); | ||
} | ||
prev = next; | ||
} | ||
} | ||
queue = EMPTY; | ||
} | ||
/* | ||
* Return the reduced nodes, and any possible warnings. | ||
*/ | ||
return result.join(EMPTY); | ||
} | ||
} | ||
var defaults = { | ||
'warning': null, | ||
'reference': null, | ||
'text': null, | ||
'warningContext': null, | ||
'referenceContext': null, | ||
'textContext': null, | ||
'position': {}, | ||
'additional': null, | ||
'attribute': false | ||
}; | ||
/** | ||
* Wrap to ensure clean parameters are given to `parse`. | ||
* Check whether `character` is outside the permissible | ||
* unicode range. | ||
* | ||
* @param {string} value - Value with entities. | ||
* @param {Object?} [options] - Configuration. | ||
* @param {number} code - Value. | ||
* @return {boolean} - Whether `character` is | ||
* outside the permissible unicode range. | ||
*/ | ||
function wrapper(value, options) { | ||
var settings = {}; | ||
var key; | ||
if (!options) { | ||
options = {}; | ||
} | ||
for (key in defaults) { | ||
settings[key] = options[key] || defaults[key]; | ||
} | ||
if (settings.position.indent || settings.position.start) { | ||
settings.indent = settings.position.indent || []; | ||
settings.position = settings.position.start; | ||
} | ||
return parse(value, settings); | ||
function isProhibited(code) { | ||
return (code >= 0xD800 && code <= 0xDFFF) || (code > 0x10FFFF); | ||
} | ||
/* | ||
* Expose. | ||
/** | ||
* Check whether `character` is disallowed. | ||
* | ||
* @param {number} code - Value. | ||
* @return {boolean} - Whether `character` is disallowed. | ||
*/ | ||
function isWarning(code) { | ||
if ( | ||
(code >= 0x0001 && code <= 0x0008) || | ||
code === 0x000B || | ||
(code >= 0x000D && code <= 0x001F) || | ||
(code >= 0x007F && code <= 0x009F) || | ||
(code >= 0xFDD0 && code <= 0xFDEF) || | ||
(code & 0xFFFF) === 0xFFFF || | ||
(code & 0xFFFF) === 0xFFFE | ||
) { | ||
return true; | ||
} | ||
module.exports = wrapper; | ||
return false; | ||
} |
{ | ||
"name": "parse-entities", | ||
"version": "1.0.2", | ||
"version": "1.1.0", | ||
"description": "Parse HTML character references: fast, spec-compliant, positional information", | ||
@@ -15,4 +15,3 @@ "license": "MIT", | ||
"files": [ | ||
"index.js", | ||
"LICENSE" | ||
"index.js" | ||
], | ||
@@ -22,24 +21,25 @@ "dependencies": { | ||
"character-entities-legacy": "^1.0.0", | ||
"character-reference-invalid": "^1.0.0" | ||
"character-reference-invalid": "^1.0.0", | ||
"has": "^1.0.1", | ||
"is-alphanumerical": "^1.0.0", | ||
"is-decimal": "^1.0.0", | ||
"is-hexadecimal": "^1.0.0" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/wooorm/parse-entities.git" | ||
}, | ||
"author": "Titus Wormer <tituswormer@gmail.com>", | ||
"repository": "https://github.com/wooorm/parse-entities", | ||
"bugs": "https://github.com/wooorm/parse-entities/issues", | ||
"author": "Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)", | ||
"contributors": [ | ||
"Titus Wormer <tituswormer@gmail.com> (http://wooorm.com)" | ||
], | ||
"devDependencies": { | ||
"browserify": "^12.0.0", | ||
"eslint": "^1.0.0", | ||
"browserify": "^13.0.0", | ||
"esmangle": "^1.0.0", | ||
"istanbul": "^0.4.0", | ||
"jscs": "^2.0.0", | ||
"jscs-jsdoc": "^1.0.0", | ||
"remark": "^3.0.0", | ||
"remark-comment-config": "^2.0.0", | ||
"remark-github": "^2.0.0", | ||
"remark-lint": "^2.0.0", | ||
"remark-slug": "^3.0.0", | ||
"remark-validate-links": "^2.0.0", | ||
"remark-yaml-config": "^2.0.0", | ||
"tape": "^4.2.0" | ||
"nyc": "^7.1.0", | ||
"remark-cli": "^1.0.0", | ||
"remark-comment-config": "^4.0.0", | ||
"remark-github": "^5.0.0", | ||
"remark-lint": "^4.0.0", | ||
"remark-validate-links": "^4.0.0", | ||
"tape": "^4.2.0", | ||
"xo": "^0.16.0" | ||
}, | ||
@@ -51,9 +51,45 @@ "scripts": { | ||
"build": "npm run build-md && npm run build-bundle && npm run build-mangle", | ||
"lint-api": "eslint .", | ||
"lint-style": "jscs --reporter inline .", | ||
"lint": "npm run lint-api && npm run lint-style", | ||
"lint": "xo", | ||
"test-api": "node test.js", | ||
"test-coverage": "istanbul cover test.js", | ||
"test-coverage": "nyc --reporter lcov tape test.js", | ||
"test": "npm run build && npm run lint && npm run test-coverage" | ||
}, | ||
"nyc": { | ||
"check-coverage": true, | ||
"lines": 100, | ||
"functions": 100, | ||
"branches": 100 | ||
}, | ||
"xo": { | ||
"space": true, | ||
"rules": { | ||
"guard-for-in": "off", | ||
"no-negated-condition": "off", | ||
"max-depth": "off", | ||
"max-lines": "off", | ||
"complexity": "off", | ||
"no-eq-null": "off", | ||
"eqeqeq": "off" | ||
}, | ||
"ignores": [ | ||
"parse-entities.js", | ||
"parse-entities.min.js" | ||
] | ||
}, | ||
"remarkConfig": { | ||
"output": true, | ||
"plugins": { | ||
"lint": { | ||
"heading-increment": false, | ||
"list-item-spacing": false, | ||
"no-duplicate-headings": false | ||
}, | ||
"github": null, | ||
"comment-config": null, | ||
"validate-links": null | ||
}, | ||
"settings": { | ||
"bullet": "*" | ||
} | ||
} | ||
} |
164
readme.md
@@ -1,8 +0,9 @@ | ||
# parse-entities [![Build Status](https://img.shields.io/travis/wooorm/parse-entities.svg?style=flat)](https://travis-ci.org/wooorm/parse-entities) [![Coverage Status](https://img.shields.io/codecov/c/github/wooorm/parse-entities.svg)](https://codecov.io/github/wooorm/parse-entities) | ||
# parse-entities [![Build Status][build-badge]][build-status] [![Coverage Status][coverage-badge]][coverage-status] | ||
Parse HTML character references: fast, spec-compliant, positional information. | ||
Parse HTML character references: fast, spec-compliant, positional | ||
information. | ||
## Installation | ||
[npm](https://docs.npmjs.com/cli/install): | ||
[npm][]: | ||
@@ -13,6 +14,2 @@ ```bash | ||
**parse-entities** is also available for [duo](http://duojs.org/#getting-started), | ||
and [bundled](https://github.com/wooorm/parse-entities/releases) for AMD, | ||
CommonJS, and globals (uncompressed and compressed). | ||
## Usage | ||
@@ -35,69 +32,60 @@ | ||
## parseEntities(value\[, options]) | ||
## `parseEntities(value[, options])` | ||
**Parameters** | ||
###### `options` | ||
* `value` (`string`) | ||
— Value with entities to parse; | ||
* `additional` (`string`, optional, default: `''`) | ||
— Additional character to accept when following an ampersand (without | ||
error); | ||
* `attribute` (`boolean`, optional, default: `false`) | ||
— Whether to parse `value` as an attribute value; | ||
* `nonTerminated` (`boolean`, default: `true`) | ||
— Whether to allow non-terminated entities, for example decoding `&copycat` to | ||
`©cat`. This behaviour is spec-compliant but can lead to unexpected | ||
results; | ||
* `warning` ([`Function`][warning], optional) | ||
— Error handler; | ||
* `text` ([`Function`][text], optional) | ||
— Text handler; | ||
* `reference` ([`Function`][reference], | ||
optional) — Reference handler; | ||
* `warningContext` (`'*'`, optional) | ||
— Context used when invoking `warning`; | ||
* `textContext` (`'*'`, optional) | ||
— Context used when invoking `text`; | ||
* `referenceContext` (`'*'`, optional) | ||
— Context used when invoking `reference`; | ||
* `position` (`Location` or `Position`, optional) | ||
— Starting `position` of `value`, useful when dealing with values | ||
nested in some sort of syntax tree. The default is: | ||
* `options` (`Object`, optional): | ||
```json | ||
{ | ||
"start": { | ||
"line": 1, | ||
"column": 1, | ||
"offset": 0 | ||
}, | ||
"indent": [] | ||
} | ||
``` | ||
* `additional` (`string`, optional, default: `''`) | ||
— Additional character to accept when following an ampersand (without | ||
error); | ||
###### Returns | ||
* `attribute` (`boolean`, optional, default: `false`) | ||
— Whether to parse `value` as an attribute value; | ||
* `position` (`Location` or `Position`, optional) | ||
— Starting `position` of `value`, useful when dealing with values | ||
nested in some sort of syntax tree. The default is: | ||
```json | ||
{ | ||
"start": { | ||
"line": 1, | ||
"column": 1, | ||
"offset": 0 | ||
}, | ||
"indent": [] | ||
} | ||
``` | ||
* `warning` ([`Function`](#function-warningreason-position-code), | ||
optional) — Error handler; | ||
* `text` ([`Function`](#function-textvalue-location), optional) | ||
— Text handler; | ||
* `reference` ([`Function`](#function-referencevalue-location-source), | ||
optional) — Reference handler; | ||
* `warningContext` (`'*'`, optional) | ||
— Context used when invoking `warning`; | ||
* `textContext` (`'*'`, optional) | ||
— Context used when invoking `text`; | ||
* `referenceContext` (`'*'`, optional) | ||
— Context used when invoking `reference`. | ||
**Returns** | ||
`string` — Decoded `value`. | ||
### function warning(reason, position, code) | ||
### `function warning(reason, position, code)` | ||
Error handler. | ||
**Context**: `this` refers to `warningContext` when given to `parseEntities`. | ||
###### Context | ||
**Parameters** | ||
`this` refers to `warningContext` when given to `parseEntities`. | ||
###### Parameters | ||
* `reason` (`string`) | ||
— Reason (human-readable) for triggering a parse error; | ||
* `position` (`Position`) | ||
— Place at which the parse error occurred; | ||
* `code` (`number`) | ||
@@ -108,31 +96,35 @@ — Identifier of reason for triggering a parse error. | ||
| Code | Example | Note | | ||
| ---- | ------------------ | ----------------------------------------------------------------------------- | | ||
| `1` | `foo &amp bar` | Missing semicolon (named) | | ||
| `2` | `foo &#123 bar` | Missing semicolon (numeric) | | ||
| `3` | `Foo &bar baz` | Ampersand did not start a reference | | ||
| `4` | `Foo &#` | Empty reference | | ||
| `5` | `Foo &bar; baz` | Unknown entity | | ||
| `6` | `Foo &#128; baz` | [Disallowed reference](https://github.com/wooorm/character-reference-invalid) | | ||
| `7` | `Foo &#xD800; baz` | Prohibited: outside permissible unicode range | | ||
| Code | Example | Note | | ||
| ---- | ------------------ | --------------------------------------------- | | ||
| `1` | `foo &amp bar` | Missing semicolon (named) | | ||
| `2` | `foo &#123 bar` | Missing semicolon (numeric) | | ||
| `3` | `Foo &bar baz` | Ampersand did not start a reference | | ||
| `4` | `Foo &#` | Empty reference | | ||
| `5` | `Foo &bar; baz` | Unknown entity | | ||
| `6` | `Foo &#128; baz` | [Disallowed reference][invalid] | | ||
| `7` | `Foo &#xD800; baz` | Prohibited: outside permissible unicode range | | ||
### function text(value, location) | ||
###### `function text(value, location)` | ||
Text handler. | ||
**Context**: `this` refers to `textContext` when given to `parseEntities`. | ||
###### Context | ||
**Parameters** | ||
`this` refers to `textContext` when given to `parseEntities`. | ||
###### Parameters | ||
* `value` (`string`) — String of content; | ||
* `location` (`Location`) — Location at which `value` starts and ends. | ||
### function reference(value, location, source) | ||
### `function reference(value, location, source)` | ||
Character reference handler. | ||
**Context**: `this` refers to `referenceContext` when given to `parseEntities`. | ||
###### Context | ||
**Parameters** | ||
`this` refers to `referenceContext` when given to `parseEntities`. | ||
###### Parameters | ||
* `value` (`string`) — Encoded character reference; | ||
@@ -144,2 +136,26 @@ * `location` (`Location`) — Location at which `value` starts and ends; | ||
[MIT](LICENSE) © [Titus Wormer](http://wooorm.com) | ||
[MIT][license] © [Titus Wormer][author] | ||
<!-- Definitions --> | ||
[build-badge]: https://img.shields.io/travis/wooorm/parse-entities.svg | ||
[build-status]: https://travis-ci.org/wooorm/parse-entities | ||
[coverage-badge]: https://img.shields.io/codecov/c/github/wooorm/parse-entities.svg | ||
[coverage-status]: https://codecov.io/github/wooorm/parse-entities | ||
[npm]: https://docs.npmjs.com/cli/install | ||
[license]: LICENSE | ||
[author]: http://wooorm.com | ||
[warning]: #function-warningreason-position-code | ||
[text]: #function-textvalue-location | ||
[reference]: #function-referencevalue-location-source | ||
[invalid]: https://github.com/wooorm/character-reference-invalid |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
No repository
Supply chain riskPackage does not have a linked source code repository. Without this field, a package will have no reference to the location of the source code used to generate the package.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No bug tracker
MaintenancePackage does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
10
158
21877
7
448
1
1
+ Addedhas@^1.0.1
+ Addedis-alphanumerical@^1.0.0
+ Addedis-decimal@^1.0.0
+ Addedis-hexadecimal@^1.0.0
+ Addedhas@1.0.4(transitive)
+ Addedis-alphabetical@1.0.4(transitive)
+ Addedis-alphanumerical@1.0.4(transitive)
+ Addedis-decimal@1.0.4(transitive)
+ Addedis-hexadecimal@1.0.4(transitive)