Comparing version 2.0.0 to 2.1.0
@@ -0,1 +1,25 @@ | ||
<a name="2.1.0"></a> | ||
# [2.1.0](https://github.com/lddubeau/saxes/compare/v2.0.0...v2.1.0) (2018-08-20) | ||
### Features | ||
* add support for parsing fragments ([1ff2d6a](https://github.com/lddubeau/saxes/commit/1ff2d6a)) | ||
* stronger check on bad cdata closure ([d416760](https://github.com/lddubeau/saxes/commit/d416760)) | ||
### Performance Improvements | ||
* concatenate openWakaBang just once ([07345bf](https://github.com/lddubeau/saxes/commit/07345bf)) | ||
* improve text node checking speed ([f270e8b](https://github.com/lddubeau/saxes/commit/f270e8b)) | ||
* minor optimizations ([c7e36bf](https://github.com/lddubeau/saxes/commit/c7e36bf)) | ||
* remove an unnecessary variable ([ac03a1c](https://github.com/lddubeau/saxes/commit/ac03a1c)) | ||
* remove handler check ([fbe35ff](https://github.com/lddubeau/saxes/commit/fbe35ff)) | ||
* simplify captureWhile ([bb2085c](https://github.com/lddubeau/saxes/commit/bb2085c)) | ||
* simplify the skip functions ([c7b8c3b](https://github.com/lddubeau/saxes/commit/c7b8c3b)) | ||
* the c field has been unused for a while: remove it ([9ca0246](https://github.com/lddubeau/saxes/commit/9ca0246)) | ||
* use strings for the general states ([3869908](https://github.com/lddubeau/saxes/commit/3869908)) | ||
<a name="2.0.0"></a> | ||
@@ -2,0 +26,0 @@ # [2.0.0](https://github.com/lddubeau/saxes/compare/v1.2.4...v2.0.0) (2018-07-23) |
628
lib/saxes.js
@@ -29,67 +29,38 @@ "use strict"; | ||
const S_BEGIN_WHITESPACE = 0; // leading whitespace | ||
const S_TEXT = 1; // general stuff | ||
const S_ENTITY = 2; // & and such. | ||
const S_OPEN_WAKA = 3; // < | ||
const S_OPEN_WAKA_BANG = 4; // <!... | ||
const S_DOCTYPE = 5; // <!DOCTYPE | ||
const S_DOCTYPE_QUOTED = 6; // <!DOCTYPE "//blah | ||
const S_DOCTYPE_DTD = 7; // <!DOCTYPE "//blah" [ ... | ||
const S_DOCTYPE_DTD_QUOTED = 8; // <!DOCTYPE "//blah" [ "foo | ||
const S_COMMENT = 9; // <!-- | ||
const S_COMMENT_ENDING = 10; // <!-- blah - | ||
const S_COMMENT_ENDED = 11; // <!-- blah -- | ||
const S_CDATA = 12; // <![CDATA[ something | ||
const S_CDATA_ENDING = 13; // ] | ||
const S_CDATA_ENDING_2 = 14; // ]] | ||
const S_PI = 15; // <?hi | ||
const S_PI_BODY = 16; // <?hi there | ||
const S_PI_ENDING = 17; // <?hi "there" ? | ||
const S_OPEN_TAG = 18; // <strong | ||
const S_OPEN_TAG_SLASH = 19; // <strong / | ||
const S_ATTRIB = 20; // <a | ||
const S_ATTRIB_NAME = 21; // <a foo | ||
const S_ATTRIB_NAME_SAW_WHITE = 22; // <a foo _ | ||
const S_ATTRIB_VALUE = 23; // <a foo= | ||
const S_ATTRIB_VALUE_QUOTED = 24; // <a foo="bar | ||
const S_ATTRIB_VALUE_CLOSED = 25; // <a foo="bar" | ||
const S_ATTRIB_VALUE_UNQUOTED = 26; // <a foo=bar | ||
const S_CLOSE_TAG = 27; // </a | ||
const S_CLOSE_TAG_SAW_WHITE = 28; // </a > | ||
const S_XML_DECL_NAME_START = 29; // <?xml | ||
const S_XML_DECL_NAME = 30; // <?xml foo | ||
const S_XML_DECL_EQ = 31; // <?xml foo= | ||
const S_XML_DECL_VALUE_START = 32; // <?xml foo= | ||
const S_XML_DECL_VALUE = 33; // <?xml foo="bar" | ||
const S_BEGIN_WHITESPACE = "sBeginWhitespace"; // leading whitespace | ||
const S_TEXT = "sText"; // general stuff | ||
const S_ENTITY = "sEntity"; // & and such. | ||
const S_OPEN_WAKA = "sOpenWaka"; // < | ||
const S_OPEN_WAKA_BANG = "sOpenWakaBang"; // <!... | ||
const S_DOCTYPE = "sDoctype"; // <!DOCTYPE | ||
const S_DOCTYPE_QUOTED = "sDoctypeQuoted"; // <!DOCTYPE "//blah | ||
const S_DOCTYPE_DTD = "sDoctypeDTD"; // <!DOCTYPE "//blah" [ ... | ||
const S_DOCTYPE_DTD_QUOTED = "sDoctypeDTDQuoted"; // <!DOCTYPE "//blah" [ "foo | ||
const S_COMMENT = "sComment"; // <!-- | ||
const S_COMMENT_ENDING = "sCommentEnding"; // <!-- blah - | ||
const S_COMMENT_ENDED = "sCommentEnded"; // <!-- blah -- | ||
const S_CDATA = "sCData"; // <![CDATA[ something | ||
const S_CDATA_ENDING = "sCDataEnding"; // ] | ||
const S_CDATA_ENDING_2 = "sCDataEnding2"; // ]] | ||
const S_PI = "sPI"; // <?hi | ||
const S_PI_BODY = "sPIBody"; // <?hi there | ||
const S_PI_ENDING = "sPIEnding"; // <?hi "there" ? | ||
const S_OPEN_TAG = "sOpenTag"; // <strong | ||
const S_OPEN_TAG_SLASH = "sOpenTagSlash"; // <strong / | ||
const S_ATTRIB = "sAttrib"; // <a | ||
const S_ATTRIB_NAME = "sAttribName"; // <a foo | ||
const S_ATTRIB_NAME_SAW_WHITE = "sAttribNameSawWhite"; // <a foo _ | ||
const S_ATTRIB_VALUE = "sAttribValue"; // <a foo= | ||
const S_ATTRIB_VALUE_QUOTED = "sAttribValueQuoted"; // <a foo="bar | ||
const S_ATTRIB_VALUE_CLOSED = "sAttribValueClosed"; // <a foo="bar" | ||
const S_ATTRIB_VALUE_UNQUOTED = "sAttribValueUnquoted"; // <a foo=bar | ||
const S_CLOSE_TAG = "sCloseTag"; // </a | ||
const S_CLOSE_TAG_SAW_WHITE = "sCloseTagSawWhite"; // </a > | ||
const STATE_TO_METHOD_NAME = new Array(S_CLOSE_TAG_SAW_WHITE + 1); | ||
STATE_TO_METHOD_NAME[S_BEGIN_WHITESPACE] = "sBeginWhitespace"; | ||
STATE_TO_METHOD_NAME[S_TEXT] = "sText"; | ||
STATE_TO_METHOD_NAME[S_ENTITY] = "sEntity"; | ||
STATE_TO_METHOD_NAME[S_OPEN_WAKA] = "sOpenWaka"; | ||
STATE_TO_METHOD_NAME[S_OPEN_WAKA_BANG] = "sOpenWakaBang"; | ||
STATE_TO_METHOD_NAME[S_DOCTYPE] = "sDoctype"; | ||
STATE_TO_METHOD_NAME[S_DOCTYPE_QUOTED] = "sDoctypeQuoted"; | ||
STATE_TO_METHOD_NAME[S_DOCTYPE_DTD] = "sDoctypeDTD"; | ||
STATE_TO_METHOD_NAME[S_DOCTYPE_DTD_QUOTED] = "sDoctypeDTDQuoted"; | ||
STATE_TO_METHOD_NAME[S_COMMENT] = "sComment"; | ||
STATE_TO_METHOD_NAME[S_COMMENT_ENDING] = "sCommentEnding"; | ||
STATE_TO_METHOD_NAME[S_COMMENT_ENDED] = "sCommentEnded"; | ||
STATE_TO_METHOD_NAME[S_CDATA] = "sCData"; | ||
STATE_TO_METHOD_NAME[S_CDATA_ENDING] = "sCDataEnding"; | ||
STATE_TO_METHOD_NAME[S_CDATA_ENDING_2] = "sCDataEnding2"; | ||
STATE_TO_METHOD_NAME[S_PI] = "sPI"; | ||
STATE_TO_METHOD_NAME[S_PI_BODY] = "sPIBody"; | ||
STATE_TO_METHOD_NAME[S_PI_ENDING] = "sPIEnding"; | ||
STATE_TO_METHOD_NAME[S_OPEN_TAG] = "sOpenTag"; | ||
STATE_TO_METHOD_NAME[S_OPEN_TAG_SLASH] = "sOpenTagSlash"; | ||
STATE_TO_METHOD_NAME[S_ATTRIB] = "sAttrib"; | ||
STATE_TO_METHOD_NAME[S_ATTRIB_NAME] = "sAttribName"; | ||
STATE_TO_METHOD_NAME[S_ATTRIB_NAME_SAW_WHITE] = "sAttribNameSawWhite"; | ||
STATE_TO_METHOD_NAME[S_ATTRIB_VALUE] = "sAttribValue"; | ||
STATE_TO_METHOD_NAME[S_ATTRIB_VALUE_QUOTED] = "sAttribValueQuoted"; | ||
STATE_TO_METHOD_NAME[S_ATTRIB_VALUE_CLOSED] = "sAttribValueClosed"; | ||
STATE_TO_METHOD_NAME[S_ATTRIB_VALUE_UNQUOTED] = "sAttribValueUnquoted"; | ||
STATE_TO_METHOD_NAME[S_CLOSE_TAG] = "sCloseTag"; | ||
STATE_TO_METHOD_NAME[S_CLOSE_TAG_SAW_WHITE] = "sCloseTagSawWhite"; | ||
// These states are internal to sPIBody | ||
const S_XML_DECL_NAME_START = 1; // <?xml | ||
const S_XML_DECL_NAME = 2; // <?xml foo | ||
const S_XML_DECL_EQ = 3; // <?xml foo= | ||
const S_XML_DECL_VALUE_START = 4; // <?xml foo= | ||
const S_XML_DECL_VALUE = 5; // <?xml foo="bar" | ||
@@ -116,5 +87,5 @@ const SPACE_SEPARATOR = "SPACE_SEPARATOR"; | ||
const buffers = [ | ||
"comment", "openWakaBang", "textNode", "textFragments", "tagName", "doctype", | ||
"piTarget", "piBody", "entity", "attribName", "attribValue", "cdata", | ||
"xmlDeclName", "xmlDeclValue", | ||
"comment", "openWakaBang", "textNode", "tagName", "doctype", "piTarget", | ||
"piBody", "entity", "attribName", "attribValue", "cdata", "xmlDeclName", | ||
"xmlDeclValue", | ||
]; | ||
@@ -148,2 +119,38 @@ | ||
/**
 * Validate a prefix-to-URI namespace mapping against the rules that protect
 * the reserved ``xml`` and ``xmlns`` bindings. Every violation found is
 * reported through ``parser.fail``; nothing is returned and ``mapping`` is not
 * modified.
 *
 * @private
 *
 * @param {Object} parser The object used for error reporting. Only its
 * ``fail`` method is called here. NOTE(review): presumably the saxes parser
 * instance -- confirm against callers.
 *
 * @param {Object} mapping A plain object mapping prefixes to namespace URIs.
 * The empty-string key is treated as the default namespace.
 */
function nsMappingCheck(parser, mapping) { | ||
const { xml, xmlns } = mapping; | ||
// A binding for the reserved prefixes is only acceptable if it points at the
// corresponding reserved URI. Falsy (absent or empty) bindings are skipped.
if (xml && xml !== XML_NAMESPACE) { | ||
parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`); | ||
} | ||
if (xmlns && xmlns !== XMLNS_NAMESPACE) { | ||
parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`); | ||
} | ||
// Now check the reverse direction: the reserved URIs may not be handed out to
// arbitrary prefixes (or to the default namespace).
for (const local of Object.keys(mapping)) { | ||
const uri = mapping[local]; | ||
switch (uri) { | ||
// The xmlns URI may not be assigned to anything: every key that maps to it
// is reported, with a dedicated message for the default namespace.
case XMLNS_NAMESPACE: | ||
parser.fail(local === "" ? | ||
`the default namespace may not be set to ${uri}.` : | ||
`may not assign a prefix (even "xmlns") to the URI \ | ||
${XMLNS_NAMESPACE}.`); | ||
break; | ||
// The xml URI may only be assigned to the "xml" prefix.
case XML_NAMESPACE: | ||
switch (local) { | ||
case "xml": | ||
// Assigning the XML namespace to "xml" is fine. | ||
break; | ||
case "": | ||
parser.fail(`the default namespace may not be set to ${uri}.`); | ||
break; | ||
default: | ||
parser.fail("may not assign the xml namespace to another prefix."); | ||
} | ||
break; | ||
// Any other URI is unrestricted: nothing to report.
default: | ||
} | ||
} | ||
} | ||
/** | ||
@@ -225,6 +232,13 @@ * Data structure for an XML tag. | ||
* @property {boolean} [xmlns] Whether to track namespaces. Unset means | ||
*``false``. | ||
* ``false``. | ||
* | ||
* @property {boolean} [fragment] Whether to accept XML fragments. Unset means | ||
* ``false``. | ||
* | ||
* @property {Object} [additionalNamespaces] A plain object whose key, value | ||
* pairs define namespaces known before parsing the XML file. It is not legal | ||
* to pass bindings for the namespaces ``"xml"`` or ``"xmlns"``. | ||
* | ||
* @property {boolean} [position] Whether to track positions. Unset means | ||
*``true``. | ||
* ``true``. | ||
* | ||
@@ -284,5 +298,3 @@ * @property {string} [fileName] A file name to use for error reporting. Leaving | ||
this.initial = true; | ||
this.closedRoot = this.sawRoot = this.inRoot = false; | ||
this.tag = null; | ||
this.state = S_BEGIN_WHITESPACE; | ||
/** | ||
@@ -295,5 +307,14 @@ * A map of entity name to expansion. | ||
this.attribList = []; | ||
this.reportedTextBeforeRoot = false; | ||
this.reportedTextAfterRoot = false; | ||
this.xmlDeclPossible = true; | ||
// The logic is organized so as to minimize the need to check | ||
// this.opt.fragment while parsing. | ||
this.state = this.opt.fragment ? S_TEXT : S_BEGIN_WHITESPACE; | ||
// We want these to be all true if we are dealing with a fragment. | ||
this.reportedTextBeforeRoot = this.reportedTextAfterRoot = | ||
this.closedRoot = this.sawRoot = this.inRoot = this.opt.fragment; | ||
// An XML declaration is initially possible only when parsing whole | ||
// documents. | ||
this.xmlDeclPossible = !this.opt.fragment; | ||
this.piIsXMLDecl = false; | ||
@@ -305,12 +326,16 @@ this.xmlDeclState = S_XML_DECL_NAME_START; | ||
this.entityReturnState = undefined; | ||
// This records the index before which we don't have to check for the | ||
// presence of ]]]>. The text before that index has been checked already, | ||
// and should not be checked twice. | ||
this.textNodeCheckedBefore = 0; | ||
// namespaces form a prototype chain. | ||
// it always points at the current tag, | ||
// which protos to its parent tag. | ||
if (this.opt.xmlns) { | ||
this.ns = Object.assign({}, rootNS); | ||
this.ns = Object.assign({ __proto__: null }, rootNS); | ||
const additional = this.opt.additionalNamespaces; | ||
if (additional) { | ||
nsMappingCheck(this, additional); | ||
Object.assign(this.ns, additional); | ||
} | ||
} | ||
this.startTagPosition = undefined; | ||
this.trackPosition = this.opt.position !== false; | ||
@@ -431,3 +456,2 @@ if (this.trackPosition) { | ||
fail(er) { | ||
this.closeText(); | ||
const message = (this.trackPosition) ? | ||
@@ -458,2 +482,7 @@ `${this.fileName}:${this.line}:${this.column}: ${er}` : er; | ||
// We checked if performing a pre-decomposition of the string into an array | ||
// of single complete characters (``Array.from(chunk)``) would be faster | ||
// than the current repeated calls to ``codePointAt``. As of August 2018, it | ||
// isn't. (There may be Node-specific code that would perform faster than | ||
// ``Array.from`` but don't want to be dependent on Node.) | ||
const limit = chunk.length; | ||
@@ -464,10 +493,5 @@ const chunkState = { | ||
i: 0, | ||
c: undefined, | ||
}; | ||
while (chunkState.i < limit) { | ||
const handler = this[STATE_TO_METHOD_NAME[this.state]]; | ||
if (!handler) { | ||
throw new Error(this, `Unknown state: ${this.state}`); | ||
} | ||
handler.call(this, chunkState); | ||
this[this.state].call(this, chunkState); | ||
} | ||
@@ -529,6 +553,2 @@ | ||
* | ||
* @param {boolean} atStart Whether the character will be at the start of its | ||
* buffer. If the buffer was empty before capturing this character, this is | ||
* ``true``. | ||
* | ||
* @returns {boolean} ``true`` if the method should continue capturing text, | ||
@@ -539,10 +559,2 @@ * ``false`` otherwise. | ||
/** | ||
* @callback FragmentTest | ||
* | ||
* @private | ||
* | ||
* @param {string} fragment The fragment being captured. | ||
*/ | ||
/** | ||
* Capture characters into a buffer while a condition is true. A sequence of | ||
@@ -564,39 +576,20 @@ * ``write`` calls may require the capture of text into a buffer as multiple | ||
* | ||
* @param {FragmentTest} [checkFragment] A test to perform on the captured | ||
* fragment so as to check the contents of the fragment. | ||
* | ||
* @return {string|undefined} The character that made the test fail, or | ||
* ``undefined`` if we hit the end of the chunk. | ||
*/ | ||
captureWhile(chunkState, test, buffer, checkFragment) { | ||
captureWhile(chunkState, test, buffer) { | ||
const { limit, chunk, i: start } = chunkState; | ||
let skip; | ||
let c; | ||
let atStart = this[buffer].length === 0; | ||
// eslint-disable-next-line no-constant-condition | ||
while (true) { | ||
if (chunkState.i >= limit) { | ||
c = undefined; | ||
skip = 0; | ||
break; | ||
while (chunkState.i < limit) { | ||
const c = this.getCode(chunkState); | ||
if (!test(c)) { | ||
// This is faster than adding codepoints one by one. | ||
this[buffer] += chunk.substring(start, | ||
chunkState.i - (c <= 0xFFFF ? 1 : 2)); | ||
return c; | ||
} | ||
c = this.getCode(chunkState); | ||
const wasAtStart = atStart; | ||
atStart = false; | ||
if (!test(c, wasAtStart)) { | ||
skip = c <= 0xFFFF ? 1 : 2; | ||
break; | ||
} | ||
} | ||
// This is faster than adding codepoints one by one. | ||
const fragment = chunk.substring(start, chunkState.i - skip); | ||
if (checkFragment) { | ||
checkFragment(fragment); | ||
} | ||
this[buffer] += fragment; | ||
return c; | ||
this[buffer] += chunk.substring(start); | ||
return undefined; | ||
} | ||
@@ -619,21 +612,14 @@ | ||
const { limit } = chunkState; | ||
let c; | ||
// eslint-disable-next-line no-constant-condition | ||
while (true) { | ||
if (chunkState.i >= limit) { | ||
c = undefined; | ||
break; | ||
} | ||
c = this.getCode(chunkState); | ||
while (chunkState.i < limit) { | ||
const c = this.getCode(chunkState); | ||
if (!test(c)) { | ||
break; | ||
return c; | ||
} | ||
} | ||
return c; | ||
return undefined; | ||
} | ||
/** | ||
* Skip dwhitespace characters. | ||
* Skip whitespace characters. | ||
* | ||
@@ -649,17 +635,10 @@ * @private | ||
const { limit } = chunkState; | ||
let c; | ||
// eslint-disable-next-line no-constant-condition | ||
while (true) { | ||
if (chunkState.i >= limit) { | ||
c = undefined; | ||
break; | ||
} | ||
c = this.getCode(chunkState); | ||
while (chunkState.i < limit) { | ||
const c = this.getCode(chunkState); | ||
if (!isS(c)) { | ||
break; | ||
return c; | ||
} | ||
} | ||
return c; | ||
return undefined; | ||
} | ||
@@ -692,3 +671,2 @@ | ||
this.state = S_OPEN_WAKA; | ||
this.startTagPosition = this.position; | ||
} | ||
@@ -703,2 +681,3 @@ else { | ||
this.textNode = String.fromCodePoint(c); | ||
this.textNodeCheckedBefore = 0; | ||
this.state = S_TEXT; | ||
@@ -711,46 +690,10 @@ this.xmlDeclPossible = false; | ||
sText(chunkState) { | ||
const c = this.captureWhile( | ||
chunkState, | ||
cx => cx !== LESS && cx !== AMP, | ||
"textNode", | ||
(fragment) => { | ||
// Text fragments is a buffer we use to check for the precence of a | ||
// literal "]]>" in text nodes. We cannot do the check against textNode | ||
// itself because textNode will contain resolve entities so "]]>" | ||
// would turn to "]]>" in textNode and raise a false error. | ||
this.textFragments += fragment; | ||
// We also have to check the end of textFragments because some cases may | ||
// slip through otherwise. For instance, if client code write | ||
// char-by-char. Then fragment will never contain ]]> but instead we'll | ||
// have 3 fragments one with "]", a second with "]" and a third with | ||
// ">". | ||
if (fragment.includes("]]>") || this.textFragments.endsWith("]]>")) { | ||
this.fail("the string \"]]>\" is disallowed in char data."); | ||
} | ||
const c = this.captureWhile(chunkState, | ||
cx => cx !== LESS && cx !== AMP, | ||
"textNode"); | ||
if (!this.inRoot && /\S/.test(fragment)) { | ||
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags | ||
// to avoid reporting errors for every single character that is out of | ||
// place. | ||
if (!this.sawRoot && !this.reportedTextBeforeRoot) { | ||
this.fail("text data outside of root node."); | ||
this.reportedTextBeforeRoot = true; | ||
} | ||
if (this.closedRoot && !this.reportedTextAfterRoot) { | ||
this.fail("text data outside of root node."); | ||
this.reportedTextAfterRoot = true; | ||
} | ||
} | ||
}); | ||
switch (c) { | ||
case LESS: | ||
this.state = S_OPEN_WAKA; | ||
this.startTagPosition = this.position; | ||
this.textFragments = ""; | ||
break; | ||
case AMP: | ||
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags to | ||
// avoid reporting errors for every single character that is out of place. | ||
if (!this.inRoot && (/\S/.test(this.textNode) || c === AMP)) { | ||
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags | ||
// to avoid reporting errors for every single character that is out of | ||
// place. | ||
if (!this.sawRoot && !this.reportedTextBeforeRoot) { | ||
@@ -765,9 +708,21 @@ this.fail("text data outside of root node."); | ||
} | ||
} | ||
if (this.textNode.includes("]]>", this.textNodeCheckedBefore)) { | ||
this.fail("the string \"]]>\" is disallowed in char data."); | ||
} | ||
// We have to go back two spaces so that we can catch the case where on a | ||
// previous write call, the textNode buffer ended on ``]]`` and we started | ||
// with ``>`` this time around. | ||
this.textNodeCheckedBefore = this.textNode.length - 2; | ||
switch (c) { | ||
case LESS: | ||
this.state = S_OPEN_WAKA; | ||
break; | ||
case AMP: | ||
this.state = S_ENTITY; | ||
this.entityBufferName = "textNode"; | ||
this.entityReturnState = S_TEXT; | ||
// If we run into an entity, then necessarily we do not have a "]]>" | ||
// literal. So we flush this.textFragments. | ||
this.textFragments = ""; | ||
break; | ||
@@ -803,13 +758,7 @@ default: | ||
break; | ||
default: { | ||
this.fail("unencoded <."); | ||
// if there was some whitespace, then add that in. | ||
const pad = (this.startTagPosition + 1 < this.position) ? | ||
new Array(this.position - this.startTagPosition).join(" ") : | ||
""; | ||
this.textNode += `<${pad}${String.fromCodePoint(c)}`; | ||
default: | ||
this.fail("disallowed characer in tag name."); | ||
this.state = S_TEXT; | ||
this.xmlDeclPossible = false; | ||
} | ||
} | ||
} | ||
@@ -821,3 +770,4 @@ } | ||
const c = String.fromCodePoint(this.getCode(chunkState)); | ||
switch (this.openWakaBang + c) { | ||
this.openWakaBang += c; | ||
switch (this.openWakaBang) { | ||
case "[CDATA[": | ||
@@ -851,3 +801,2 @@ if (!this.sawRoot && !this.reportedTextBeforeRoot) { | ||
default: | ||
this.openWakaBang += c; | ||
// 7 happens to be the maximum length of the string that can possibly | ||
@@ -886,9 +835,7 @@ // match one of the cases above. | ||
sDoctypeQuoted(chunkState) { | ||
const c = this.captureWhile(chunkState, cx => cx !== this.q, "doctype"); | ||
if (!c) { | ||
const { q } = this; | ||
const c = this.captureWhile(chunkState, cx => cx !== q, "doctype"); | ||
if (!c || c !== q) { | ||
return; | ||
} | ||
if (c !== this.q) { | ||
return; | ||
} | ||
@@ -921,3 +868,4 @@ this.doctype += String.fromCodePoint(c); | ||
sDoctypeDTDQuoted(chunkState) { | ||
const c = this.captureWhile(chunkState, cx => cx !== this.q, "doctype"); | ||
const { q } = this; | ||
const c = this.captureWhile(chunkState, cx => cx !== q, "doctype"); | ||
if (!c) { | ||
@@ -928,3 +876,3 @@ return; | ||
this.doctype += String.fromCodePoint(c); | ||
if (c === this.q) { | ||
if (c === q) { | ||
this.state = S_DOCTYPE_DTD; | ||
@@ -1021,7 +969,10 @@ this.q = null; | ||
sPI(chunkState) { | ||
// We have to perform the isNameStartChar check here because we do not feed | ||
// the first character in piTarget elsewhere. | ||
let check = this.piTarget.length === 0 ? isNameStartChar : isNameChar; | ||
const c = this.captureWhile( | ||
chunkState, | ||
(cx, first) => { | ||
(cx) => { | ||
if (cx !== QUESTION && !isS(cx)) { | ||
if (!((first ? isNameStartChar : isNameChar)(cx) && | ||
if (!(check(cx) && | ||
// When namespaces are used, colons are not allowed in entity | ||
@@ -1035,2 +986,3 @@ // names. | ||
check = isNameStartChar; | ||
return true; | ||
@@ -1154,3 +1106,4 @@ } | ||
case S_XML_DECL_VALUE: | ||
c = this.captureWhile(chunkState, cx => cx !== QUESTION && cx !== this.q, | ||
c = this.captureWhile(chunkState, | ||
cx => cx !== QUESTION && cx !== this.q, | ||
"xmlDeclValue"); | ||
@@ -1278,7 +1231,9 @@ | ||
sOpenTag(chunkState) { | ||
// We don't need to check with isNameStartChar here because the first | ||
// character of tagName is fed elsewhere, and the check is done there. | ||
const c = this.captureWhile( | ||
chunkState, | ||
(cx, first) => { | ||
(cx) => { | ||
if (cx !== GREATER && !isS(cx) && cx !== FORWARD_SLASH) { | ||
if (!((first ? isNameStartChar : isNameChar)(cx))) { | ||
if (!isNameChar(cx)) { | ||
this.fail("disallowed characer in tag name."); | ||
@@ -1361,7 +1316,9 @@ } | ||
sAttribName(chunkState) { | ||
// We don't need to check with isNameStartChar here because the first | ||
// character of attribute is fed elsewhere, and the check is done there. | ||
const c = this.captureWhile( | ||
chunkState, | ||
(cx, first) => { | ||
(cx) => { | ||
if (cx !== EQUAL && !isS(cx) && cx !== GREATER) { | ||
if (!((first ? isNameStartChar : isNameChar)(cx))) { | ||
if (!isNameChar(cx)) { | ||
this.fail("disallowed characer in attribute name."); | ||
@@ -1434,2 +1391,3 @@ } | ||
sAttribValueQuoted(chunkState) { | ||
const { q } = this; | ||
const c = this.captureWhile( | ||
@@ -1441,3 +1399,3 @@ chunkState, | ||
} | ||
return cx !== this.q && cx !== AMP; | ||
return cx !== q && cx !== AMP; | ||
}, | ||
@@ -1517,16 +1475,5 @@ "attribValue"); | ||
sCloseTag(chunkState) { | ||
const c = this.captureWhile( | ||
chunkState, | ||
(cx, first) => { | ||
if (cx !== GREATER && !isS(cx)) { | ||
if (!((first ? isNameStartChar : isNameChar)(cx))) { | ||
this.fail("disallowed characer in tag name."); | ||
} | ||
return true; | ||
} | ||
return false; | ||
}, | ||
"tagName"); | ||
const c = this.captureWhile(chunkState, | ||
cx => cx !== GREATER && !isS(cx), | ||
"tagName"); | ||
if (c === GREATER) { | ||
@@ -1553,17 +1500,28 @@ this.closeTag(); | ||
sEntity(chunkState) { | ||
const c = this.getCode(chunkState); | ||
if ((this.entity.length ? isNameChar : isEntityStartChar)(c) && | ||
// When namespaces are used, colons are not valid in entity | ||
// names. | ||
// https://www.w3.org/XML/xml-names-19990114-errata.html | ||
// NE08 | ||
(!this.opt.xmlns || c !== COLON)) { | ||
this.entity += String.fromCodePoint(c); | ||
} | ||
else if (c === SEMICOLON) { | ||
let check = this.entity.length === 0 ? isEntityStartChar : isNameChar; | ||
const c = this.captureWhile(chunkState, | ||
(cx) => { | ||
if (check(cx) && | ||
// When namespaces are used, colons are | ||
// not valid in entity names. | ||
// https://www.w3.org/XML/xml-names-19990114-errata.html | ||
// NE08 | ||
(!this.opt.xmlns || cx !== COLON)) { | ||
check = isNameChar; | ||
return true; | ||
} | ||
return false; | ||
}, | ||
"entity"); | ||
if (c === SEMICOLON) { | ||
this[this.entityBufferName] += this.parseEntity(); | ||
if (this.entityBufferName === "textNode") { | ||
this.textNodeCheckedBefore = this.textNode.length; | ||
} | ||
this.entity = ""; | ||
this.state = this.entityReturnState; | ||
} | ||
else { | ||
else if (c) { | ||
this.fail("disallowed character in entity name."); | ||
@@ -1615,2 +1573,3 @@ this[this.entityBufferName] += | ||
this.textNode = ""; | ||
this.textNodeCheckedBefore = 0; | ||
} | ||
@@ -1634,15 +1593,17 @@ | ||
_resolve(prefix, index) { | ||
if (index < 0) { | ||
return this.ns[prefix]; | ||
resolve(prefix) { | ||
let uri = this.tag.ns[prefix]; | ||
if (uri !== undefined) { | ||
return uri; | ||
} | ||
const uri = this.tags[index].ns[prefix]; | ||
return uri !== undefined ? uri : this._resolve(prefix, index - 1); | ||
} | ||
const { tags } = this; | ||
for (let index = tags.length - 1; index >= 0; index--) { | ||
uri = tags[index].ns[prefix]; | ||
if (uri !== undefined) { | ||
return uri; | ||
} | ||
} | ||
resolve(prefix) { | ||
const uri = this.tag.ns[prefix]; | ||
return uri !== undefined ? uri : | ||
this._resolve(prefix, this.tags.length - 1); | ||
return this.ns[prefix]; | ||
} | ||
@@ -1698,63 +1659,31 @@ | ||
openTag(selfClosing) { | ||
const { tag } = this; | ||
const { tag, attribList } = this; | ||
if (this.opt.xmlns) { | ||
// emit namespace binding events | ||
const { ns } = tag; | ||
// eslint-disable-next-line prefer-const | ||
for (let [name, uri] of this.attribList) { | ||
const { ns, attributes } = tag; | ||
for (const [name, uri] of attribList) { | ||
const { prefix, local } = this.qname(name, true); | ||
if (prefix === "xmlns") { | ||
uri = uri.trim(); | ||
// namespace binding attribute. push the binding into scope | ||
if (local === "xml" && uri !== XML_NAMESPACE) { | ||
this.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`); | ||
} | ||
else if (local === "xmlns" && uri !== XMLNS_NAMESPACE) { | ||
this.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`); | ||
} | ||
else { | ||
switch (uri) { | ||
case XMLNS_NAMESPACE: | ||
if (local === "") { | ||
this.fail(`the default namespace may not be set to | ||
${XMLNS_NAMESPACE}.`); | ||
} | ||
else { | ||
this.fail(`may not assign a prefix (even "xmlns") to the URI \ | ||
${XMLNS_NAMESPACE}.`); | ||
} | ||
break; | ||
case XML_NAMESPACE: | ||
if (local === "") { | ||
this.fail(`the default namespace may not be set to | ||
${XML_NAMESPACE}.`); | ||
} | ||
else if (local !== "xml") { | ||
this.fail( | ||
"may not assign the xml namespace to another prefix."); | ||
} | ||
break; | ||
default: | ||
} | ||
ns[local] = uri; | ||
} | ||
ns[local] = uri.trim(); | ||
} | ||
} | ||
// add namespace info to tag | ||
const qn = this.qname(this.tagName); | ||
tag.prefix = qn.prefix; | ||
tag.local = qn.local; | ||
tag.uri = this.resolve(qn.prefix) || ""; | ||
nsMappingCheck(this, ns); | ||
if (tag.prefix) { | ||
if (tag.prefix === "xmlns") { | ||
this.fail("tags may not have \"xmlns\" as prefix."); | ||
} | ||
{ | ||
// add namespace info to tag | ||
const { prefix, local } = this.qname(this.tagName); | ||
tag.prefix = prefix; | ||
tag.local = local; | ||
const uri = tag.uri = this.resolve(prefix) || ""; | ||
if (!tag.uri) { | ||
this.fail(`unbound namespace prefix: \ | ||
${JSON.stringify(this.tagName)}.`); | ||
tag.uri = qn.prefix; | ||
if (prefix) { | ||
if (prefix === "xmlns") { | ||
this.fail("tags may not have \"xmlns\" as prefix."); | ||
} | ||
if (!uri) { | ||
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`); | ||
tag.uri = prefix; | ||
} | ||
} | ||
@@ -1766,3 +1695,3 @@ } | ||
// http://www.w3.org/TR/REC-xml-names/#defaulting | ||
for (const [name, value] of this.attribList) { | ||
for (const [name, value] of attribList) { | ||
const { prefix, local } = this.qname(name, true); | ||
@@ -1786,16 +1715,16 @@ const uri = prefix === "" ? "" : (this.resolve(prefix) || ""); | ||
// then fail on them now. | ||
if (prefix && prefix !== "xmlns" && !uri) { | ||
this.fail(`unbound namespace prefix: ${ | ||
JSON.stringify(prefix)}.`); | ||
if (prefix && !uri) { | ||
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`); | ||
a.uri = prefix; | ||
} | ||
tag.attributes[name] = a; | ||
attributes[name] = a; | ||
} | ||
} | ||
else { | ||
for (const [name, value] of this.attribList) { | ||
if (this.tag.attributes[name]) { | ||
const { attributes } = this.tag; | ||
for (const [name, value] of attribList) { | ||
if (attributes[name]) { | ||
this.fail(`duplicate attribute: ${name}.`); | ||
} | ||
this.tag.attributes[name] = value; | ||
attributes[name] = value; | ||
} | ||
@@ -1808,9 +1737,9 @@ } | ||
// process the tag | ||
if (this.closedRoot) { | ||
if (!this.opt.fragment && this.closedRoot) { | ||
this.fail("documents may contain only one root."); | ||
} | ||
this.sawRoot = true; | ||
if (!this.closedRoot) { | ||
else { | ||
this.inRoot = true; | ||
} | ||
this.sawRoot = true; | ||
this.tags.push(tag); | ||
@@ -1834,18 +1763,20 @@ this.emitNode("onopentag", tag); | ||
closeTag() { | ||
if (!this.tagName) { | ||
const { tags, tagName } = this; | ||
// Our state after this will be S_TEXT, no matter what, and we can clear | ||
// tagName now. | ||
this.state = S_TEXT; | ||
this.tagName = ""; | ||
if (!tagName) { | ||
this.fail("weird empty close tag."); | ||
this.textNode += "</>"; | ||
this.state = S_TEXT; | ||
return; | ||
} | ||
const { tags } = this; | ||
// first make sure that the closing tag actually exists. | ||
// <a><b></c></b></a> will close everything, otherwise. | ||
let t = tags.length; | ||
const { tagName } = this; | ||
const closeTo = tagName; | ||
while (t--) { | ||
const close = tags[t]; | ||
if (close.name !== closeTo) { | ||
let l = tags.length; | ||
while (l-- > 0) { | ||
const tag = this.tag = tags.pop(); | ||
this.emitNode("onclosetag", tag); | ||
if (tag.name !== tagName) { | ||
this.fail("unexpected close tag."); | ||
@@ -1858,20 +1789,10 @@ } | ||
if (t < 0) { | ||
if (l === 0) { | ||
this.inRoot = false; | ||
this.closedRoot = true; | ||
} | ||
else if (l < 0) { | ||
this.fail(`unmatched closing tag: ${tagName}.`); | ||
this.textNode += `</${tagName}>`; | ||
this.state = S_TEXT; | ||
return; | ||
} | ||
let s = this.tags.length; | ||
while (s-- > t) { | ||
const tag = this.tag = tags.pop(); | ||
this.emitNode("onclosetag", tag); | ||
} | ||
if (t === 0) { | ||
this.inRoot = false; | ||
this.closedRoot = true; | ||
} | ||
this.tagName = this.attribValue = this.attribName = ""; | ||
this.attribList = []; | ||
this.state = S_TEXT; | ||
} | ||
@@ -1897,4 +1818,3 @@ | ||
if (entity[0] === "#") { | ||
if ((entity[1] === "x") && | ||
/^#[x|X][0-9a-fA-F]+$/.test(entity)) { | ||
if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) { | ||
num = parseInt(entity.slice(2), 16); | ||
@@ -1907,15 +1827,9 @@ } | ||
if (Number.isNaN(num) || num > 0x10FFFF) { | ||
this.fail("malformed character entity."); | ||
return `&${this.entity};`; | ||
} | ||
const char = String.fromCodePoint(num); | ||
// The character reference is required to match the CHAR production. | ||
if (!isChar(num)) { | ||
this.fail("malformed character entity."); | ||
return `&${this.entity};`; | ||
return `&${entity};`; | ||
} | ||
return char; | ||
return String.fromCodePoint(num); | ||
} | ||
@@ -1922,0 +1836,0 @@ } |
@@ -5,3 +5,3 @@ { | ||
"author": "Louis-Dominique Dubeau <ldd@lddubeau.com>", | ||
"version": "2.0.0", | ||
"version": "2.1.0", | ||
"main": "lib/saxes.js", | ||
@@ -8,0 +8,0 @@ "types": "lib/saxes.d.ts", |
@@ -87,3 +87,3 @@ # saxes | ||
If there's a mistake in the documenation, raise an issue. If you just assume, | ||
If there's a mistake in the documentation, raise an issue. If you just assume, | ||
you may assume incorrectly. | ||
@@ -123,6 +123,6 @@ | ||
* `xmlns` - Boolean. If true, then namespaces are supported. Default | ||
* `xmlns` - Boolean. If `true`, then namespaces are supported. Default | ||
is `false`. | ||
* `position` - Boolean. If false, then don't track line/col/position. Unset is | ||
* `position` - Boolean. If `false`, then don't track line/col/position. Unset is | ||
treated as `true`. Default is unset. | ||
@@ -134,2 +134,9 @@ | ||
* `fragment` - Boolean. If `true`, parse the XML as an XML fragment. Default is | ||
`false`. | ||
* `additionalNamespaces` - A plain object whose key, value pairs define | ||
namespaces known before parsing the XML file. It is not legal to pass | ||
bindings for the namespaces `"xml"` or `"xmlns"`. | ||
### Methods | ||
@@ -173,2 +180,18 @@ | ||
### Parsing XML Fragments | ||
The XML specification does not define any method by which to parse XML | ||
fragments. However, there are usage scenarios in which it is desirable to parse | ||
fragments. In order to allow this, saxes provides two initialization options. | ||
If you pass the option `fragment: true` to the parser constructor, the parser | ||
will expect an XML fragment. It essentially starts with a parsing state | ||
equivalent to the one it would be in if `parser.write("<foo>")` had been called | ||
right after initialization. In other words, it expects content which is | ||
acceptable inside an element. This also turns off well-formedness checks that | ||
are inappropriate when parsing a fragment. | ||
The other option is `additionalNamespaces`, which allows you to define | ||
additional prefix-to-URI bindings known before parsing starts. | ||
## FAQ | ||
@@ -175,0 +198,0 @@ |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
75195
236
1689