Socket
Socket
Sign inDemoInstall

sax

Package Overview
Dependencies
0
Maintainers
0
Versions
48
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.1.5 to 0.2.0

733

lib/sax.js
// wrapper for non-node envs
;(function (sax) {
sax.parser = function (strict, opt) { return new SAXParser(strict, opt) };
sax.SAXParser = SAXParser;
sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
sax.SAXParser = SAXParser

@@ -16,3 +16,3 @@ // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.

// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024;
sax.MAX_BUFFER_LENGTH = 64 * 1024

@@ -23,26 +23,27 @@ var buffers = [

"attribValue", "cdata"
];
]
function SAXParser (strict, opt) {
clearBuffers(this);
this.q = this.c = "";
this.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;
this.opt = opt || {};
this.tagCase = this.opt.lowercasetags ? "toLowerCase" : "toUpperCase";
this.tags = [];
this.closed = this.closedRoot = this.sawRoot = false;
this.tag = this.error = null;
this.strict = !!strict;
this.state = S.BEGIN;
this.ENTITIES = Object.create(sax.ENTITIES);
clearBuffers(this)
this.q = this.c = ""
this.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
this.opt = opt || {}
this.tagCase = this.opt.lowercasetags ? "toLowerCase" : "toUpperCase"
this.tags = []
this.closed = this.closedRoot = this.sawRoot = false
this.tag = this.error = null
this.strict = !!strict
this.state = S.BEGIN
this.ENTITIES = Object.create(sax.ENTITIES)
// mostly just for error reporting
this.position = this.line = this.column = 0;
emit(this, "onready");
this.position = this.line = this.column = 0
emit(this, "onready")
}
function checkBufferLength (parser) {
var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10),
maxActual = 0;
var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
, maxActual = 0
for (var i = 0, l = buffers.length; i < l; i ++) {
var len = parser[buffers[i]].length;
var len = parser[buffers[i]].length
if (len > maxAllowed) {

@@ -63,12 +64,15 @@ // Text/cdata nodes can get big, and since they're buffered,

}
maxActual = Math.max(maxActual, len);
maxActual = Math.max(maxActual, len)
}
// schedule the next check for the earliest possible buffer overrun.
parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual) + parser.position;
parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual)
+ parser.position
}
function clearBuffers (parser) {
for (var i = 0, l = buffers.length; i < l; i ++) {
parser[buffers[i]] = "";
parser[buffers[i]] = ""
}
}
SAXParser.prototype = {

@@ -80,19 +84,94 @@ write : write,

// Resolve the Stream base class. Outside of node (where require("stream")
// is unavailable and throws) fall back to an empty constructor so the
// rest of the module can still be evaluated.
var Stream
try {
  Stream = require("stream").Stream
} catch (ex) {
  Stream = function () {}
}
// Factory counterpart to `new SAXStream(strict, opt)`; both arguments are
// handed to the SAXStream constructor unchanged.
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt)
  return stream
}
// SAXStream: a readable/writable Stream wrapper around a SAXParser.
// Chunks written to the stream are fed to the parser and then re-emitted
// as "data" events; parser callbacks are surfaced as stream events.
//
// strict, opt - passed straight through to the SAXParser constructor.
function SAXStream (strict, opt) {
  // Bind `me` before its first use so the Stream base constructor is
  // applied to this instance rather than to an undefined receiver.
  var me = this
  Stream.apply(me)

  me._parser = new SAXParser(strict, opt)
  me.writable = true
  me.readable = true

  // "end" and "error" are forwarded unconditionally; every other parser
  // event is wired up lazily in SAXStream.prototype.on.
  me._parser.onend = function () {
    me.emit("end")
  }
  me._parser.onerror = function (er) {
    me.emit("error", er)
  }
}

SAXStream.prototype = Object.create(Stream.prototype,
  { constructor: { value: SAXStream } })

// Feed one chunk to the parser, then pass the same chunk on as a "data"
// event. Returns true, the conventional "keep writing" value for a
// writable stream.
SAXStream.prototype.write = function (data) {
  this._parser.write(data.toString())
  this.emit("data", data)
  return true
}

// Optionally write a final chunk (re-emitted as "data" like any other),
// then signal end-of-input to the parser. Writing null is how the parser
// is told to finish; its onend callback then emits "end" on this stream.
SAXStream.prototype.end = function (chunk) {
  if (chunk && chunk.length) this.write(chunk)
  this._parser.write(null)
}

// Parser events that may be re-emitted on the stream.
var streamWraps =
  [ "opentag"
  , "closetag"
  , "text"
  , "attribute"
  , "error"
  , "doctype"
  , "processinginstruction"
  , "sgmldeclaration"
  , "comment"
  , "opencdata"
  , "cdata"
  , "closecdata"
  , "ready"
  ]

// Register a listener. The first time a wrappable parser event is
// listened for, install the matching parser "on<event>" callback so the
// parser's notifications are re-emitted from the stream.
SAXStream.prototype.on = function (ev, handler) {
  var me = this
  if (!me._parser["on"+ev] && streamWraps.indexOf(ev) !== -1) {
    me._parser["on"+ev] = function () {
      // slice copies the arguments safely; Array.apply would treat a
      // single numeric argument as a length.
      var args = [ev].concat(Array.prototype.slice.call(arguments))
      me.emit.apply(me, args)
    }
  }
  return Stream.prototype.on.call(me, ev, handler)
}
// character classes and tokens
var whitespace = "\r\n\t ",
var whitespace = "\r\n\t "
// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.
number = "0124356789",
letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
// (Letter | '_' | ':')
nameStart = letter+"_:",
nameBody = nameStart+number+"-.",
quote = "'\"",
entity = number+letter+"#",
CDATA = "[CDATA[",
DOCTYPE = "DOCTYPE";
, number = "0124356789"
, letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
// (Letter | "_" | ":")
, nameStart = letter+"_:"
, nameBody = nameStart+number+"-."
, quote = "'\""
, entity = number+letter+"#"
, CDATA = "[CDATA["
, DOCTYPE = "DOCTYPE"
function is (charclass, c) { return charclass.indexOf(c) !== -1 }
function not (charclass, c) { return !is(charclass, c) }
var S = 0;
var S = 0
sax.STATE =

@@ -142,310 +221,320 @@ { BEGIN : S++

for (var S in sax.STATE) sax.STATE[sax.STATE[S]] = S;
for (var S in sax.STATE) sax.STATE[sax.STATE[S]] = S
// shorthand
S = sax.STATE;
S = sax.STATE
sax.EVENTS = [ // for discoverability.
"text", "processinginstruction", "sgmldeclaration",
"doctype", "comment", "attribute", "opentag", "closetag",
"opencdata", "cdata", "closecdata", "error", "end", "ready" ];
"opencdata", "cdata", "closecdata", "error", "end", "ready" ]
function emit (parser, event, data) {
parser[event] && parser[event](data);
parser[event] && parser[event](data)
}
function emitNode (parser, nodeType, data) {
if (parser.textNode) closeText(parser);
emit(parser, nodeType, data);
if (parser.textNode) closeText(parser)
emit(parser, nodeType, data)
}
function closeText (parser) {
parser.textNode = textopts(parser.opt, parser.textNode);
if (parser.textNode) emit(parser, "ontext", parser.textNode);
parser.textNode = "";
parser.textNode = textopts(parser.opt, parser.textNode)
if (parser.textNode) emit(parser, "ontext", parser.textNode)
parser.textNode = ""
}
function textopts (opt, text) {
if (opt.trim) text = text.trim();
if (opt.normalize) text = text.replace(/\s+/g, " ");
return text;
if (opt.trim) text = text.trim()
if (opt.normalize) text = text.replace(/\s+/g, " ")
return text
}
function error (parser, er) {
closeText(parser);
closeText(parser)
er += "\nLine: "+parser.line+
"\nColumn: "+parser.column+
"\nChar: "+parser.c;
er = new Error(er);
parser.error = er;
emit(parser, "onerror", er);
return parser;
"\nChar: "+parser.c
er = new Error(er)
parser.error = er
emit(parser, "onerror", er)
return parser
}
function end (parser) {
if (parser.state !== S.TEXT) error(parser, "Unexpected end");
closeText(parser);
parser.c = "";
parser.closed = true;
emit(parser, "onend");
SAXParser.call(parser, parser.strict, parser.opt);
return parser;
if (parser.state !== S.TEXT) error(parser, "Unexpected end")
closeText(parser)
parser.c = ""
parser.closed = true
emit(parser, "onend")
SAXParser.call(parser, parser.strict, parser.opt)
return parser
}
function strictFail (parser, message) {
if (parser.strict) error(parser, message);
if (parser.strict) error(parser, message)
}
function newTag (parser) {
if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]();
parser.tag = { name : parser.tagName, attributes : {} };
if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]()
parser.tag = { name : parser.tagName, attributes : {} }
}
function openTag (parser, selfClosing) {
parser.sawRoot = true;
parser.tags.push(parser.tag);
emitNode(parser, "onopentag", parser.tag);
parser.sawRoot = true
parser.tags.push(parser.tag)
emitNode(parser, "onopentag", parser.tag)
if (!selfClosing) {
parser.tag = null;
parser.tagName = "";
parser.state = S.TEXT;
parser.tag = null
parser.tagName = ""
parser.state = S.TEXT
}
parser.attribName = parser.attribValue = "";
parser.attribName = parser.attribValue = ""
}
function closeTag (parser) {
if (!parser.tagName) {
strictFail(parser, "Weird empty close tag.");
parser.textNode += "</>";
parser.state = S.TEXT;
return;
strictFail(parser, "Weird empty close tag.")
parser.textNode += "</>"
parser.state = S.TEXT
return
}
// first make sure that the closing tag actually exists.
// <a><b></c></b></a> will close everything, otherwise.
var t = parser.tags.length;
if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]();
var closeTo = parser.tagName;
var t = parser.tags.length
if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]()
var closeTo = parser.tagName
while (t --) {
var close = parser.tags[t];
var close = parser.tags[t]
if (close.name !== closeTo) {
// fail the first time in strict mode
strictFail(parser, "Unexpected close tag");
} else break;
strictFail(parser, "Unexpected close tag")
} else break
}
// didn't find it. we already failed for strict, so just abort.
if (t < 0) return;
var s = parser.tags.length;
if (t < 0) return
var s = parser.tags.length
while (s --> t) {
parser.tag = parser.tags.pop();
parser.tagName = parser.tag.name;
emitNode(parser, "onclosetag", parser.tagName);
parser.tag = parser.tags.pop()
parser.tagName = parser.tag.name
emitNode(parser, "onclosetag", parser.tagName)
}
if (t === 0) parser.closedRoot = true;
parser.tagName = parser.attribValue = parser.attribName = "";
parser.tag = null;
parser.state = S.TEXT;
if (t === 0) parser.closedRoot = true
parser.tagName = parser.attribValue = parser.attribName = ""
parser.tag = null
parser.state = S.TEXT
}
function parseEntity (parser) {
var entity = parser.entity.toLowerCase(), num, numStr = "";
if (parser.ENTITIES[entity]) return parser.ENTITIES[entity];
var entity = parser.entity.toLowerCase(), num, numStr = ""
if (parser.ENTITIES[entity]) return parser.ENTITIES[entity]
if (entity.charAt(0) === "#") {
if (entity.charAt(1) === "x") {
entity = entity.slice(2);
num = parseInt(entity, 16), numStr = num.toString(16);
entity = entity.slice(2)
num = parseInt(entity, 16), numStr = num.toString(16)
} else {
entity = entity.slice(1);
num = parseInt(entity, 10), numStr = num.toString(10);
entity = entity.slice(1)
num = parseInt(entity, 10), numStr = num.toString(10)
}
}
if (numStr.toLowerCase() !== entity) {
strictFail(parser, "Invalid character entity");
return "&"+parser.entity + ";";
strictFail(parser, "Invalid character entity")
return "&"+parser.entity + ";"
}
return String.fromCharCode(num);
return String.fromCharCode(num)
}
function write (chunk) {
var parser = this;
if (this.error) throw this.error;
var parser = this
if (this.error) throw this.error
if (parser.closed) return error(parser,
"Cannot write after close. Assign an onready handler.");
if (chunk === null) return end(parser);
"Cannot write after close. Assign an onready handler.")
if (chunk === null) return end(parser)
var i = 0, c = ""
while (parser.c = c = chunk.charAt(i++)) {
parser.position ++;
parser.position ++
if (c === "\n") {
parser.line ++;
parser.column = 0;
} else parser.column ++;
parser.line ++
parser.column = 0
} else parser.column ++
switch (parser.state) {
case S.BEGIN:
if (c === "<") parser.state = S.OPEN_WAKA;
if (c === "<") parser.state = S.OPEN_WAKA
else if (not(whitespace,c)) {
// have to process this as a text node.
// weird, but happens.
strictFail(parser, "Non-whitespace before first tag.");
parser.textNode = c;
state = S.TEXT;
strictFail(parser, "Non-whitespace before first tag.")
parser.textNode = c
state = S.TEXT
}
continue;
continue
case S.TEXT:
if (parser.sawRoot && !parser.closedRoot) {
var starti = i-1;
var starti = i-1
while (c && c!=="<" && c!=="&") {
c = chunk.charAt(i++);
c = chunk.charAt(i++)
if (c) {
parser.position ++;
parser.position ++
if (c === "\n") {
parser.line ++;
parser.column = 0;
} else parser.column ++;
parser.line ++
parser.column = 0
} else parser.column ++
}
}
parser.textNode += chunk.substring(starti, i-1);
parser.textNode += chunk.substring(starti, i-1)
}
if (c === "<") parser.state = S.OPEN_WAKA;
if (c === "<") parser.state = S.OPEN_WAKA
else {
if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
strictFail("Text data outside of root node.");
if (c === "&") parser.state = S.TEXT_ENTITY;
else parser.textNode += c;
strictFail("Text data outside of root node.")
if (c === "&") parser.state = S.TEXT_ENTITY
else parser.textNode += c
}
continue;
continue
case S.OPEN_WAKA:
// either a /, ?, !, or text is coming next.
if (c === "!") {
parser.state = S.SGML_DECL;
parser.sgmlDecl = "";
parser.state = S.SGML_DECL
parser.sgmlDecl = ""
} else if (is(whitespace, c)) {
// wait for it...
} else if (is(nameStart,c)) {
parser.state = S.OPEN_TAG;
parser.tagName = c;
parser.state = S.OPEN_TAG
parser.tagName = c
} else if (c === "/") {
parser.state = S.CLOSE_TAG;
parser.tagName = "";
parser.state = S.CLOSE_TAG
parser.tagName = ""
} else if (c === "?") {
parser.state = S.PROC_INST;
parser.procInstName = parser.procInstBody = "";
parser.state = S.PROC_INST
parser.procInstName = parser.procInstBody = ""
} else {
strictFail(parser, "Unencoded <");
parser.textNode += "<" + c;
parser.state = S.TEXT;
strictFail(parser, "Unencoded <")
parser.textNode += "<" + c
parser.state = S.TEXT
}
continue;
continue
case S.SGML_DECL:
if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
emitNode(parser, "onopencdata");
parser.state = S.CDATA;
parser.sgmlDecl = "";
parser.cdata = "";
emitNode(parser, "onopencdata")
parser.state = S.CDATA
parser.sgmlDecl = ""
parser.cdata = ""
} else if (parser.sgmlDecl+c === "--") {
parser.state = S.COMMENT;
parser.comment = "";
parser.sgmlDecl = "";
parser.state = S.COMMENT
parser.comment = ""
parser.sgmlDecl = ""
} else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
parser.state = S.DOCTYPE;
parser.state = S.DOCTYPE
if (parser.doctype || parser.sawRoot) strictFail(parser,
"Inappropriately located doctype declaration");
parser.doctype = "";
parser.sgmlDecl = "";
"Inappropriately located doctype declaration")
parser.doctype = ""
parser.sgmlDecl = ""
} else if (c === ">") {
emitNode(parser, "onsgmldeclaration", parser.sgmlDecl);
parser.sgmlDecl = "";
parser.state = S.TEXT;
emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
parser.sgmlDecl = ""
parser.state = S.TEXT
} else if (is(quote, c)) {
parser.state = S.SGML_DECL_QUOTED;
parser.sgmlDecl += c;
} else parser.sgmlDecl += c;
continue;
parser.state = S.SGML_DECL_QUOTED
parser.sgmlDecl += c
} else parser.sgmlDecl += c
continue
case S.SGML_DECL_QUOTED:
if (c === parser.q) {
parser.state = S.SGML_DECL;
parser.q = "";
parser.state = S.SGML_DECL
parser.q = ""
}
parser.sgmlDecl += c;
continue;
parser.sgmlDecl += c
continue
case S.DOCTYPE:
if (c === ">") {
parser.state = S.TEXT;
emitNode(parser, "ondoctype", parser.doctype);
parser.doctype = true; // just remember that we saw it.
parser.state = S.TEXT
emitNode(parser, "ondoctype", parser.doctype)
parser.doctype = true // just remember that we saw it.
} else {
parser.doctype += c;
if (c === "[") parser.state = S.DOCTYPE_DTD;
parser.doctype += c
if (c === "[") parser.state = S.DOCTYPE_DTD
else if (is(quote, c)) {
parser.state = S.DOCTYPE_QUOTED;
parser.q = c;
parser.state = S.DOCTYPE_QUOTED
parser.q = c
}
}
continue;
continue
case S.DOCTYPE_QUOTED:
parser.doctype += c;
parser.doctype += c
if (c === parser.q) {
parser.q = "";
parser.state = S.DOCTYPE;
parser.q = ""
parser.state = S.DOCTYPE
}
continue;
continue
case S.DOCTYPE_DTD:
parser.doctype += c;
if (c === "]") parser.state = S.DOCTYPE;
parser.doctype += c
if (c === "]") parser.state = S.DOCTYPE
else if (is(quote,c)) {
parser.state = S.DOCTYPE_DTD_QUOTED;
parser.q = c;
parser.state = S.DOCTYPE_DTD_QUOTED
parser.q = c
}
continue;
continue
case S.DOCTYPE_DTD_QUOTED:
parser.doctype += c;
parser.doctype += c
if (c === parser.q) {
parser.state = S.DOCTYPE_DTD;
parser.q = "";
parser.state = S.DOCTYPE_DTD
parser.q = ""
}
continue;
continue
case S.COMMENT:
if (c === "-") parser.state = S.COMMENT_ENDING;
else parser.comment += c;
continue;
if (c === "-") parser.state = S.COMMENT_ENDING
else parser.comment += c
continue
case S.COMMENT_ENDING:
if (c === "-") {
parser.state = S.COMMENT_ENDED;
parser.comment = textopts(parser.opt, parser.comment);
if (parser.comment) emitNode(parser, "oncomment", parser.comment);
parser.comment = "";
parser.state = S.COMMENT_ENDED
parser.comment = textopts(parser.opt, parser.comment)
if (parser.comment) emitNode(parser, "oncomment", parser.comment)
parser.comment = ""
} else {
strictFail(parser, "Invalid comment");
parser.comment += "-" + c;
strictFail(parser, "Invalid comment")
parser.comment += "-" + c
}
continue;
continue
case S.COMMENT_ENDED:
if (c !== ">") strictFail(parser, "Malformed comment");
else parser.state = S.TEXT;
continue;
if (c !== ">") strictFail(parser, "Malformed comment")
else parser.state = S.TEXT
continue
case S.CDATA:
if (c === "]") parser.state = S.CDATA_ENDING;
else parser.cdata += c;
continue;
if (c === "]") parser.state = S.CDATA_ENDING
else parser.cdata += c
continue
case S.CDATA_ENDING:
if (c === "]") parser.state = S.CDATA_ENDING_2;
if (c === "]") parser.state = S.CDATA_ENDING_2
else {
parser.cdata += "]" + c;
parser.state = S.CDATA;
parser.cdata += "]" + c
parser.state = S.CDATA
}
continue;
continue
case S.CDATA_ENDING_2:
if (c === ">") {
if (parser.cdata) emitNode(parser, "oncdata", parser.cdata);
emitNode(parser, "onclosecdata");
parser.cdata = "";
parser.state = S.TEXT;
if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
emitNode(parser, "onclosecdata")
parser.cdata = ""
parser.state = S.TEXT
} else if (c === "]") {
parser.cdata += "]"
} else {
parser.cdata += "]]" + c;
parser.state = S.CDATA;
parser.cdata += "]]" + c
parser.state = S.CDATA
}
continue;
continue
case S.PROC_INST:
if (c === "?") parser.state = S.PROC_INST_ENDING;
else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY;
else parser.procInstName += c;
continue;
if (c === "?") parser.state = S.PROC_INST_ENDING
else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
else parser.procInstName += c
continue
case S.PROC_INST_BODY:
if (!parser.procInstBody && is(whitespace, c)) continue;
else if (c === "?") parser.state = S.PROC_INST_ENDING;
if (!parser.procInstBody && is(whitespace, c)) continue
else if (c === "?") parser.state = S.PROC_INST_ENDING
else if (is(quote, c)) {
parser.state = S.PROC_INST_QUOTED;
parser.q = c;
parser.procInstBody += c;
} else parser.procInstBody += c;
continue;
parser.state = S.PROC_INST_QUOTED
parser.q = c
parser.procInstBody += c
} else parser.procInstBody += c
continue
case S.PROC_INST_ENDING:

@@ -456,131 +545,132 @@ if (c === ">") {

body : parser.procInstBody
});
parser.procInstName = parser.procInstBody = "";
parser.state = S.TEXT;
})
parser.procInstName = parser.procInstBody = ""
parser.state = S.TEXT
} else {
parser.procInstBody += "?" + c;
parser.state = S.PROC_INST_BODY;
parser.procInstBody += "?" + c
parser.state = S.PROC_INST_BODY
}
continue;
continue
case S.PROC_INST_QUOTED:
parser.procInstBody += c;
parser.procInstBody += c
if (c === parser.q) {
parser.state = S.PROC_INST_BODY;
parser.q = "";
parser.state = S.PROC_INST_BODY
parser.q = ""
}
continue;
continue
case S.OPEN_TAG:
if (is(nameBody, c)) parser.tagName += c;
if (is(nameBody, c)) parser.tagName += c
else {
newTag(parser);
if (c === ">") openTag(parser);
else if (c === "/") parser.state = S.OPEN_TAG_SLASH;
newTag(parser)
if (c === ">") openTag(parser)
else if (c === "/") parser.state = S.OPEN_TAG_SLASH
else {
if (not(whitespace, c)) strictFail(
parser, "Invalid character in tag name");
parser.state = S.ATTRIB;
parser, "Invalid character in tag name")
parser.state = S.ATTRIB
}
}
continue;
continue
case S.OPEN_TAG_SLASH:
if (c === ">") {
openTag(parser, true);
closeTag(parser);
openTag(parser, true)
closeTag(parser)
} else {
strictFail(parser, "Forward-slash in opening tag not followed by >");
parser.state = S.ATTRIB;
strictFail(parser, "Forward-slash in opening tag not followed by >")
parser.state = S.ATTRIB
}
continue;
continue
case S.ATTRIB:
// haven't read the attribute name yet.
if (is(whitespace, c)) continue;
else if (c === ">") openTag(parser);
else if (c === "/") parser.state = S.OPEN_TAG_SLASH;
if (is(whitespace, c)) continue
else if (c === ">") openTag(parser)
else if (c === "/") parser.state = S.OPEN_TAG_SLASH
else if (is(nameStart, c)) {
parser.attribName = c;
parser.attribValue = "";
parser.state = S.ATTRIB_NAME;
} else strictFail(parser, "Invalid attribute name");
continue;
parser.attribName = c
parser.attribValue = ""
parser.state = S.ATTRIB_NAME
} else strictFail(parser, "Invalid attribute name")
continue
case S.ATTRIB_NAME:
if (c === "=") parser.state = S.ATTRIB_VALUE;
else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE;
else if (is(nameBody, c)) parser.attribName += c;
else strictFail(parser, "Invalid attribute name");
continue;
if (c === "=") parser.state = S.ATTRIB_VALUE
else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
else if (is(nameBody, c)) parser.attribName += c
else strictFail(parser, "Invalid attribute name")
continue
case S.ATTRIB_NAME_SAW_WHITE:
if (c === "=") parser.state = S.ATTRIB_VALUE;
else if (is(whitespace, c)) continue;
if (c === "=") parser.state = S.ATTRIB_VALUE
else if (is(whitespace, c)) continue
else {
strictFail(parser, "Attribute without value");
parser.tag.attributes[parser.attribName] = "";
parser.attribValue = "";
emitNode(parser, "onattribute", { name : parser.attribName, value : "" });
parser.attribName = "";
if (c === ">") openTag(parser);
strictFail(parser, "Attribute without value")
parser.tag.attributes[parser.attribName] = ""
parser.attribValue = ""
emitNode(parser, "onattribute",
{ name : parser.attribName, value : "" })
parser.attribName = ""
if (c === ">") openTag(parser)
else if (is(nameStart, c)) {
parser.attribName = c;
parser.state = S.ATTRIB_NAME;
parser.attribName = c
parser.state = S.ATTRIB_NAME
} else {
strictFail(parser, "Invalid attribute name");
parser.state = S.ATTRIB;
strictFail(parser, "Invalid attribute name")
parser.state = S.ATTRIB
}
}
continue;
continue
case S.ATTRIB_VALUE:
if (is(whitespace, c)) continue;
if (is(whitespace, c)) continue
else if (is(quote, c)) {
parser.q = c;
parser.state = S.ATTRIB_VALUE_QUOTED;
parser.q = c
parser.state = S.ATTRIB_VALUE_QUOTED
} else {
strictFail(parser, "Unquoted attribute value");
parser.state = S.ATTRIB_VALUE_UNQUOTED;
parser.attribValue = c;
strictFail(parser, "Unquoted attribute value")
parser.state = S.ATTRIB_VALUE_UNQUOTED
parser.attribValue = c
}
continue;
continue
case S.ATTRIB_VALUE_QUOTED:
if (c !== parser.q) {
if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q;
else parser.attribValue += c;
continue;
if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
else parser.attribValue += c
continue
}
parser.tag.attributes[parser.attribName] = parser.attribValue;
parser.tag.attributes[parser.attribName] = parser.attribValue
emitNode(parser, "onattribute", {
name:parser.attribName, value:parser.attribValue});
parser.attribName = parser.attribValue = "";
parser.q = "";
parser.state = S.ATTRIB;
continue;
name:parser.attribName, value:parser.attribValue})
parser.attribName = parser.attribValue = ""
parser.q = ""
parser.state = S.ATTRIB
continue
case S.ATTRIB_VALUE_UNQUOTED:
if (not(whitespace+">",c)) {
if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U;
else parser.attribValue += c;
continue;
if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
else parser.attribValue += c
continue
}
emitNode(parser, "onattribute", {
name:parser.attribName, value:parser.attribValue});
parser.attribName = parser.attribValue = "";
if (c === ">") openTag(parser);
else parser.state = S.ATTRIB;
continue;
emitNode(parser, "onattribute",
{ name: parser.attribName, value: parser.attribValue})
parser.attribName = parser.attribValue = ""
if (c === ">") openTag(parser)
else parser.state = S.ATTRIB
continue
case S.CLOSE_TAG:
if (!parser.tagName) {
if (is(whitespace, c)) continue;
if (is(whitespace, c)) continue
else if (not(nameStart, c)) strictFail(parser,
"Invalid tagname in closing tag.");
else parser.tagName = c;
"Invalid tagname in closing tag.")
else parser.tagName = c
}
else if (c === ">") closeTag(parser);
else if (is(nameBody, c)) parser.tagName += c;
else if (c === ">") closeTag(parser)
else if (is(nameBody, c)) parser.tagName += c
else {
if (not(whitespace, c)) strictFail(parser,
"Invalid tagname in closing tag");
parser.state = S.CLOSE_TAG_SAW_WHITE;
"Invalid tagname in closing tag")
parser.state = S.CLOSE_TAG_SAW_WHITE
}
continue;
continue
case S.CLOSE_TAG_SAW_WHITE:
if (is(whitespace, c)) continue;
if (c === ">") closeTag(parser);
else strictFail("Invalid characters in closing tag");
continue;
if (is(whitespace, c)) continue
if (c === ">") closeTag(parser)
else strictFail("Invalid characters in closing tag")
continue
case S.TEXT_ENTITY:

@@ -591,27 +681,27 @@ case S.ATTRIB_VALUE_ENTITY_Q:

case S.TEXT_ENTITY:
var returnState = S.TEXT, buffer = "textNode";
break;
var returnState = S.TEXT, buffer = "textNode"
break
case S.ATTRIB_VALUE_ENTITY_Q:
var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue";
break;
var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
break
case S.ATTRIB_VALUE_ENTITY_U:
var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue";
break;
var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
break
}
if (c === ";") {
parser[buffer] += parseEntity(parser);
parser.entity = "";
parser.state = returnState;
parser[buffer] += parseEntity(parser)
parser.entity = ""
parser.state = returnState
}
else if (is(entity, c)) parser.entity += c;
else if (is(entity, c)) parser.entity += c
else {
strictFail("Invalid character entity");
parser[buffer] += "&" + parser.entity;
parser.entity = "";
parser.state = returnState;
strictFail("Invalid character entity")
parser[buffer] += "&" + parser.entity
parser.entity = ""
parser.state = returnState
}
continue;
continue
default:
throw new Error(parser, "Unknown state: " + parser.state);
break;
throw new Error(parser, "Unknown state: " + parser.state)
break
}

@@ -621,8 +711,9 @@ } // while

// if (parser.state === S.CDATA && parser.cdata) {
// emitNode(parser, "oncdata", parser.cdata);
// parser.cdata = "";
// emitNode(parser, "oncdata", parser.cdata)
// parser.cdata = ""
// }
if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser);
return parser;
if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
return parser
}
})(typeof exports === "undefined" ? sax = {} : exports)
{ "name" : "sax"
, "author" : "Isaac Z. Schlueter <i@izs.me>"
, "version" : "0.1.5"
, "version" : "0.2.0"
, "main" : "lib/sax"

@@ -5,0 +5,0 @@ , "license" : "MIT"

@@ -60,2 +60,24 @@ # sax js

// stream usage
// takes the same options as the parser
var saxStream = require("sax").createStream(strict, options)
saxStream.on("error", function (e) {
// unhandled errors will throw, since this is a proper node
// event emitter.
console.error("error!", e)
// clear the error
this._parser.error = null
this._parser.resume()
})
saxStream.on("opentag", function (node) {
// same object as above
})
// pipe is supported, and it's readable/writable
// same chunks coming in also go out.
fs.createReadStream("file.xml")
.pipe(saxStream)
.pipe(fs.createWriteStream("file-copy.xml"))
## Arguments

@@ -109,2 +131,5 @@

When using the stream interface, assign handlers using the EventEmitter
`on` function in the normal fashion.
`error` - Indication that something bad happened. The error will be hanging out on

@@ -149,8 +174,1 @@ `parser.error`, and must be deleted before parsing can continue. By listening to

`ready` - Indication that the stream has reset, and is ready to be written to.
## Todo
Build an HTML parser on top of this, which follows the same parsing rules as web browsers.
Make it fast by replacing the trampoline with a switch, and not buffering so much
stuff.
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc