parse5
Advanced tools
Comparing version 0.5.2 to 0.5.3
@@ -1,2 +0,3 @@ | ||
var Parser = require('./lib/parser').Parser; | ||
var Parser = require('./lib/parser').Parser, | ||
HTML = require('./lib/html'); | ||
@@ -7,2 +8,2 @@ exports.parse = function (html, treeAdapter) { | ||
return parser.parse(); | ||
}; | ||
}; |
@@ -1,2 +0,1 @@ | ||
//TODO test it | ||
exports.createDocument = function () { | ||
@@ -10,4 +9,12 @@ return { | ||
exports.createElement = function (tagName, attrs, namespaceURI) { | ||
exports.createDocumentFragment = function () { | ||
return { | ||
nodeName: '#document-fragment', | ||
quirksMode: false, | ||
childNodes: [] | ||
}; | ||
}; | ||
exports.createElement = function (tagName, namespaceURI, attrs) { | ||
return { | ||
nodeName: tagName, | ||
@@ -64,2 +71,10 @@ tagName: tagName, | ||
exports.setQuirksMode = function (document) { | ||
document.quirksMode = true; | ||
}; | ||
exports.isQuirksMode = function (document) { | ||
return document.quirksMode; | ||
}; | ||
var appendChild = exports.appendChild = function (parentNode, newNode) { | ||
@@ -66,0 +81,0 @@ parentNode.childNodes.push(newNode); |
@@ -148,7 +148,2 @@ var HTML = require('./html'); | ||
/**
 * Pops elements off the stack until the current element is in the HTML
 * namespace, is a MathML text integration point, or is an HTML integration point.
 */
OpenElementStack.prototype.clearBackToNonForeignContext = function () {
    //NOTE: the three stop conditions are checked in the same order as before,
    //so the integration point checks only run for non-HTML current elements.
    while (!(this.currentNamespaceURI === NS.HTML ||
             this.isMathMLTextIntegrationPoint() ||
             this.isHtmlIntegrationPoint())) {
        this.pop();
    }
};
OpenElementStack.prototype.clearBackToTableContext = function () { | ||
@@ -314,31 +309,2 @@ while (this.currentTagName !== $.TABLE && this.currentTagName !== $.HTML) | ||
//Integration points | ||
/**
 * Tells whether the current element is a MathML text integration point,
 * i.e. a MathML-namespaced element whose tag name is MI, MO, MN, MS or MTEXT.
 *
 * @returns {boolean} true if the current element matches, false otherwise.
 */
OpenElementStack.prototype.isMathMLTextIntegrationPoint = function () {
    if (this.currentNamespaceURI !== NS.MATHML)
        return false;

    switch (this.currentTagName) {
        case $.MI:
        case $.MO:
        case $.MN:
        case $.MS:
        case $.MTEXT:
            return true;

        default:
            return false;
    }
};
/**
 * Tells whether the current element is an HTML integration point.
 *
 * The current element qualifies if it is either:
 *  - a MathML ANNOTATION_XML element with an attribute named ENCODING_ATTR whose
 *    lower-cased value equals APPLICATION_XML_MIME_TYPE or TEXT_HTML_MIME_TYPE;
 *  - an SVG FOREIGN_OBJECT, DESC or TITLE element.
 *
 * @returns {boolean} true if the current element matches, false otherwise.
 */
OpenElementStack.prototype.isHtmlIntegrationPoint = function () {
    var isAnnotationXml = this.currentNamespaceURI === NS.MATHML &&
                          this.currentTagName === $.ANNOTATION_XML;

    if (isAnnotationXml) {
        var attrList = this.treeAdapter.getAttrList(this.current);

        for (var idx = 0; idx < attrList.length; idx++) {
            var attr = attrList[idx];

            if (attr.name !== ENCODING_ATTR)
                continue;

            //NOTE: the encoding value is compared case-insensitively.
            var encoding = attr.value.toLowerCase();

            if (encoding === APPLICATION_XML_MIME_TYPE || encoding === TEXT_HTML_MIME_TYPE)
                return true;
        }
    }

    //NOTE: an ANNOTATION_XML element without a suitable encoding falls through to
    //the SVG check below, which it can't pass since it's in the MathML namespace.
    if (this.currentNamespaceURI !== NS.SVG)
        return false;

    switch (this.currentTagName) {
        case $.FOREIGN_OBJECT:
        case $.DESC:
        case $.TITLE:
            return true;

        default:
            return false;
    }
};
//Implied end tags | ||
@@ -345,0 +311,0 @@ OpenElementStack.prototype.generateImpliedEndTags = function () { |
{ | ||
"name": "parse5", | ||
"description": "Fast full-featured HTML parser for Node. Based on WHATWG HTML5 specification.", | ||
"version": "0.5.2", | ||
"version": "0.5.3", | ||
"author": "Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)", | ||
@@ -6,0 +6,0 @@ "keywords": ["html", "parser", "html5", "WHATWG", "specification", "fast"], |
@@ -249,30 +249,2 @@ var HTML = require('../../lib/html'), | ||
//Checks that clearBackToNonForeignContext() stops at an HTML element,
//at a MathML text integration point and at an HTML integration point.
exports['Clear back to non-foreign context'] = function (t) {
    var stack = new OpenElementStack('#document', defaultTreeAdapter);

    function pushAll(elements) {
        elements.forEach(function (element) {
            stack.push(element);
        });
    }

    function clearAndExpect(expectedStackTop, expectedTagName) {
        stack.clearBackToNonForeignContext();
        t.strictEqual(stack.stackTop, expectedStackTop);
        t.strictEqual(stack.currentTagName, expectedTagName);
    }

    //NOTE: stops at the HTML-namespaced root
    pushAll([
        {tagName: $.HTML, namespaceURI: NS.HTML},
        {tagName: $.B, namespaceURI: NS.SVG}
    ]);
    clearAndExpect(0, $.HTML);

    //NOTE: stops at the MathML MO element
    pushAll([
        {tagName: $.P, namespaceURI: NS.SVG},
        {tagName: $.UL, namespaceURI: NS.SVG},
        {tagName: $.MO, namespaceURI: NS.MATHML},
        {tagName: $.OPTION, namespaceURI: NS.SVG}
    ]);
    clearAndExpect(3, $.MO);

    //NOTE: stops at the SVG DESC element
    pushAll([
        {tagName: $.DESC, namespaceURI: NS.SVG},
        {tagName: $.P, namespaceURI: NS.SVG},
        {tagName: $.UL, namespaceURI: NS.SVG}
    ]);
    clearAndExpect(4, $.DESC);

    t.done();
};
exports['Remove element'] = function (t) { | ||
@@ -495,44 +467,2 @@ var element = '#element', | ||
//Checks isMathMLTextIntegrationPoint() against HTML and MathML current elements.
exports['Is MathML integration point'] = function (t) {
    var stack = new OpenElementStack('#document', defaultTreeAdapter);

    //NOTE: pushes an element and asserts the check result for it as the new current element
    function pushAndExpect(element, expected) {
        stack.push(element);
        t.ok(expected ? stack.isMathMLTextIntegrationPoint() : !stack.isMathMLTextIntegrationPoint());
    }

    stack.push({tagName: $.HTML, namespaceURI: NS.HTML});

    pushAndExpect({tagName: $.DIV, namespaceURI: NS.HTML}, false);
    pushAndExpect({tagName: $.MO, namespaceURI: NS.MATHML}, true);
    pushAndExpect({tagName: $.DIV, namespaceURI: NS.HTML}, false);

    t.done();
};
//Checks isHtmlIntegrationPoint() against HTML, SVG and MathML ANNOTATION_XML
//current elements, including a mixed-case encoding attribute value.
exports['Is HTML integration point'] = function (t) {
    var stack = new OpenElementStack('#document', defaultTreeAdapter);

    //NOTE: pushes an element and asserts the check result for it as the new current element
    function pushAndExpect(element, expected) {
        stack.push(element);
        t.ok(expected ? stack.isHtmlIntegrationPoint() : !stack.isHtmlIntegrationPoint());
    }

    function annotationXml(encoding) {
        return {
            tagName: $.ANNOTATION_XML,
            namespaceURI: NS.MATHML,
            attrs: [
                {name: 'encoding', value: encoding}
            ]
        };
    }

    stack.push({tagName: $.HTML, namespaceURI: NS.HTML});

    pushAndExpect({tagName: $.DIV, namespaceURI: NS.HTML}, false);
    pushAndExpect({tagName: $.TITLE, namespaceURI: NS.SVG}, true);
    pushAndExpect({tagName: $.DIV, namespaceURI: NS.HTML}, false);
    pushAndExpect(annotationXml('apPlicAtion/xhtml+xml'), true);
    pushAndExpect(annotationXml('someValues'), false);

    t.done();
};
exports['Generate implied end tags'] = function (t) { | ||
@@ -539,0 +469,0 @@ var stack = new OpenElementStack('#document', defaultTreeAdapter); |
var fs = require('fs'), | ||
path = require('path'), | ||
HTML = require('../../lib/html'), | ||
treeAdapter = require('../../lib/default_tree_adapter'), | ||
Parser = require('../../lib/parser').Parser; | ||
@@ -36,11 +37,13 @@ | ||
testDescrs.forEach(function (descr) { | ||
if (!descr['#document-fragment']) { | ||
tests.push({ | ||
idx: ++testIdx, | ||
setName: setName, | ||
input: descr['#data'].join('\r\n'), | ||
expected: descr['#document'].join('\n'), | ||
expectedErrors: descr['#errors'] | ||
}); | ||
} | ||
var fragmentContextTagName = descr['#document-fragment'] && descr['#document-fragment'].join(''); | ||
tests.push({ | ||
idx: ++testIdx, | ||
setName: setName, | ||
input: descr['#data'].join('\r\n'), | ||
expected: descr['#document'].join('\n'), | ||
expectedErrors: descr['#errors'], | ||
fragmentContext: fragmentContextTagName ? | ||
treeAdapter.createElement(fragmentContextTagName, HTML.NAMESPACES.HTML, []) : null | ||
}); | ||
}); | ||
@@ -150,4 +153,4 @@ }); | ||
exports[getFullTestName(test)] = function (t) { | ||
//TODO handler errors | ||
var parser = new Parser(test.input), | ||
//TODO handle errors | ||
var parser = new Parser(test.input, test.fragmentContext), | ||
document = parser.parse(), | ||
@@ -154,0 +157,0 @@ serializedDocument = serializeNodeList(document.childNodes, 0); |
@@ -9,3 +9,6 @@ var fs = require('fs'), | ||
nextToken = null, | ||
out = []; | ||
out = { | ||
tokens: [], | ||
errCount: 0 | ||
}; | ||
@@ -20,22 +23,8 @@ tokenizer.state = initialState; | ||
//NOTE: if we have parse errors append them to the output sequence | ||
if (tokenizer.errs.length) { | ||
for (var i = 0; i < tokenizer.errs.length; i++) | ||
out.push('ParseError'); | ||
tokenizer.errs = []; | ||
} | ||
//NOTE: append current token to the output sequence in html5lib test suite compatible format | ||
switch (nextToken.type) { | ||
case Tokenizer.CHARACTER_TOKEN: | ||
//NOTE: html5lib test suite concatenates all character tokens into one token. | ||
//So if last entry in output sequence is a character token we just append obtained token | ||
//to its data string. Otherwise we create a new character token entry. | ||
var lastEntry = out[out.length - 1]; | ||
if (util.isArray(lastEntry) && lastEntry[0] === 'Character') | ||
lastEntry[1] += nextToken.ch; | ||
else | ||
out.push(['Character', nextToken.ch]); | ||
case Tokenizer.NULL_CHARACTER_TOKEN: | ||
case Tokenizer.WHITESPACE_CHARACTER_TOKEN: | ||
out.tokens.push(['Character', nextToken.ch]); | ||
break; | ||
@@ -59,15 +48,15 @@ | ||
out.push(startTagEntry); | ||
out.tokens.push(startTagEntry); | ||
break; | ||
case Tokenizer.END_TAG_TOKEN: | ||
out.push(['EndTag', nextToken.tagName]); | ||
out.tokens.push(['EndTag', nextToken.tagName]); | ||
break; | ||
case Tokenizer.COMMENT_TOKEN: | ||
out.push(['Comment', nextToken.data]); | ||
out.tokens.push(['Comment', nextToken.data]); | ||
break; | ||
case Tokenizer.DOCTYPE_TOKEN: | ||
out.push([ | ||
out.tokens.push([ | ||
'DOCTYPE', | ||
@@ -83,2 +72,5 @@ nextToken.name, | ||
out.errCount = tokenizer.errs.length; | ||
out.tokens = concatCharacterTokens(out.tokens); | ||
return out; | ||
@@ -96,4 +88,4 @@ } | ||
testDescr.output.forEach(function (token) { | ||
if (token === 'ParseError') | ||
testDescr.output.forEach(function (tokenEntry) { | ||
if (tokenEntry === 'ParseError') | ||
return; | ||
@@ -103,11 +95,11 @@ | ||
//character token data (for Character token). | ||
token[1] = unicodeUnescape(token[1]); | ||
tokenEntry[1] = unicodeUnescape(tokenEntry[1]); | ||
//NOTE: unescape token attributes(if we have them). | ||
if (token.length > 2) { | ||
Object.keys(token).forEach(function (attrName) { | ||
var attrVal = token[attrName]; | ||
if (tokenEntry.length > 2) { | ||
Object.keys(tokenEntry).forEach(function (attrName) { | ||
var attrVal = tokenEntry[attrName]; | ||
delete token[attrName]; | ||
token[unicodeUnescape(attrName)] = unicodeUnescape(attrVal); | ||
delete tokenEntry[attrName]; | ||
tokenEntry[unicodeUnescape(attrName)] = unicodeUnescape(attrVal); | ||
}); | ||
@@ -118,2 +110,21 @@ } | ||
/**
 * Merges every run of adjacent 'Character' token entries into a single entry
 * whose data is the concatenation of the run's data strings.
 *
 * Entries are expected in the [type, data, ...] array form, e.g. ['Character', 'a'].
 * The input list and its entries are left untouched, so the function can be
 * called repeatedly on the same list and always yields the same result.
 *
 * @param {Array} tokenEntries - token entries to process.
 * @returns {Array} new list with adjacent 'Character' entries merged. Entries of
 *                  other types are passed through by reference.
 */
function concatCharacterTokens(tokenEntries) {
    var result = [];

    tokenEntries.forEach(function (tokenEntry) {
        if (tokenEntry[0] !== 'Character') {
            result.push(tokenEntry);
            return;
        }

        var lastEntry = result[result.length - 1];

        if (lastEntry && lastEntry[0] === 'Character')
            lastEntry[1] += tokenEntry[1];
        else {
            //NOTE: push a copy. Following character entries are appended to this entry,
            //and appending to the caller's own entry would corrupt the input list
            //(a second call on the same list would then produce doubled text).
            result.push(tokenEntry.slice());
        }
    });

    return result;
}
function getTokenizerSuitableStateName(testDataStateName) { | ||
@@ -143,2 +154,12 @@ return testDataStateName.toUpperCase().replace(/\s/g, '_'); | ||
var expectedTokens = [], | ||
expectedErrCount = 0; | ||
descr.output.forEach(function (tokenEntry) { | ||
if (tokenEntry === 'ParseError') | ||
expectedErrCount++; | ||
else | ||
expectedTokens.push(tokenEntry); | ||
}); | ||
descr.initialStates.forEach(function (initialState) { | ||
@@ -150,3 +171,4 @@ tests.push({ | ||
input: descr.input, | ||
expected: descr.output, | ||
expectedTokens: concatCharacterTokens(expectedTokens), | ||
expectedErrCount: expectedErrCount, | ||
initialState: getTokenizerSuitableStateName(initialState), | ||
@@ -171,5 +193,7 @@ lastStartTag: descr.lastStartTag | ||
t.deepEqual(out, test.expected); | ||
t.deepEqual(out.tokens, test.expectedTokens); | ||
t.strictEqual(out.errCount, test.expectedErrCount); | ||
t.done(); | ||
}; | ||
}); |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
2267714
6665