parse5 - npm Package Compare versions

5

index.js

		@@ -1,2 +0,3 @@
		var Parser = require('./lib/parser').Parser;
		var Parser = require('./lib/parser').Parser,
		HTML = require('./lib/html');

		@@ -7,2 +8,2 @@ exports.parse = function (html, treeAdapter) {
		return parser.parse();
		};
		};

19

lib/default_tree_adapter.js

		@@ -1,2 +0,1 @@
		//TODO test it
		exports.createDocument = function () {
		@@ -10,4 +9,12 @@ return {

		exports.createElement = function (tagName, attrs, namespaceURI) {
		exports.createDocumentFragment = function () {
		return {
		nodeName: '#document-fragment',
		quirksMode: false,
		childNodes: []
		};
		};

		exports.createElement = function (tagName, namespaceURI, attrs) {
		return {
		nodeName: tagName,
		@@ -64,2 +71,10 @@ tagName: tagName,

		//Marks the given document node as being in quirks mode by setting its quirksMode property to true.
		exports.setQuirksMode = function (document) {
		document.quirksMode = true;
		};

		//Returns the quirksMode property of the given document node.
		exports.isQuirksMode = function (document) {
		return document.quirksMode;
		};

		var appendChild = exports.appendChild = function (parentNode, newNode) {
		@@ -66,0 +81,0 @@ parentNode.childNodes.push(newNode);

34

lib/open_element_stack.js

		@@ -148,7 +148,2 @@ var HTML = require('./html');

		//Pops elements off the stack until the current element is in the HTML namespace,
		//is a MathML text integration point or is an HTML integration point.
		OpenElementStack.prototype.clearBackToNonForeignContext = function () {
		    for (;;) {
		        if (this.currentNamespaceURI === NS.HTML)
		            return;

		        //NOTE: checks are performed in the same order as the namespace test above:
		        //MathML text integration point first, HTML integration point second.
		        if (this.isMathMLTextIntegrationPoint() || this.isHtmlIntegrationPoint())
		            return;

		        this.pop();
		    }
		};

		OpenElementStack.prototype.clearBackToTableContext = function () {
		@@ -314,31 +309,2 @@ while (this.currentTagName !== $.TABLE && this.currentTagName !== $.HTML)

		//Integration points
		//Tells whether the current element is a MathML text integration point:
		//an element in the MathML namespace whose tag name is one of MI, MO, MN, MS or MTEXT.
		OpenElementStack.prototype.isMathMLTextIntegrationPoint = function () {
		    return this.currentNamespaceURI === NS.MATHML &&
		           (this.currentTagName === $.MI || this.currentTagName === $.MO ||
		            this.currentTagName === $.MN || this.currentTagName === $.MS ||
		            this.currentTagName === $.MTEXT);
		};

		//Tells whether the current element is an HTML integration point. This is the case for:
		// * a MathML element with the ANNOTATION_XML tag name that has an ENCODING_ATTR attribute whose
		//   lowercased value equals APPLICATION_XML_MIME_TYPE or TEXT_HTML_MIME_TYPE;
		// * an SVG element with the FOREIGN_OBJECT, DESC or TITLE tag name.
		OpenElementStack.prototype.isHtmlIntegrationPoint = function () {
		    if (this.currentNamespaceURI === NS.MATHML && this.currentTagName === $.ANNOTATION_XML) {
		        var attrs = this.treeAdapter.getAttrList(this.current);

		        for (var i = 0; i < attrs.length; i++) {
		            if (attrs[i].name === ENCODING_ATTR) {
		                //NOTE: the attribute value is lowercased before it is compared with the MIME type constants.
		                var value = attrs[i].value.toLowerCase();

		                if (value === APPLICATION_XML_MIME_TYPE || value === TEXT_HTML_MIME_TYPE)
		                    return true;
		            }
		        }
		    }

		    //NOTE: a MathML element that did not match above always fails the namespace test here.
		    return this.currentNamespaceURI === NS.SVG &&
		           (this.currentTagName === $.FOREIGN_OBJECT ||
		            this.currentTagName === $.DESC ||
		            this.currentTagName === $.TITLE);
		};


		//Implied end tags
		@@ -345,0 +311,0 @@ OpenElementStack.prototype.generateImpliedEndTags = function () {

2

package.json

		{
		"name": "parse5",
		"description": "Fast full-featured HTML parser for Node. Based on WHATWG HTML5 specification.",
		"version": "0.5.2",
		"version": "0.5.3",
		"author": "Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)",
		@@ -6,0 +6,0 @@ "keywords": ["html", "parser", "html5", "WHATWG", "specification", "fast"],

70

test/fixtures/open_element_stack_test.js

		@@ -249,30 +249,2 @@ var HTML = require('../../lib/html'),

		//Checks that clearBackToNonForeignContext() pops foreign elements and stops at an element in the
		//HTML namespace, at a MathML text integration point or at an HTML integration point.
		exports['Clear back to non-foreign context'] = function (t) {
		var stack = new OpenElementStack('#document', defaultTreeAdapter);

		//NOTE: the SVG element is expected to be popped, leaving only the HTML-namespace element (stackTop 0).
		stack.push({tagName: $.HTML, namespaceURI: NS.HTML});
		stack.push({tagName: $.B, namespaceURI: NS.SVG});
		stack.clearBackToNonForeignContext();
		t.strictEqual(stack.stackTop, 0);
		t.strictEqual(stack.currentTagName, $.HTML);

		//NOTE: only the topmost SVG element is expected to be popped; the MathML MO element below it
		//is a MathML text integration point, so the clearing stops there (stackTop 3).
		stack.push({tagName: $.P, namespaceURI: NS.SVG});
		stack.push({tagName: $.UL, namespaceURI: NS.SVG});
		stack.push({tagName: $.MO, namespaceURI: NS.MATHML});
		stack.push({tagName: $.OPTION, namespaceURI: NS.SVG});
		stack.clearBackToNonForeignContext();
		t.strictEqual(stack.stackTop, 3);
		t.strictEqual(stack.currentTagName, $.MO);

		//NOTE: the two SVG elements on top are expected to be popped; the SVG DESC element
		//is an HTML integration point, so the clearing stops there (stackTop 4).
		stack.push({tagName: $.DESC, namespaceURI: NS.SVG});
		stack.push({tagName: $.P, namespaceURI: NS.SVG});
		stack.push({tagName: $.UL, namespaceURI: NS.SVG});
		stack.clearBackToNonForeignContext();
		t.strictEqual(stack.stackTop, 4);
		t.strictEqual(stack.currentTagName, $.DESC);

		t.done();
		};


		exports['Remove element'] = function (t) {
		@@ -495,44 +467,2 @@ var element = '#element',

		//Checks isMathMLTextIntegrationPoint() against the element that is currently on top of the stack.
		exports['Is MathML integration point'] = function (t) {
		var stack = new OpenElementStack('#document', defaultTreeAdapter);

		//NOTE: an HTML-namespace element on top is not an integration point.
		stack.push({tagName: $.HTML, namespaceURI: NS.HTML});
		stack.push({tagName: $.DIV, namespaceURI: NS.HTML});
		t.ok(!stack.isMathMLTextIntegrationPoint());

		//NOTE: a MathML MO element on top is an integration point.
		stack.push({tagName: $.MO, namespaceURI: NS.MATHML});
		t.ok(stack.isMathMLTextIntegrationPoint());

		//NOTE: only the top element counts - an HTML element pushed above MO turns the result back to false.
		stack.push({tagName: $.DIV, namespaceURI: NS.HTML});
		t.ok(!stack.isMathMLTextIntegrationPoint());

		t.done();
		};

		//Checks isHtmlIntegrationPoint() against the element that is currently on top of the stack.
		exports['Is HTML integration point'] = function (t) {
		var stack = new OpenElementStack('#document', defaultTreeAdapter);

		//NOTE: an HTML-namespace element on top is not an integration point.
		stack.push({tagName: $.HTML, namespaceURI: NS.HTML});
		stack.push({tagName: $.DIV, namespaceURI: NS.HTML});
		t.ok(!stack.isHtmlIntegrationPoint());

		//NOTE: an SVG TITLE element on top is an integration point.
		stack.push({tagName: $.TITLE, namespaceURI: NS.SVG});
		t.ok(stack.isHtmlIntegrationPoint());

		//NOTE: only the top element counts - an HTML element pushed above TITLE turns the result back to false.
		stack.push({tagName: $.DIV, namespaceURI: NS.HTML});
		t.ok(!stack.isHtmlIntegrationPoint());

		//NOTE: a MathML ANNOTATION_XML element with a mixed-case 'encoding' value is expected to be accepted.
		stack.push({tagName: $.ANNOTATION_XML, namespaceURI: NS.MATHML, attrs: [
		{name: 'encoding', value: 'apPlicAtion/xhtml+xml'}
		]});
		t.ok(stack.isHtmlIntegrationPoint());

		//NOTE: the same element with an unrelated 'encoding' value is expected to be rejected.
		stack.push({tagName: $.ANNOTATION_XML, namespaceURI: NS.MATHML, attrs: [
		{name: 'encoding', value: 'someValues'}
		]});
		t.ok(!stack.isHtmlIntegrationPoint());

		t.done();
		};

		exports['Generate implied end tags'] = function (t) {
		@@ -539,0 +469,0 @@ var stack = new OpenElementStack('#document', defaultTreeAdapter);

25

test/fixtures/parser_test.js

		var fs = require('fs'),
		path = require('path'),
		HTML = require('../../lib/html'),
		treeAdapter = require('../../lib/default_tree_adapter'),
		Parser = require('../../lib/parser').Parser;
		@@ -36,11 +37,13 @@
		testDescrs.forEach(function (descr) {
		if (!descr['#document-fragment']) {
		tests.push({
		idx: ++testIdx,
		setName: setName,
		input: descr['#data'].join('\r\n'),
		expected: descr['#document'].join('\n'),
		expectedErrors: descr['#errors']
		});
		}
		var fragmentContextTagName = descr['#document-fragment'] && descr['#document-fragment'].join('');

		tests.push({
		idx: ++testIdx,
		setName: setName,
		input: descr['#data'].join('\r\n'),
		expected: descr['#document'].join('\n'),
		expectedErrors: descr['#errors'],
		fragmentContext: fragmentContextTagName ?
		treeAdapter.createElement(fragmentContextTagName, HTML.NAMESPACES.HTML, []) : null
		});
		});
		@@ -150,4 +153,4 @@ });
		exports[getFullTestName(test)] = function (t) {
		//TODO handler errors
		var parser = new Parser(test.input),
		//TODO handle errors
		var parser = new Parser(test.input, test.fragmentContext),
		document = parser.parse(),
		@@ -154,0 +157,0 @@ serializedDocument = serializeNodeList(document.childNodes, 0);

88

test/fixtures/tokenizer_test.js

		@@ -9,3 +9,6 @@ var fs = require('fs'),
		nextToken = null,
		out = [];
		out = {
		tokens: [],
		errCount: 0
		};

		@@ -20,22 +23,8 @@ tokenizer.state = initialState;

		//NOTE: if we have parse errors append them to the output sequence
		if (tokenizer.errs.length) {
		for (var i = 0; i < tokenizer.errs.length; i++)
		out.push('ParseError');

		tokenizer.errs = [];
		}

		//NOTE: append current token to the output sequence in html5lib test suite compatible format
		switch (nextToken.type) {
		case Tokenizer.CHARACTER_TOKEN:
		//NOTE: html5lib test suite concatenates all character tokens into one token.
		//So if last entry in output sequence is a character token we just append obtained token
		//to its data string. Otherwise we create a new character token entry.
		var lastEntry = out[out.length - 1];

		if (util.isArray(lastEntry) && lastEntry[0] === 'Character')
		lastEntry[1] += nextToken.ch;
		else
		out.push(['Character', nextToken.ch]);
		case Tokenizer.NULL_CHARACTER_TOKEN:
		case Tokenizer.WHITESPACE_CHARACTER_TOKEN:
		out.tokens.push(['Character', nextToken.ch]);
		break;
		@@ -59,15 +48,15 @@

		out.push(startTagEntry);
		out.tokens.push(startTagEntry);
		break;

		case Tokenizer.END_TAG_TOKEN:
		out.push(['EndTag', nextToken.tagName]);
		out.tokens.push(['EndTag', nextToken.tagName]);
		break;

		case Tokenizer.COMMENT_TOKEN:
		out.push(['Comment', nextToken.data]);
		out.tokens.push(['Comment', nextToken.data]);
		break;

		case Tokenizer.DOCTYPE_TOKEN:
		out.push([
		out.tokens.push([
		'DOCTYPE',
		@@ -83,2 +72,5 @@ nextToken.name,

		out.errCount = tokenizer.errs.length;
		out.tokens = concatCharacterTokens(out.tokens);

		return out;
		@@ -96,4 +88,4 @@ }

		testDescr.output.forEach(function (token) {
		if (token === 'ParseError')
		testDescr.output.forEach(function (tokenEntry) {
		if (tokenEntry === 'ParseError')
		return;
		@@ -103,11 +95,11 @@
		//character token data (for Character token).
		token[1] = unicodeUnescape(token[1]);
		tokenEntry[1] = unicodeUnescape(tokenEntry[1]);

		//NOTE: unescape token attributes(if we have them).
		if (token.length > 2) {
		Object.keys(token).forEach(function (attrName) {
		var attrVal = token[attrName];
		if (tokenEntry.length > 2) {
		Object.keys(tokenEntry).forEach(function (attrName) {
		var attrVal = tokenEntry[attrName];

		delete token[attrName];
		token[unicodeUnescape(attrName)] = unicodeUnescape(attrVal);
		delete tokenEntry[attrName];
		tokenEntry[unicodeUnescape(attrName)] = unicodeUnescape(attrVal);
		});
		@@ -118,2 +110,21 @@ }

		//Merges every run of adjacent 'Character' token entries into a single entry whose data string
		//is the concatenation of the run. Entries of other types are passed through in their original order.
		//Returns a new array.
		//NOTE: 'Character' entries are copied before anything is appended to them, so neither the input array
		//nor its entries are modified. This makes it safe to call the function several times on the same
		//input (e.g. once per initial state of a test description) - the result is always the same.
		function concatCharacterTokens(tokenEntries) {
		    var result = [];

		    tokenEntries.forEach(function (tokenEntry) {
		        var isCharacter = tokenEntry[0] === 'Character',
		            lastEntry = result[result.length - 1];

		        if (isCharacter && lastEntry && lastEntry[0] === 'Character') {
		            //NOTE: a 'Character' entry in the result is always our own copy, so it can be extended in place.
		            lastEntry[1] += tokenEntry[1];
		            return;
		        }

		        result.push(isCharacter ? tokenEntry.slice() : tokenEntry);
		    });

		    return result;
		}

		function getTokenizerSuitableStateName(testDataStateName) {
		@@ -143,2 +154,12 @@ return testDataStateName.toUpperCase().replace(/\s/g, '_');

		//Splits the expected output of the test description into real token entries and a count of
		//'ParseError' markers: markers are counted and dropped, everything else is kept in order.
		var expectedErrCount = 0,
		    expectedTokens = descr.output.filter(function (tokenEntry) {
		        if (tokenEntry !== 'ParseError')
		            return true;

		        expectedErrCount++;
		        return false;
		    });

		descr.initialStates.forEach(function (initialState) {
		@@ -150,3 +171,4 @@ tests.push({
		input: descr.input,
		expected: descr.output,
		expectedTokens: concatCharacterTokens(expectedTokens),
		expectedErrCount: expectedErrCount,
		initialState: getTokenizerSuitableStateName(initialState),
		@@ -171,5 +193,7 @@ lastStartTag: descr.lastStartTag

		t.deepEqual(out, test.expected);
		t.deepEqual(out.tokens, test.expectedTokens);
		t.strictEqual(out.errCount, test.expectedErrCount);

		t.done();
		};
		});

lib/parser.js

Sorry, the diff of this file is too big to display

lib/tokenizer.js

Sorry, the diff of this file is too big to display

Improved metrics