parse5
Advanced tools
Comparing version 1.3.2 to 1.4.0
@@ -0,1 +1,7 @@ | ||
## 1.4.0 | ||
* Add: Parser [decodeHtmlEntities](https://github.com/inikulin/parse5#optionsdecodehtmlentities) option. | ||
* Add: SimpleApiParser [decodeHtmlEntities](https://github.com/inikulin/parse5#optionsdecodehtmlentities-1) option. | ||
* Add: Parser [locationInfo](https://github.com/inikulin/parse5#optionslocationinfo) option. | ||
* Add: SimpleApiParser [locationInfo](https://github.com/inikulin/parse5#optionslocationinfo-1) option. | ||
## 1.3.2 | ||
@@ -2,0 +8,0 @@ * Fix: `<form>` processing in `<template>` (GH [#40](https://github.com/inikulin/parse5/issues/40)) |
@@ -0,0 +0,0 @@ 'use strict'; |
@@ -12,3 +12,5 @@ 'use strict'; | ||
//Default serializer options | ||
var DEFAULT_OPTIONS = {encodeHtmlEntities: true}; | ||
var DEFAULT_OPTIONS = { | ||
encodeHtmlEntities: true | ||
}; | ||
@@ -159,3 +161,3 @@ //Escaping regexes | ||
if(parent && this.treeAdapter.isElementNode(parent)) | ||
if (parent && this.treeAdapter.isElementNode(parent)) | ||
parentTn = this.treeAdapter.getTagName(parent); | ||
@@ -162,0 +164,0 @@ |
'use strict'; | ||
var Tokenizer = require('../tokenization/tokenizer'), | ||
TokenizerProxy = require('./tokenizer_proxy'); | ||
TokenizerProxy = require('./tokenizer_proxy'), | ||
Utils = require('../common/utils'); | ||
//Default options | ||
//NOTE: these defaults are merged with the `options` argument of the SimpleApiParser
//constructor via Utils.mergeOptions, and the merged object is also handed to TokenizerProxy.
var DEFAULT_OPTIONS = { | ||
//Decode HTML entities in the parsed input (enabled unless overridden).
decodeHtmlEntities: true, | ||
//When truthy, every handler is wrapped so that it receives the current token
//location as an extra last argument (disabled unless overridden).
locationInfo: false | ||
}; | ||
//Skipping handler | ||
@@ -12,12 +19,29 @@ function skip() { | ||
//SimpleApiParser | ||
var SimpleApiParser = module.exports = function (handlers) { | ||
var SimpleApiParser = module.exports = function (handlers, options) { | ||
this.options = Utils.mergeOptions(DEFAULT_OPTIONS, options); | ||
this.handlers = { | ||
doctype: handlers.doctype || skip, | ||
startTag: handlers.startTag || skip, | ||
endTag: handlers.endTag || skip, | ||
text: handlers.text || skip, | ||
comment: handlers.comment || skip | ||
doctype: this._wrapHandler(handlers.doctype), | ||
startTag: this._wrapHandler(handlers.startTag), | ||
endTag: this._wrapHandler(handlers.endTag), | ||
text: this._wrapHandler(handlers.text), | ||
comment: this._wrapHandler(handlers.comment) | ||
}; | ||
}; | ||
//Prepares a user-supplied event handler for use by the parser.
//A missing (falsy) handler is replaced with the `skip` handler. If the `locationInfo`
//option is truthy, the handler is wrapped in a function that appends the parser's
//current token location to the arguments; otherwise the handler is returned as is.
SimpleApiParser.prototype._wrapHandler = function (handler) { | ||
//NOTE: keep a reference to the parser, since `this` inside the wrapper below is not the parser.
var parser = this; | ||
handler = handler || skip; | ||
if (this.options.locationInfo) { | ||
return function () { | ||
//Copy `arguments` into a real array so that the location can be appended.
var args = Array.prototype.slice.call(arguments); | ||
//NOTE: the location is read at call time, so it reflects whatever
//`currentTokenLocation` holds when the handler is invoked.
args.push(parser.currentTokenLocation); | ||
//NOTE: the handler is invoked with itself as `this`, and its return value is discarded.
handler.apply(handler, args); | ||
}; | ||
} | ||
return handler; | ||
}; | ||
//API | ||
@@ -35,2 +59,11 @@ SimpleApiParser.prototype.parse = function (html) { | ||
token.type === Tokenizer.NULL_CHARACTER_TOKEN) { | ||
if (this.options.locationInfo) { | ||
if (this.pendingText === null) | ||
this.currentTokenLocation = token.location; | ||
else | ||
this.currentTokenLocation.end = token.location.end; | ||
} | ||
this.pendingText = (this.pendingText || '') + token.chars; | ||
@@ -43,3 +76,3 @@ } | ||
} | ||
} while (token.type !== Tokenizer.EOF_TOKEN) | ||
} while (token.type !== Tokenizer.EOF_TOKEN); | ||
}; | ||
@@ -49,2 +82,5 @@ | ||
SimpleApiParser.prototype._handleToken = function (token) { | ||
if (this.options.locationInfo) | ||
this.currentTokenLocation = token.location; | ||
if (token.type === Tokenizer.START_TAG_TOKEN) | ||
@@ -65,4 +101,5 @@ this.handlers.startTag(token.tagName, token.attrs, token.selfClosing); | ||
SimpleApiParser.prototype._reset = function (html) { | ||
this.tokenizerProxy = new TokenizerProxy(html); | ||
this.tokenizerProxy = new TokenizerProxy(html, this.options); | ||
this.pendingText = null; | ||
this.currentTokenLocation = null; | ||
}; | ||
@@ -69,0 +106,0 @@ |
@@ -15,4 +15,4 @@ 'use strict'; | ||
//NOTE: this proxy simulates the adjustment of the Tokenizer which is performed by the standard parser during tree construction. | ||
var TokenizerProxy = module.exports = function (html) { | ||
this.tokenizer = new Tokenizer(html); | ||
var TokenizerProxy = module.exports = function (html, options) { | ||
this.tokenizer = new Tokenizer(html, options); | ||
@@ -63,15 +63,15 @@ this.namespaceStack = []; | ||
//Token handlers | ||
TokenizerProxy.prototype._ensureTokenizerState = function (tn) { | ||
TokenizerProxy.prototype._ensureTokenizerMode = function (tn) { | ||
if (tn === $.TEXTAREA || tn === $.TITLE) | ||
this.tokenizer.state = Tokenizer.RCDATA_STATE; | ||
this.tokenizer.state = Tokenizer.MODE.RCDATA; | ||
else if (tn === $.PLAINTEXT) | ||
this.tokenizer.state = Tokenizer.PLAINTEXT_STATE; | ||
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT; | ||
else if (tn === $.SCRIPT) | ||
this.tokenizer.state = Tokenizer.SCRIPT_DATA_STATE; | ||
this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA; | ||
else if (tn === $.STYLE || tn === $.IFRAME || tn === $.XMP || | ||
tn === $.NOEMBED || tn === $.NOFRAMES || tn === $.NOSCRIPT) { | ||
this.tokenizer.state = Tokenizer.RAWTEXT_STATE; | ||
this.tokenizer.state = Tokenizer.MODE.RAWTEXT; | ||
} | ||
@@ -101,3 +101,3 @@ }; | ||
else | ||
this._ensureTokenizerState(tn); | ||
this._ensureTokenizerMode(tn); | ||
} | ||
@@ -119,7 +119,8 @@ }; | ||
else if (tn === $.SCRIPT) | ||
this.tokenizer.state = Tokenizer.DATA_STATE; | ||
this.tokenizer.state = Tokenizer.MODE.DATA; | ||
} | ||
else if ((tn === $.SVG && this.currentNamespace === NS.SVG) || (tn === $.MATH && this.currentNamespace === NS.MATHML)) | ||
else if ((tn === $.SVG && this.currentNamespace === NS.SVG) || | ||
(tn === $.MATH && this.currentNamespace === NS.MATHML)) | ||
this._leaveCurrentNamespace(); | ||
}; |
{ | ||
"name": "parse5", | ||
"description": "WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node.", | ||
"version": "1.3.2", | ||
"version": "1.4.0", | ||
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)", | ||
@@ -6,0 +6,0 @@ "contributors": [ |
@@ -71,5 +71,11 @@ <p align="center"> | ||
#### • Parser.ctor([treeAdapter]) | ||
#### • Parser.ctor([treeAdapter, options]) | ||
Creates a new reusable instance of the `Parser`. The optional `treeAdapter` argument specifies the resulting tree format. If the `treeAdapter` argument is not specified, the `default` tree adapter will be used. | ||
`options` object provides the parsing algorithm modifications: | ||
##### options.decodeHtmlEntities | ||
Decode HTML-entities like `&amp;`, `&nbsp;`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification. | ||
##### options.locationInfo | ||
Enables source code location information for the nodes. Default: `false`. When enabled, each node (except the root node) has a `__location` property, which contains the `start` and `end` indices of the node in the source code. | ||
*Example:* | ||
@@ -114,5 +120,12 @@ ```js | ||
#### • SimpleApiParser.ctor(handlers) | ||
#### • SimpleApiParser.ctor(handlers, [options]) | ||
Creates a new reusable instance of the `SimpleApiParser`. The `handlers` argument specifies an object that contains the parser's event handlers. Possible events and their signatures are shown in the example. | ||
`options` object provides the parsing algorithm modifications: | ||
##### options.decodeHtmlEntities | ||
Decode HTML-entities like `&amp;`, `&nbsp;`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification. | ||
##### options.locationInfo | ||
Enables source code location information for the tokens. Default: `false`. When enabled, each handler receives a `location` object as its last argument. The `location` object contains the `start` and `end` indices of the token in the source code. | ||
*Example:* | ||
@@ -123,19 +136,19 @@ ```js | ||
var parser = new parse5.SimpleApiParser({ | ||
doctype: function(name, publicId, systemId) { | ||
doctype: function(name, publicId, systemId /*, [location] */) { | ||
//Handle doctype here | ||
}, | ||
startTag: function(tagName, attrs, selfClosing) { | ||
startTag: function(tagName, attrs, selfClosing /*, [location] */) { | ||
//Handle start tags here | ||
}, | ||
endTag: function(tagName) { | ||
endTag: function(tagName /*, [location] */) { | ||
//Handle end tags here | ||
}, | ||
text: function(text) { | ||
text: function(text /*, [location] */) { | ||
//Handle texts here | ||
}, | ||
comment: function(text) { | ||
comment: function(text /*, [location] */) { | ||
//Handle comments here | ||
@@ -171,7 +184,7 @@ } | ||
`options` object provides the serialization algorithm modifications (**Warning:** switching default options causes HTML5 specification violation. However, it may be useful in some cases, e.g. markup instrumentation. Use it on your own risk.) | ||
`options` object provides the serialization algorithm modifications: | ||
##### options.encodeHtmlEntities | ||
HTML-encode characters like `<`, `>`, `&`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification. | ||
* **options.encodeHtmlEntities** - HTML-encode characters like `<`, `>`, `&`, etc. Default: `true`. | ||
*Example:* | ||
@@ -178,0 +191,0 @@ ```js |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
396916
25
6616
247