Socket
Socket
Sign inDemoInstall

parse5

Package Overview
Dependencies
0
Maintainers
1
Versions
56
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 1.3.2 to 1.4.0

lib/tokenization/location_info_mixin.js

6

CHANGELOG.md

@@ -0,1 +1,7 @@

## 1.4.0
* Add: Parser [decodeHtmlEntities](https://github.com/inikulin/parse5#optionsdecodehtmlentities) option.
* Add: SimpleApiParser [decodeHtmlEntities](https://github.com/inikulin/parse5#optionsdecodehtmlentities-1) option.
* Add: Parser [locationInfo](https://github.com/inikulin/parse5#optionslocationinfo) option.
* Add: SimpleApiParser [locationInfo](https://github.com/inikulin/parse5#optionslocationinfo-1) option.
## 1.3.2

@@ -2,0 +8,0 @@ * Fix: `<form>` processing in `<template>` (GH [#40](https://github.com/inikulin/parse5/issues/40))

0

lib/common/utils.js

@@ -0,0 +0,0 @@ 'use strict';

6

lib/serialization/serializer.js

@@ -12,3 +12,5 @@ 'use strict';

//Default serializer options
var DEFAULT_OPTIONS = {encodeHtmlEntities: true};
var DEFAULT_OPTIONS = {
encodeHtmlEntities: true
};

@@ -159,3 +161,3 @@ //Escaping regexes

if(parent && this.treeAdapter.isElementNode(parent))
if (parent && this.treeAdapter.isElementNode(parent))
parentTn = this.treeAdapter.getTagName(parent);

@@ -162,0 +164,0 @@

'use strict';
var Tokenizer = require('../tokenization/tokenizer'),
TokenizerProxy = require('./tokenizer_proxy');
TokenizerProxy = require('./tokenizer_proxy'),
Utils = require('../common/utils');
//Default options
var DEFAULT_OPTIONS = {
decodeHtmlEntities: true,
locationInfo: false
};
//Skipping handler

@@ -12,12 +19,29 @@ function skip() {

//SimpleApiParser
var SimpleApiParser = module.exports = function (handlers) {
var SimpleApiParser = module.exports = function (handlers, options) {
this.options = Utils.mergeOptions(DEFAULT_OPTIONS, options);
this.handlers = {
doctype: handlers.doctype || skip,
startTag: handlers.startTag || skip,
endTag: handlers.endTag || skip,
text: handlers.text || skip,
comment: handlers.comment || skip
doctype: this._wrapHandler(handlers.doctype),
startTag: this._wrapHandler(handlers.startTag),
endTag: this._wrapHandler(handlers.endTag),
text: this._wrapHandler(handlers.text),
comment: this._wrapHandler(handlers.comment)
};
};
/**
 * Prepares a user-supplied event handler for use by the parser.
 *
 * A missing handler is replaced with the module-level `skip` handler.
 * When `options.locationInfo` is off, the handler is returned untouched.
 * When it is on, the handler is wrapped so that the parser's
 * `currentTokenLocation` (read at call time) is appended as the last argument.
 *
 * Note: the wrapper invokes the handler with the handler itself as `this`
 * and does not propagate the handler's return value.
 *
 * @param {Function} [handler] - Handler passed in by the user, if any.
 * @returns {Function} The handler to store in `this.handlers`.
 */
SimpleApiParser.prototype._wrapHandler = function (handler) {
    var self = this,
        callback = handler || skip;

    //No location info requested - the handler can be used as is
    if (!this.options.locationInfo)
        return callback;

    return function () {
        var callArgs = Array.prototype.slice.call(arguments);

        //Location is looked up on each call, so it reflects the token being handled
        callArgs.push(self.currentTokenLocation);
        callback.apply(callback, callArgs);
    };
};
//API

@@ -35,2 +59,11 @@ SimpleApiParser.prototype.parse = function (html) {

token.type === Tokenizer.NULL_CHARACTER_TOKEN) {
if (this.options.locationInfo) {
if (this.pendingText === null)
this.currentTokenLocation = token.location;
else
this.currentTokenLocation.end = token.location.end;
}
this.pendingText = (this.pendingText || '') + token.chars;

@@ -43,3 +76,3 @@ }

}
} while (token.type !== Tokenizer.EOF_TOKEN)
} while (token.type !== Tokenizer.EOF_TOKEN);
};

@@ -49,2 +82,5 @@

SimpleApiParser.prototype._handleToken = function (token) {
if (this.options.locationInfo)
this.currentTokenLocation = token.location;
if (token.type === Tokenizer.START_TAG_TOKEN)

@@ -65,4 +101,5 @@ this.handlers.startTag(token.tagName, token.attrs, token.selfClosing);

SimpleApiParser.prototype._reset = function (html) {
this.tokenizerProxy = new TokenizerProxy(html);
this.tokenizerProxy = new TokenizerProxy(html, this.options);
this.pendingText = null;
this.currentTokenLocation = null;
};

@@ -69,0 +106,0 @@

@@ -15,4 +15,4 @@ 'use strict';

//NOTE: this proxy simulates the adjustment of the Tokenizer which is performed by the standard parser during tree construction.
var TokenizerProxy = module.exports = function (html) {
this.tokenizer = new Tokenizer(html);
var TokenizerProxy = module.exports = function (html, options) {
this.tokenizer = new Tokenizer(html, options);

@@ -63,15 +63,15 @@ this.namespaceStack = [];

//Token handlers
TokenizerProxy.prototype._ensureTokenizerState = function (tn) {
TokenizerProxy.prototype._ensureTokenizerMode = function (tn) {
if (tn === $.TEXTAREA || tn === $.TITLE)
this.tokenizer.state = Tokenizer.RCDATA_STATE;
this.tokenizer.state = Tokenizer.MODE.RCDATA;
else if (tn === $.PLAINTEXT)
this.tokenizer.state = Tokenizer.PLAINTEXT_STATE;
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
else if (tn === $.SCRIPT)
this.tokenizer.state = Tokenizer.SCRIPT_DATA_STATE;
this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
else if (tn === $.STYLE || tn === $.IFRAME || tn === $.XMP ||
tn === $.NOEMBED || tn === $.NOFRAMES || tn === $.NOSCRIPT) {
this.tokenizer.state = Tokenizer.RAWTEXT_STATE;
this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
}

@@ -101,3 +101,3 @@ };

else
this._ensureTokenizerState(tn);
this._ensureTokenizerMode(tn);
}

@@ -119,7 +119,8 @@ };

else if (tn === $.SCRIPT)
this.tokenizer.state = Tokenizer.DATA_STATE;
this.tokenizer.state = Tokenizer.MODE.DATA;
}
else if ((tn === $.SVG && this.currentNamespace === NS.SVG) || (tn === $.MATH && this.currentNamespace === NS.MATHML))
else if ((tn === $.SVG && this.currentNamespace === NS.SVG) ||
(tn === $.MATH && this.currentNamespace === NS.MATHML))
this._leaveCurrentNamespace();
};
{
"name": "parse5",
"description": "WHATWG HTML5 specification-compliant, fast and ready for production HTML parsing/serialization toolset for Node.",
"version": "1.3.2",
"version": "1.4.0",
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)",

@@ -6,0 +6,0 @@ "contributors": [

@@ -71,5 +71,11 @@ <p align="center">

#### &bull; Parser.ctor([treeAdapter])
#### &bull; Parser.ctor([treeAdapter, options])
Creates new reusable instance of the `Parser`. Optional `treeAdapter` argument specifies resulting tree format. If `treeAdapter` argument is not specified, `default` tree adapter will be used.
`options` object provides the parsing algorithm modifications:
##### options.decodeHtmlEntities
Decode HTML-entities like `&amp;`, `&nbsp;`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification.
##### options.locationInfo
Enables source code location information for the nodes. Default: `false`. When enabled, each node (except root node) has `__location` property, which contains `start` and `end` indices of the node in the source code.
*Example:*

@@ -114,5 +120,12 @@ ```js

#### &bull; SimpleApiParser.ctor(handlers)
#### &bull; SimpleApiParser.ctor(handlers, [options])
Creates new reusable instance of the `SimpleApiParser`. `handlers` argument specifies object that contains parser's event handlers. Possible events and their signatures are shown in the example.
`options` object provides the parsing algorithm modifications:
##### options.decodeHtmlEntities
Decode HTML-entities like `&amp;`, `&nbsp;`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification.
##### options.locationInfo
Enables source code location information for the tokens. Default: `false`. When enabled, each node handler receives a `location` object as its last argument. The `location` object contains the `start` and `end` indices of the token in the source code.
*Example:*

@@ -123,19 +136,19 @@ ```js

var parser = new parse5.SimpleApiParser({
doctype: function(name, publicId, systemId) {
doctype: function(name, publicId, systemId /*, [location] */) {
//Handle doctype here
},
startTag: function(tagName, attrs, selfClosing) {
startTag: function(tagName, attrs, selfClosing /*, [location] */) {
//Handle start tags here
},
endTag: function(tagName) {
endTag: function(tagName /*, [location] */) {
//Handle end tags here
},
text: function(text) {
text: function(text /*, [location] */) {
//Handle texts here
},
comment: function(text) {
comment: function(text /*, [location] */) {
//Handle comments here

@@ -171,7 +184,7 @@ }

`options` object provides the serialization algorithm modifications (**Warning:** switching default options causes HTML5 specification violation. However, it may be useful in some cases, e.g. markup instrumentation. Use it on your own risk.)
`options` object provides the serialization algorithm modifications:
##### options.encodeHtmlEntities
HTML-encode characters like `<`, `>`, `&`, etc. Default: `true`. **Warning:** disabling this option may produce output that does not conform to the HTML5 specification.
* **options.encodeHtmlEntities** - HTML-encode characters like `<`, `>`, `&`, etc. Default: `true`.
*Example:*

@@ -178,0 +191,0 @@ ```js

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is too big to display

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc