Socket
Socket
Sign in · Demo · Install

htmlparser2

Package Overview
Dependencies
5
Maintainers
1
Versions
76
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 5.0.1 to 6.0.0

2

lib/CollectingHandler.d.ts
import MultiplexHandler from "./MultiplexHandler";
import { Handler } from "./Parser";
export declare class CollectingHandler extends MultiplexHandler {
_cbs: Partial<Handler>;
private readonly cbs;
events: [keyof Handler, ...unknown[]][];

@@ -6,0 +6,0 @@ constructor(cbs?: Partial<Handler>);

@@ -39,5 +39,5 @@ "use strict";

_this.events.push(__spreadArrays([name], args));
(_a = _this._cbs[name]) === null || _a === void 0 ? void 0 : _a.apply(void 0, args);
(_a = _this.cbs[name]) === null || _a === void 0 ? void 0 : _a.apply(void 0, args);
}) || this;
_this._cbs = cbs;
_this.cbs = cbs;
_this.events = [];

@@ -49,10 +49,10 @@ return _this;

this.events = [];
(_b = (_a = this._cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a);
(_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a);
};
CollectingHandler.prototype.restart = function () {
var _a, _b, _c;
(_b = (_a = this._cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a);
(_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a);
for (var _i = 0, _d = this.events; _i < _d.length; _i++) {
var _e = _d[_i], name_1 = _e[0], args = _e.slice(1);
(_c = this._cbs[name_1]) === null || _c === void 0 ? void 0 : _c.apply(void 0, args);
(_c = this.cbs[name_1]) === null || _c === void 0 ? void 0 : _c.apply(void 0, args);
}

@@ -59,0 +59,0 @@ };

@@ -63,3 +63,3 @@ import DomHandler, { DomHandlerOptions } from "domhandler";

* @param feed The feed that should be parsed, as a string.
* @param options Optionally, options for parsing. When using this option, you probably want to set `xmlMode` to `true`.
* @param options Optionally, options for parsing. When using this option, you should set `xmlMode` to `true`.
*/

@@ -66,0 +66,0 @@ export declare function parseFeed(feed: string, options?: ParserOptions & DomHandlerOptions): Feed | undefined;

@@ -75,71 +75,72 @@ "use strict";

var _a, _b;
var feedRoot = getOneElement(isValidFeed, this.dom);
if (!feedRoot) {
this.handleCallback(new Error("couldn't find root of feed"));
return;
}
var feed = {};
var feedRoot = getOneElement(isValidFeed, this.dom);
if (feedRoot) {
if (feedRoot.name === "feed") {
var childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
var href = getAttribute("href", getOneElement("link", childs));
if (feedRoot.name === "feed") {
var childs = feedRoot.children;
feed.type = "atom";
addConditionally(feed, "id", "id", childs);
addConditionally(feed, "title", "title", childs);
var href = getAttribute("href", getOneElement("link", childs));
if (href) {
feed.link = href;
}
addConditionally(feed, "description", "subtitle", childs);
var updated = fetch("updated", childs);
if (updated) {
feed.updated = new Date(updated);
}
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "id", children);
addConditionally(entry, "title", "title", children);
var href = getAttribute("href", getOneElement("link", children));
if (href) {
feed.link = href;
entry.link = href;
}
addConditionally(feed, "description", "subtitle", childs);
var updated = fetch("updated", childs);
if (updated) {
feed.updated = new Date(updated);
var description = fetch("summary", children) || fetch("content", children);
if (description) {
entry.description = description;
}
addConditionally(feed, "author", "email", childs, true);
feed.items = getElements("entry", childs).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "id", children);
addConditionally(entry, "title", "title", children);
var href = getAttribute("href", getOneElement("link", children));
if (href) {
entry.link = href;
}
var description = fetch("summary", children) ||
fetch("content", children);
if (description) {
entry.description = description;
}
var pubDate = fetch("updated", children);
if (pubDate) {
entry.pubDate = new Date(pubDate);
}
entry.media = getMediaElements(children);
return entry;
});
}
else {
var childs = (_b = (_a = getOneElement("channel", feedRoot.children)) === null || _a === void 0 ? void 0 : _a.children) !== null && _b !== void 0 ? _b : [];
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
var updated = fetch("lastBuildDate", childs);
if (updated) {
feed.updated = new Date(updated);
var pubDate = fetch("updated", children);
if (pubDate) {
entry.pubDate = new Date(pubDate);
}
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "guid", children);
addConditionally(entry, "title", "title", children);
addConditionally(entry, "link", "link", children);
addConditionally(entry, "description", "description", children);
var pubDate = fetch("pubDate", children);
if (pubDate)
entry.pubDate = new Date(pubDate);
entry.media = getMediaElements(children);
return entry;
});
entry.media = getMediaElements(children);
return entry;
});
}
else {
var childs = (_b = (_a = getOneElement("channel", feedRoot.children)) === null || _a === void 0 ? void 0 : _a.children) !== null && _b !== void 0 ? _b : [];
feed.type = feedRoot.name.substr(0, 3);
feed.id = "";
addConditionally(feed, "title", "title", childs);
addConditionally(feed, "link", "link", childs);
addConditionally(feed, "description", "description", childs);
var updated = fetch("lastBuildDate", childs);
if (updated) {
feed.updated = new Date(updated);
}
addConditionally(feed, "author", "managingEditor", childs, true);
feed.items = getElements("item", feedRoot.children).map(function (item) {
var entry = {};
var children = item.children;
addConditionally(entry, "id", "guid", children);
addConditionally(entry, "title", "title", children);
addConditionally(entry, "link", "link", children);
addConditionally(entry, "description", "description", children);
var pubDate = fetch("pubDate", children);
if (pubDate)
entry.pubDate = new Date(pubDate);
entry.media = getMediaElements(children);
return entry;
});
}
this.feed = feed;
this.handleCallback(feedRoot ? null : Error("couldn't find root of feed"));
this.handleCallback(null);
};

@@ -221,3 +222,2 @@ return FeedHandler;

}
var defaultOptions = { xmlMode: true };
/**

@@ -227,6 +227,6 @@ * Parse a feed.

* @param feed The feed that should be parsed, as a string.
* @param options Optionally, options for parsing. When using this option, you probably want to set `xmlMode` to `true`.
* @param options Optionally, options for parsing. When using this option, you should set `xmlMode` to `true`.
*/
function parseFeed(feed, options) {
if (options === void 0) { options = defaultOptions; }
if (options === void 0) { options = { xmlMode: true }; }
var handler = new FeedHandler(options);

@@ -233,0 +233,0 @@ new Parser_1.Parser(handler, options).end(feed);

import { Parser, ParserOptions } from "./Parser";
export { Parser, ParserOptions };
import { DomHandler, DomHandlerOptions, Node, Element } from "domhandler";
import { DomHandler, DomHandlerOptions, Node, Element, Document } from "domhandler";
export { DomHandler, DomHandlerOptions };
declare type Options = ParserOptions & DomHandlerOptions;
/**
* Parses data, returns the resulting DOM.
* Parses the data, returns the resulting document.
*

@@ -12,2 +12,13 @@ * @param data The data that should be parsed.

*/
export declare function parseDocument(data: string, options?: Options): Document;
/**
* Parses data, returns an array of the root nodes.
*
* Note that the root nodes still have a `Document` node as their parent.
* Use `parseDocument` to get the `Document` node instead.
*
* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @deprecated Use `parseDocument` instead.
*/
export declare function parseDOM(data: string, options?: Options): Node[];

@@ -14,0 +25,0 @@ /**

@@ -28,3 +28,3 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.RssHandler = exports.DefaultHandler = exports.DomUtils = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.DomHandler = exports.Parser = void 0;
exports.RssHandler = exports.DefaultHandler = exports.DomUtils = exports.ElementType = exports.Tokenizer = exports.createDomStream = exports.parseDOM = exports.parseDocument = exports.DomHandler = exports.Parser = void 0;
var Parser_1 = require("./Parser");

@@ -37,3 +37,3 @@ Object.defineProperty(exports, "Parser", { enumerable: true, get: function () { return Parser_1.Parser; } });

/**
* Parses data, returns the resulting DOM.
* Parses the data, returns the resulting document.
*

@@ -43,7 +43,21 @@ * @param data The data that should be parsed.

*/
function parseDOM(data, options) {
var handler = new domhandler_1.DomHandler(void 0, options);
function parseDocument(data, options) {
var handler = new domhandler_1.DomHandler(undefined, options);
new Parser_1.Parser(handler, options).end(data);
return handler.dom;
return handler.root;
}
exports.parseDocument = parseDocument;
/**
* Parses data, returns an array of the root nodes.
*
* Note that the root nodes still have a `Document` node as their parent.
* Use `parseDocument` to get the `Document` node instead.
*
* @param data The data that should be parsed.
* @param options Optional options for the parser and DOM builder.
* @deprecated Use `parseDocument` instead.
*/
function parseDOM(data, options) {
return parseDocument(data, options).children;
}
exports.parseDOM = parseDOM;

@@ -50,0 +64,0 @@ /**

import type { Parser, Handler } from "./Parser";
/**
* Calls a specific handler function for all events that are encountered.
*
* @param func — The function to multiplex all events to.
*/
export default class MultiplexHandler implements Handler {
_func: (event: keyof Handler, ...args: unknown[]) => void;
private readonly func;
/**
* @param func The function to multiplex all events to.
*/
constructor(func: (event: keyof Handler, ...args: unknown[]) => void);

@@ -10,0 +11,0 @@ onattribute(name: string, value: string, quote: string | null | undefined): void;

@@ -5,50 +5,51 @@ "use strict";

* Calls a specific handler function for all events that are encountered.
*
* @param func — The function to multiplex all events to.
*/
var MultiplexHandler = /** @class */ (function () {
/**
* @param func The function to multiplex all events to.
*/
function MultiplexHandler(func) {
this._func = func;
this.func = func;
}
MultiplexHandler.prototype.onattribute = function (name, value, quote) {
this._func("onattribute", name, value, quote);
this.func("onattribute", name, value, quote);
};
MultiplexHandler.prototype.oncdatastart = function () {
this._func("oncdatastart");
this.func("oncdatastart");
};
MultiplexHandler.prototype.oncdataend = function () {
this._func("oncdataend");
this.func("oncdataend");
};
MultiplexHandler.prototype.ontext = function (text) {
this._func("ontext", text);
this.func("ontext", text);
};
MultiplexHandler.prototype.onprocessinginstruction = function (name, value) {
this._func("onprocessinginstruction", name, value);
this.func("onprocessinginstruction", name, value);
};
MultiplexHandler.prototype.oncomment = function (comment) {
this._func("oncomment", comment);
this.func("oncomment", comment);
};
MultiplexHandler.prototype.oncommentend = function () {
this._func("oncommentend");
this.func("oncommentend");
};
MultiplexHandler.prototype.onclosetag = function (name) {
this._func("onclosetag", name);
this.func("onclosetag", name);
};
MultiplexHandler.prototype.onopentag = function (name, attribs) {
this._func("onopentag", name, attribs);
this.func("onopentag", name, attribs);
};
MultiplexHandler.prototype.onopentagname = function (name) {
this._func("onopentagname", name);
this.func("onopentagname", name);
};
MultiplexHandler.prototype.onerror = function (error) {
this._func("onerror", error);
this.func("onerror", error);
};
MultiplexHandler.prototype.onend = function () {
this._func("onend");
this.func("onend");
};
MultiplexHandler.prototype.onparserinit = function (parser) {
this._func("onparserinit", parser);
this.func("onparserinit", parser);
};
MultiplexHandler.prototype.onreset = function () {
this._func("onreset");
this.func("onreset");
};

@@ -55,0 +56,0 @@ return MultiplexHandler;

import Tokenizer from "./Tokenizer";
export interface ParserOptions {
/**
* Indicates whether special tags (<script>, <style>, and <title>) should get special treatment
* and if "empty" tags (eg. <br>) can have children. If `false`, the content of special tags
* Indicates whether special tags (`<script>`, `<style>`, and `<title>`) should get special treatment
* and if "empty" tags (eg. `<br>`) can have children. If `false`, the content of special tags
* will be text only. For feeds and other XML content (documents that don't consist of HTML),
* set this to `true`. Default: `false`.
* set this to `true`.
*
* @default false
*/
xmlMode?: boolean;
/**
* Decode entities within the document. Defaults to `true`.
* Decode entities within the document.
*
* @default true
*/
decodeEntities?: boolean;
/**
* If set to true, all tags will be lowercased. If xmlMode is disabled, this defaults to `true`.
* If set to true, all tags will be lowercased.
*
* @default !xmlMode
*/
lowerCaseTags?: boolean;
/**
* If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed, so it defaults to `false`.
* If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed.
*
* @default !xmlMode
*/

@@ -25,2 +33,4 @@ lowerCaseAttributeNames?: boolean;

* NOTE: If xmlMode is set to `true` then CDATA sections will always be recognized as text.
*
* @default xmlMode
*/

@@ -31,2 +41,4 @@ recognizeCDATA?: boolean;

* NOTE: If xmlMode is set to `true` then self-closing tags will always be recognized.
*
* @default xmlMode
*/

@@ -108,3 +120,4 @@ recognizeSelfClosing?: boolean;

/**
* Parses a complete document and pushes it to the handler.
* Resets the parser, then parses a complete document and
* pushes it to the handler.
*

@@ -111,0 +124,0 @@ * @param data Document to parse.

@@ -323,3 +323,4 @@ "use strict";

/**
* Parses a complete document and pushes it to the handler.
* Resets the parser, then parses a complete document and
* pushes it to the handler.
*

@@ -326,0 +327,0 @@ * @param data Document to parse.

{
"name": "htmlparser2",
"description": "Fast & forgiving HTML/XML/RSS parser",
"version": "5.0.1",
"description": "Fast & forgiving HTML/XML parser",
"version": "6.0.0",
"author": "Felix Boehm <me@feedic.com>",
"funding": "https://github.com/fb55/htmlparser2?sponsor=1",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "MIT",

@@ -45,4 +51,4 @@ "sideEffects": false,

"domelementtype": "^2.0.1",
"domhandler": "^3.3.0",
"domutils": "^2.4.2",
"domhandler": "^4.0.0",
"domutils": "^2.4.4",
"entities": "^2.0.0"

@@ -53,6 +59,6 @@ },

"@types/node": "^14.0.5",
"@typescript-eslint/eslint-plugin": "^4.1.0",
"@typescript-eslint/parser": "^4.1.0",
"eslint": "^7.0.0",
"eslint-config-prettier": "^6.0.0",
"@typescript-eslint/eslint-plugin": "^4.9.1",
"@typescript-eslint/parser": "^4.9.1",
"eslint": "^7.15.0",
"eslint-config-prettier": "^7.0.0",
"jest": "^26.0.1",

@@ -59,0 +65,0 @@ "prettier": "^2.1.1",

@@ -8,18 +8,37 @@ # htmlparser2

A forgiving HTML/XML/RSS parser.
The parser can handle streams and provides a callback interface.
The fast & forgiving HTML/XML parser.
## Installation
npm install --save htmlparser2
npm install htmlparser2
A live demo of htmlparser2 is available [here](https://astexplorer.net/#/2AmVrGuGVJ).
A live demo of `htmlparser2` is available [here](https://astexplorer.net/#/2AmVrGuGVJ).
## Ecosystem
| Name | Description |
| ------------------------------------------------------------- | ------------------------------------------------------- |
| [htmlparser2](https://github.com/fb55/htmlparser2) | Fast & forgiving HTML/XML parser |
| [domhandler](https://github.com/fb55/domhandler) | Handler for htmlparser2 that turns documents into a DOM |
| [domutils](https://github.com/fb55/domutils) | Utilities for working with domhandler's DOM |
| [css-select](https://github.com/fb55/css-select) | CSS selector engine, compatible with domhandler's DOM |
| [cheerio](https://github.com/cheeriojs/cheerio) | The jQuery API for domhandler's DOM |
| [dom-serializer](https://github.com/cheeriojs/dom-serializer) | Serializer for domhandler's DOM |
## Usage
`htmlparser2` itself provides a callback interface that allows consumption of documents with minimal allocations.
For a more ergonomic experience, read [Getting a DOM](#getting-a-dom) below.
```javascript
const htmlparser2 = require("htmlparser2");
const parser = new htmlparser2.Parser({
onopentag(name, attribs) {
if (name === "script" && attribs.type === "text/javascript") {
onopentag(name, attributes) {
/*
* This fires when a new tag is opened.
*
* If you don't need an aggregated `attributes` object,
* have a look at the `onopentagname` and `onattribute` events.
*/
if (name === "script" && attributes.type === "text/javascript") {
console.log("JS! Hooray!");

@@ -29,5 +48,18 @@ }

ontext(text) {
/*
* Fires whenever a section of text was processed.
*
* Note that this can fire at any point within text and you might
* have to stitch together multiple pieces.
*/
console.log("-->", text);
},
onclosetag(tagname) {
/*
* Fires when a tag is closed.
*
* You can rely on this event only firing when you have received an
* equivalent opening tag before. Closing tags without corresponding
* opening tags will be ignored.
*/
if (tagname === "script") {

@@ -39,3 +71,3 @@ console.log("That's it?!");

parser.write(
"Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>"
"Xyz <script type='text/javascript'>const foo = '<<bar>>';</ script>"
);

@@ -45,3 +77,3 @@ parser.end();

Output (simplified):
Output (with multiple text events combined):

@@ -51,6 +83,9 @@ ```

JS! Hooray!
--> var foo = '<<bar>>';
--> const foo = '<<bar>>';
That's it?!
```
This example only shows three of the possible events.
Read more about the parser, its events and options in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options).
### Usage with streams

@@ -73,9 +108,11 @@

## Documentation
## Getting a DOM
Read more about the parser and its options in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options).
The `DomHandler` produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.
## Get a DOM
```js
const htmlparser2 = require("htmlparser2");
The `DomHandler` produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.
const dom = htmlparser2.parseDocument();
```

@@ -82,0 +119,0 @@ The `DomHandler`, while still bundled with this module, was moved to its [own module](https://github.com/fb55/domhandler).

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket · SOC 2 logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc