Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

@bonniernews/atlas-html-stream

Package Overview
Dependencies
Maintainers
9
Versions
4
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@bonniernews/atlas-html-stream - npm Package Compare versions

Comparing version 2.0.0 to 2.0.1

CHANGELOG.md

11

package.json
{
"name": "@bonniernews/atlas-html-stream",
"version": "2.0.0",
"version": "2.0.1",
"description": "A super fast html-parser stream that outputs tag, text and closing nodes.",

@@ -8,3 +8,4 @@ "scripts": {

"test": "mocha --colors --recursive",
"posttest": "eslint . --cache"
"posttest": "eslint . --cache",
"cov:html": "nyc mocha && nyc report --reporter=html"
},

@@ -29,4 +30,6 @@ "main": "./src/HtmlParser.js",

"chai": "^4.3.4",
"eslint": "^8.4.1",
"mocha": "^9.1.3"
"eslint": "^8.5.0",
"eslint-config-exp": "0.0.9",
"mocha": "^9.1.3",
"nyc": "^15.1.0"
},

@@ -33,0 +36,0 @@ "engines": {

@@ -0,1 +1,3 @@

"use strict";
const { Transform } = require("stream");

@@ -21,9 +23,9 @@ const { TEXT, NODE, NAME, KEY, VALUE, SCRIPT, STYLE, COMMENT } = require("./states");

module.exports = class HtmlParser extends Transform {
constructor({preserveWS} = {preserveWS: false}){
super({readableObjectMode: true})
constructor({ preserveWS } = {}){
super({ readableObjectMode: true });
this.preserveWS = preserveWS;
this.endScript = new SeqMatcher("</script>")
this.endStyle = new SeqMatcher("</style>")
this.beginComment = new SeqMatcher("!--")
this.endComment = new SeqMatcher("-->")
this.endScript = new SeqMatcher("</script>");
this.endStyle = new SeqMatcher("</style>");
this.beginComment = new SeqMatcher("!--");
this.endComment = new SeqMatcher("-->");

@@ -40,93 +42,17 @@ this.curPos = 0;

this.flags = {
isClose: null,
isSelfClose: null,
hasEqual: null,
valStartChar: null,
};
this.isClose = false;
this.isSelfClose = false;
this.hasEqual = false;
this.valStartChar = null;
}
_transform(chunk, encoding, done){
if (chunk === null) return this.end();
this.parse(this.addToCache(chunk));
done(null)
}
_flush(done){
this.flushAllText();
this.reset();
done(null)
}
/**
 * Runs the character-level state machine over `this.cache`, from the saved
 * scan position up to (not including) `cacheLen`.
 *
 * Locals:
 *   i - index of the character being examined (resumes at `this.curPos`)
 *   v - start index of the token currently being collected (`this.minPos`)
 *   s - current state (TEXT, NODE, NAME, KEY, VALUE, SCRIPT, STYLE, COMMENT)
 *   c - char code at `i`
 *
 * Char codes used below: 32 and 9..13 are whitespace, 60 `<`, 62 `>`,
 * 47 `/`, 61 `=`, 34 `"`, 39 `'`.
 *
 * When the loop ends, everything before `v` is dropped from the cache and
 * the scan position / state are stored on the instance for the next call.
 *
 * @param {number} cacheLen - exclusive upper bound of the scan; the caller
 *   passes the cache length returned by `addToCache`
 */
parse(cacheLen) {
let i = this.curPos, v = this.minPos, s = this.state, c
while (i < cacheLen){
c = this.cache.charCodeAt(i)
if (s === TEXT){
// Unless whitespace is preserved, each whitespace char closes the current
// word (pushed onto this.text); `<` flushes the text and opens a node.
if (!this.preserveWS && (c === 32 || c >= 9 && c <= 13)) // ws
v < i && this.text.push(this.getCache(v, i)), v = i + 1
else if (c === 60) // <
this.flushText(v, i), s = NODE, v = i + 1
} else if (s === NODE){
// Between tokens inside `<...>`: decide what the next non-ws char starts.
if (c === 62) // >
this.key && this.flushKey(), s = this.flushNode(), v = i + 1
else if (c === 47 && !this.flags.hasEqual) // /
// `/` after a name marks a self-closing tag; `/` before any name marks a
// closing tag.
this.flags.isClose = !(this.flags.isSelfClose = !!this.name)
else if (c !== 32 && (c < 9 || c > 13)){ // !ws
if (!this.name) // name start
// The first name char is also fed to the "!--" matcher.
this.beginComment.found(c), v = i, s = NAME
else if (!this.key) // key start
v = i, s = KEY
else if (c === 61) // =
this.flags.hasEqual = true
else if (!this.flags.hasEqual) // next key
this.flushKey(), v = i, s = KEY
else if (c === 34 || c === 39) // ', "
// Remember the opening quote so VALUE knows which char ends the value.
v = i + 1, this.flags.valStartChar = c, s = VALUE
else // un-quoted val
v = i, s = VALUE
}
} else if (s === NAME){
// NOTE(review): `found` presumably returns truthy once the whole "!--"
// sequence has been seen — confirm against SeqMatcher.
if (this.beginComment.found(c)) // start comment
this.name = this.getCache(v, i + 1), s = this.flushNode(), v = i + 1
else if (c === 32 || c >= 9 && c <= 13) // ws
this.name = this.getCache(v, i), s = NODE, v = i + 1
else if (c === 47) // /
this.flags.isSelfClose = true, this.name = this.getCache(v, i), s = NODE, v = i + 1
else if (c === 62) // >
this.name = this.getCache(v, i), s = this.flushNode(), v = i + 1
} else if (s === KEY){
if (c === 32 || c >= 9 && c <= 13) // ws
this.key = this.getCache(v, i), s = NODE, v = i + 1
else if (c === 61) // =
this.flags.hasEqual = true, this.key = this.getCache(v, i), s = NODE, v = i + 1
else if (c === 47) // /
this.flags.isSelfClose = true, this.key = this.getCache(v, i), s = NODE, v = i + 1
else if (c === 62) // >
this.flushKey(v,i), s = this.flushNode(), v = i + 1
} else if (s === VALUE){
// Quoted values end at the matching quote; un-quoted ones end at
// whitespace or `>`.
if (this.flags.valStartChar != null){
if (c === this.flags.valStartChar) // found end quote
this.flushVal(v,i), s = NODE, v = i + 1
} else if (c === 32 || c >= 9 && c <= 13) // ws
this.flushVal(v,i), s = NODE, v = i + 1
else if (c === 62) // >
this.flushVal(v,i), s = this.flushNode(), v = i + 1
} else if (s === COMMENT && this.endComment.found(c))
// In the three branches below `i` is the last char of the terminator, so
// i-2 / i-8 / i-7 is the index where "-->" / "</script>" / "</style>" begins.
s = this.flushSpecialNode(v, i-2, "!--"), v = i + 1
else if (s === SCRIPT && this.endScript.found(c))
s = this.flushSpecialNode(v, i-8, "script"), v = i + 1
else if (s === STYLE && this.endStyle.found(c))
s = this.flushSpecialNode(v, i-7, "style"), v = i + 1
i = i + 1
}
// Drop the consumed prefix and keep the scan position relative to the
// shortened cache.
this.cache = this.cache.substr(v);
this.curPos = i - v;
this.minPos = 0;
this.state = s;
}
reset() {
this.endScript.reset();
this.endStyle.reset();
this.endScript.reset();
this.beginComment.reset();
this.endComment.reset();
this.beginComment.reset();
this.curPos = 0;
this.minPos = 0;
this.state = TEXT;
this.cache = "";

@@ -138,52 +64,195 @@ this.name = "";

this.curPos = 0;
this.isClose = false;
this.isSelfClose = false;
this.hasEqual = false;
this.valStartChar = null;
}
/**
 * Transform hook: appends `chunk` to `this.cache` (via `+=`) and runs the
 * character-level state machine over the part of the cache that has not
 * been scanned yet.
 *
 * Locals:
 *   i - index of the character being examined (resumes at `this.curPos`)
 *   v - start index of the token currently being collected (`this.minPos`)
 *   s - current state (TEXT, NODE, NAME, KEY, VALUE, or one of
 *       SCRIPT / STYLE / COMMENT handled by the `default` case)
 *   c - char code at `i`
 *
 * Char codes used below: 32 and 9..13 are whitespace, 60 `<`, 62 `>`,
 * 47 `/`, 61 `=`, 34 `"`, 39 `'`.
 *
 * @param {*} chunk - data appended to the cache
 * @param {string} encoding - unused
 * @param {Function} done - completion callback, called with `null`
 */
_transform(chunk, encoding, done){
const cache = this.cache += chunk;
const cacheLen = cache.length;
let i = this.curPos, v = this.minPos, s = this.state, c;
while (i < cacheLen) {
c = cache.charCodeAt(i);
switch (s) {
// Plain text: unless whitespace is preserved, each whitespace char closes
// the current word; `<` flushes the text and opens a node.
case TEXT: {
if (!this.preserveWS && (c === 32 || c >= 9 && c <= 13)) { // ws
if (v < i) this.text.push(cache.substring(v, i));
v = i + 1;
} else if (c === 60) { // <
this.flushText(v, i);
s = NODE;
v = i + 1;
}
break;
}
// Between tokens inside `<...>`: decide what the next non-ws char starts.
case NODE: {
if (c === 62) { // >
if (this.key) this.flushKey();
s = this.flushNode();
v = i + 1;
} else if (c === 47 && !this.hasEqual) { // /
// `/` after a name marks a self-closing tag; `/` before any name marks
// a closing tag.
this.isClose = !(this.isSelfClose = !!this.name);
} else if (c !== 32 && (c < 9 || c > 13)) { // !ws
if (!this.name) { // name start
// The first name char is also fed to the "!--" matcher.
this.beginComment.found(c);
v = i;
s = NAME;
} else if (!this.key) { // key start
v = i;
s = KEY;
} else if (c === 61) { // =
this.hasEqual = true;
} else if (!this.hasEqual) { // next key
this.flushKey();
v = i;
s = KEY;
} else if (c === 34 || c === 39) { // ', "
v = i + 1;
// Remember the opening quote so VALUE knows which char ends the value.
this.valStartChar = c;
s = VALUE;
} else { // un-quoted val
v = i;
s = VALUE;
}
}
break;
}
// Tag name. NOTE(review): `found` presumably returns truthy once the whole
// "!--" sequence has been seen — confirm against SeqMatcher.
case NAME: {
if (this.beginComment.found(c)) { // start comment
this.name = cache.substring(v, i + 1);
s = this.flushNode();
v = i + 1;
} else if (c === 32 || c >= 9 && c <= 13) { // ws
this.name = cache.substring(v, i);
s = NODE;
v = i + 1;
} else if (c === 47) { // /
this.isSelfClose = true;
this.name = cache.substring(v, i);
s = NODE;
v = i + 1;
} else if (c === 62) { // >
this.name = cache.substring(v, i);
s = this.flushNode();
v = i + 1;
}
break;
}
// Attribute key.
case KEY: {
if (c === 32 || c >= 9 && c <= 13) { // ws
this.key = cache.substring(v, i);
s = NODE;
v = i + 1;
} else if (c === 61) { // =
this.hasEqual = true;
this.key = cache.substring(v, i);
s = NODE;
v = i + 1;
} else if (c === 47) { // /
this.isSelfClose = true;
this.key = cache.substring(v, i);
s = NODE;
v = i + 1;
} else if (c === 62) { // >
this.flushKey(v, i);
s = this.flushNode();
v = i + 1;
}
break;
}
// Attribute value: quoted values end at the matching quote; un-quoted ones
// end at whitespace or `>`.
case VALUE: {
if (this.valStartChar !== null) {
if (c === this.valStartChar) { // found end quote
this.flushVal(v, i);
s = NODE;
v = i + 1;
}
} else if (c === 32 || c >= 9 && c <= 13) { // ws
this.flushVal(v, i);
s = NODE;
v = i + 1;
} else if (c === 62) { // >
this.flushVal(v, i);
s = this.flushNode();
v = i + 1;
}
break;
}
// COMMENT / SCRIPT / STYLE: raw content until the terminator is matched.
// `i` is then the last char of the terminator, so i - 2 / i - 8 / i - 7 is
// the index where "-->" / "</script>" / "</style>" begins.
default: {
if (s === COMMENT && this.endComment.found(c)) {
s = this.flushSpecialNode(v, i - 2, "!--");
v = i + 1;
} else if (s === SCRIPT && this.endScript.found(c)) {
s = this.flushSpecialNode(v, i - 8, "script");
v = i + 1;
} else if (s === STYLE && this.endStyle.found(c)) {
s = this.flushSpecialNode(v, i - 7, "style");
v = i + 1;
}
}
}
i++;
}
// Drop the consumed prefix and keep the scan position relative to the
// shortened cache.
this.cache = cache.substring(v);
this.curPos = i - v;
this.minPos = 0;
this.state = s;
// NOTE(review): nothing in this method reads `this.flags`; the per-tag state
// above lives on the flat fields (`this.isClose`, `this.isSelfClose`,
// `this.hasEqual`, `this.valStartChar`). This assignment looks like a
// leftover from the previous field layout — confirm before relying on it.
this.flags = {
isClose: null,
isSelfClose: null,
hasEqual: null,
valStartChar: null,
};
done(null);
}
getCache(start, end) {
return this.cache.substr(start, end-start);
_flush(done){
this.flushText(this.minPos, this.curPos);
this.reset();
done(null);
}
addToCache(chunk) {
return (this.cache += chunk).length;
}
flushKey(v, i) {
return (this.key = this.data[this.key || this.cache.substr(v, i-v)] = "");
this.key = this.data[this.key || this.cache.substring(v, i)] = "";
}
flushVal(v, i) {
return (this.data[this.key] = this.cache.substr(v, i-v), this.key = "", this.flags.valStartChar = this.flags.hasEqual = null);
this.data[this.key] = this.cache.substring(v, i);
this.key = "";
this.valStartChar = this.hasEqual = null;
}
flushNode() {
const name = this.name;
if (!this.flags.isClose) this.push({name, data: this.data})
if (this.flags.isSelfClose || this.flags.isClose) this.push({name})
const s = name === "script" ? SCRIPT : name === "style" ? STYLE : name === "!--" ? COMMENT : TEXT
if (!this.isClose) this.push({ name, data: this.data });
if (this.isSelfClose || this.isClose) this.push({ name });
let s;
switch (name) {
case "script":
s = SCRIPT;
break;
case "style":
s = STYLE;
break;
case "!--":
s = COMMENT;
break;
default:
s = TEXT;
}
this.data = {};
this.name = "";
this.flags.isClose = this.flags.isSelfClose = null
return s
this.isClose = false;
this.isSelfClose = false;
return s;
}
flushSpecialNode(v, i, name) {
const text = this.cache.substr(v, i-v)
text && this.push({text}), this.push({name})
return TEXT
const text = this.cache.substring(v, i);
if (text) this.push({ text });
this.push({ name });
return TEXT;
}
flushText(v, i) {
if (v < i) {
this.text.push(this.cache.substr(v, i-v))
this.push({text: this.text.join(" ")})
this.text = []
this.text.push(this.cache.substring(v, i));
this.push({ text: this.text.join(" ") });
this.text.length = 0;
} else if (this.text.length) {
this.push({text: this.text.join(" ")})
this.text = []
this.push({ text: this.text.join(" ") });
this.text.length = 0;
}
}
flushAllText() {
return this.flushText(this.minPos, this.curPos);
}
}
};

@@ -0,10 +1,12 @@

"use strict";
module.exports = {
TEXT:0,
NODE:1,
NAME:2,
KEY:3,
VALUE:4,
SCRIPT:5,
TEXT: 0,
NODE: 1,
NAME: 2,
KEY: 3,
VALUE: 4,
SCRIPT: 5,
STYLE: 6,
COMMENT: 7
}
COMMENT: 7,
};
Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc