@bonniernews/atlas-html-stream
Advanced tools
Comparing version 2.0.0 to 2.0.1
{ | ||
"name": "@bonniernews/atlas-html-stream", | ||
"version": "2.0.0", | ||
"version": "2.0.1", | ||
"description": "A super fast html-parser stream that outputs tag, text and closing nodes.", | ||
@@ -8,3 +8,4 @@ "scripts": { | ||
"test": "mocha --colors --recursive", | ||
"posttest": "eslint . --cache" | ||
"posttest": "eslint . --cache", | ||
"cov:html": "nyc mocha && nyc report --reporter=html" | ||
}, | ||
@@ -29,4 +30,6 @@ "main": "./src/HtmlParser.js", | ||
"chai": "^4.3.4", | ||
"eslint": "^8.4.1", | ||
"mocha": "^9.1.3" | ||
"eslint": "^8.5.0", | ||
"eslint-config-exp": "0.0.9", | ||
"mocha": "^9.1.3", | ||
"nyc": "^15.1.0" | ||
}, | ||
@@ -33,0 +36,0 @@ "engines": { |
@@ -0,1 +1,3 @@ | ||
"use strict"; | ||
const { Transform } = require("stream"); | ||
@@ -21,9 +23,9 @@ const { TEXT, NODE, NAME, KEY, VALUE, SCRIPT, STYLE, COMMENT } = require("./states"); | ||
module.exports = class HtmlParser extends Transform { | ||
constructor({preserveWS} = {preserveWS: false}){ | ||
super({readableObjectMode: true}) | ||
constructor({ preserveWS } = {}){ | ||
super({ readableObjectMode: true }); | ||
this.preserveWS = preserveWS; | ||
this.endScript = new SeqMatcher("</script>") | ||
this.endStyle = new SeqMatcher("</style>") | ||
this.beginComment = new SeqMatcher("!--") | ||
this.endComment = new SeqMatcher("-->") | ||
this.endScript = new SeqMatcher("</script>"); | ||
this.endStyle = new SeqMatcher("</style>"); | ||
this.beginComment = new SeqMatcher("!--"); | ||
this.endComment = new SeqMatcher("-->"); | ||
@@ -40,93 +42,17 @@ this.curPos = 0; | ||
this.flags = { | ||
isClose: null, | ||
isSelfClose: null, | ||
hasEqual: null, | ||
valStartChar: null, | ||
}; | ||
this.isClose = false; | ||
this.isSelfClose = false; | ||
this.hasEqual = false; | ||
this.valStartChar = null; | ||
} | ||
_transform(chunk, encoding, done){ | ||
if (chunk === null) return this.end(); | ||
this.parse(this.addToCache(chunk)); | ||
done(null) | ||
} | ||
_flush(done){ | ||
this.flushAllText(); | ||
this.reset(); | ||
done(null) | ||
} | ||
parse(cacheLen) { | ||
let i = this.curPos, v = this.minPos, s = this.state, c | ||
while (i < cacheLen){ | ||
c = this.cache.charCodeAt(i) | ||
if (s === TEXT){ | ||
if (!this.preserveWS && (c === 32 || c >= 9 && c <= 13)) // ws | ||
v < i && this.text.push(this.getCache(v, i)), v = i + 1 | ||
else if (c === 60) // < | ||
this.flushText(v, i), s = NODE, v = i + 1 | ||
} else if (s === NODE){ | ||
if (c === 62) // > | ||
this.key && this.flushKey(), s = this.flushNode(), v = i + 1 | ||
else if (c === 47 && !this.flags.hasEqual) // / | ||
this.flags.isClose = !(this.flags.isSelfClose = !!this.name) | ||
else if (c !== 32 && (c < 9 || c > 13)){ // !ws | ||
if (!this.name) // name start | ||
this.beginComment.found(c), v = i, s = NAME | ||
else if (!this.key) // key start | ||
v = i, s = KEY | ||
else if (c === 61) // = | ||
this.flags.hasEqual = true | ||
else if (!this.flags.hasEqual) // next key | ||
this.flushKey(), v = i, s = KEY | ||
else if (c === 34 || c === 39) // ', " | ||
v = i + 1, this.flags.valStartChar = c, s = VALUE | ||
else // un-quoted val | ||
v = i, s = VALUE | ||
} | ||
} else if (s === NAME){ | ||
if (this.beginComment.found(c)) // start comment | ||
this.name = this.getCache(v, i + 1), s = this.flushNode(), v = i + 1 | ||
else if (c === 32 || c >= 9 && c <= 13) // ws | ||
this.name = this.getCache(v, i), s = NODE, v = i + 1 | ||
else if (c === 47) // / | ||
this.flags.isSelfClose = true, this.name = this.getCache(v, i), s = NODE, v = i + 1 | ||
else if (c === 62) // > | ||
this.name = this.getCache(v, i), s = this.flushNode(), v = i + 1 | ||
} else if (s === KEY){ | ||
if (c === 32 || c >= 9 && c <= 13) // ws | ||
this.key = this.getCache(v, i), s = NODE, v = i + 1 | ||
else if (c === 61) // = | ||
this.flags.hasEqual = true, this.key = this.getCache(v, i), s = NODE, v = i + 1 | ||
else if (c === 47) // / | ||
this.flags.isSelfClose = true, this.key = this.getCache(v, i), s = NODE, v = i + 1 | ||
else if (c === 62) // > | ||
this.flushKey(v,i), s = this.flushNode(), v = i + 1 | ||
} else if (s === VALUE){ | ||
if (this.flags.valStartChar != null){ | ||
if (c === this.flags.valStartChar) // found end quote | ||
this.flushVal(v,i), s = NODE, v = i + 1 | ||
} else if (c === 32 || c >= 9 && c <= 13) // ws | ||
this.flushVal(v,i), s = NODE, v = i + 1 | ||
else if (c === 62) // > | ||
this.flushVal(v,i), s = this.flushNode(), v = i + 1 | ||
} else if (s === COMMENT && this.endComment.found(c)) | ||
s = this.flushSpecialNode(v, i-2, "!--"), v = i + 1 | ||
else if (s === SCRIPT && this.endScript.found(c)) | ||
s = this.flushSpecialNode(v, i-8, "script"), v = i + 1 | ||
else if (s === STYLE && this.endStyle.found(c)) | ||
s = this.flushSpecialNode(v, i-7, "style"), v = i + 1 | ||
i = i + 1 | ||
} | ||
this.cache = this.cache.substr(v); | ||
this.curPos = i - v; | ||
this.minPos = 0; | ||
this.state = s; | ||
} | ||
reset() { | ||
this.endScript.reset(); | ||
this.endStyle.reset(); | ||
this.endScript.reset(); | ||
this.beginComment.reset(); | ||
this.endComment.reset(); | ||
this.beginComment.reset(); | ||
this.curPos = 0; | ||
this.minPos = 0; | ||
this.state = TEXT; | ||
this.cache = ""; | ||
@@ -138,52 +64,195 @@ this.name = ""; | ||
this.curPos = 0; | ||
this.isClose = false; | ||
this.isSelfClose = false; | ||
this.hasEqual = false; | ||
this.valStartChar = null; | ||
} | ||
_transform(chunk, encoding, done){ | ||
const cache = this.cache += chunk; | ||
const cacheLen = cache.length; | ||
let i = this.curPos, v = this.minPos, s = this.state, c; | ||
while (i < cacheLen) { | ||
c = cache.charCodeAt(i); | ||
switch (s) { | ||
case TEXT: { | ||
if (!this.preserveWS && (c === 32 || c >= 9 && c <= 13)) { // ws | ||
if (v < i) this.text.push(cache.substring(v, i)); | ||
v = i + 1; | ||
} else if (c === 60) { // < | ||
this.flushText(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} | ||
break; | ||
} | ||
case NODE: { | ||
if (c === 62) { // > | ||
if (this.key) this.flushKey(); | ||
s = this.flushNode(); | ||
v = i + 1; | ||
} else if (c === 47 && !this.hasEqual) { // / | ||
this.isClose = !(this.isSelfClose = !!this.name); | ||
} else if (c !== 32 && (c < 9 || c > 13)) { // !ws | ||
if (!this.name) { // name start | ||
this.beginComment.found(c); | ||
v = i; | ||
s = NAME; | ||
} else if (!this.key) { // key start | ||
v = i; | ||
s = KEY; | ||
} else if (c === 61) { // = | ||
this.hasEqual = true; | ||
} else if (!this.hasEqual) { // next key | ||
this.flushKey(); | ||
v = i; | ||
s = KEY; | ||
} else if (c === 34 || c === 39) { // ', " | ||
v = i + 1; | ||
this.valStartChar = c; | ||
s = VALUE; | ||
} else { // un-quoted val | ||
v = i; | ||
s = VALUE; | ||
} | ||
} | ||
break; | ||
} | ||
case NAME: { | ||
if (this.beginComment.found(c)) { // start comment | ||
this.name = cache.substring(v, i + 1); | ||
s = this.flushNode(); | ||
v = i + 1; | ||
} else if (c === 32 || c >= 9 && c <= 13) { // ws | ||
this.name = cache.substring(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} else if (c === 47) { // / | ||
this.isSelfClose = true; | ||
this.name = cache.substring(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} else if (c === 62) { // > | ||
this.name = cache.substring(v, i); | ||
s = this.flushNode(); | ||
v = i + 1; | ||
} | ||
break; | ||
} | ||
case KEY: { | ||
if (c === 32 || c >= 9 && c <= 13) { // ws | ||
this.key = cache.substring(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} else if (c === 61) { // = | ||
this.hasEqual = true; | ||
this.key = cache.substring(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} else if (c === 47) { // / | ||
this.isSelfClose = true; | ||
this.key = cache.substring(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} else if (c === 62) { // > | ||
this.flushKey(v, i); | ||
s = this.flushNode(); | ||
v = i + 1; | ||
} | ||
break; | ||
} | ||
case VALUE: { | ||
if (this.valStartChar !== null) { | ||
if (c === this.valStartChar) { // found end quote | ||
this.flushVal(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} | ||
} else if (c === 32 || c >= 9 && c <= 13) { // ws | ||
this.flushVal(v, i); | ||
s = NODE; | ||
v = i + 1; | ||
} else if (c === 62) { // > | ||
this.flushVal(v, i); | ||
s = this.flushNode(); | ||
v = i + 1; | ||
} | ||
break; | ||
} | ||
default: { | ||
if (s === COMMENT && this.endComment.found(c)) { | ||
s = this.flushSpecialNode(v, i - 2, "!--"); | ||
v = i + 1; | ||
} else if (s === SCRIPT && this.endScript.found(c)) { | ||
s = this.flushSpecialNode(v, i - 8, "script"); | ||
v = i + 1; | ||
} else if (s === STYLE && this.endStyle.found(c)) { | ||
s = this.flushSpecialNode(v, i - 7, "style"); | ||
v = i + 1; | ||
} | ||
} | ||
} | ||
i++; | ||
} | ||
this.cache = cache.substring(v); | ||
this.curPos = i - v; | ||
this.minPos = 0; | ||
this.state = s; | ||
this.flags = { | ||
isClose: null, | ||
isSelfClose: null, | ||
hasEqual: null, | ||
valStartChar: null, | ||
}; | ||
done(null); | ||
} | ||
getCache(start, end) { | ||
return this.cache.substr(start, end-start); | ||
_flush(done){ | ||
this.flushText(this.minPos, this.curPos); | ||
this.reset(); | ||
done(null); | ||
} | ||
addToCache(chunk) { | ||
return (this.cache += chunk).length; | ||
} | ||
flushKey(v, i) { | ||
return (this.key = this.data[this.key || this.cache.substr(v, i-v)] = ""); | ||
this.key = this.data[this.key || this.cache.substring(v, i)] = ""; | ||
} | ||
flushVal(v, i) { | ||
return (this.data[this.key] = this.cache.substr(v, i-v), this.key = "", this.flags.valStartChar = this.flags.hasEqual = null); | ||
this.data[this.key] = this.cache.substring(v, i); | ||
this.key = ""; | ||
this.valStartChar = this.hasEqual = null; | ||
} | ||
flushNode() { | ||
const name = this.name; | ||
if (!this.flags.isClose) this.push({name, data: this.data}) | ||
if (this.flags.isSelfClose || this.flags.isClose) this.push({name}) | ||
const s = name === "script" ? SCRIPT : name === "style" ? STYLE : name === "!--" ? COMMENT : TEXT | ||
if (!this.isClose) this.push({ name, data: this.data }); | ||
if (this.isSelfClose || this.isClose) this.push({ name }); | ||
let s; | ||
switch (name) { | ||
case "script": | ||
s = SCRIPT; | ||
break; | ||
case "style": | ||
s = STYLE; | ||
break; | ||
case "!--": | ||
s = COMMENT; | ||
break; | ||
default: | ||
s = TEXT; | ||
} | ||
this.data = {}; | ||
this.name = ""; | ||
this.flags.isClose = this.flags.isSelfClose = null | ||
return s | ||
this.isClose = false; | ||
this.isSelfClose = false; | ||
return s; | ||
} | ||
flushSpecialNode(v, i, name) { | ||
const text = this.cache.substr(v, i-v) | ||
text && this.push({text}), this.push({name}) | ||
return TEXT | ||
const text = this.cache.substring(v, i); | ||
if (text) this.push({ text }); | ||
this.push({ name }); | ||
return TEXT; | ||
} | ||
flushText(v, i) { | ||
if (v < i) { | ||
this.text.push(this.cache.substr(v, i-v)) | ||
this.push({text: this.text.join(" ")}) | ||
this.text = [] | ||
this.text.push(this.cache.substring(v, i)); | ||
this.push({ text: this.text.join(" ") }); | ||
this.text.length = 0; | ||
} else if (this.text.length) { | ||
this.push({text: this.text.join(" ")}) | ||
this.text = [] | ||
this.push({ text: this.text.join(" ") }); | ||
this.text.length = 0; | ||
} | ||
} | ||
flushAllText() { | ||
return this.flushText(this.minPos, this.curPos); | ||
} | ||
} | ||
}; |
@@ -0,10 +1,12 @@ | ||
"use strict"; | ||
module.exports = { | ||
TEXT:0, | ||
NODE:1, | ||
NAME:2, | ||
KEY:3, | ||
VALUE:4, | ||
SCRIPT:5, | ||
TEXT: 0, | ||
NODE: 1, | ||
NAME: 2, | ||
KEY: 3, | ||
VALUE: 4, | ||
SCRIPT: 5, | ||
STYLE: 6, | ||
COMMENT: 7 | ||
} | ||
COMMENT: 7, | ||
}; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: The package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: The package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
18551
6
254
1
5