Comparing version 2.2.1 to 2.3.0
@@ -7,2 +7,18 @@ (function (global, factory) { | ||
// Babel-generated helper: returns the type string for `obj`.
// The function replaces itself on the first call: it reassigns `_typeof` to
// one of two implementations and then delegates to the chosen one, so the
// feature test below only runs once.
function _typeof(obj) { | ||
"@babel/helpers - typeof"; | ||
// When `typeof Symbol.iterator` already reports "symbol", the built-in
// `typeof` operator is used as-is.
if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { | ||
_typeof = function (obj) { | ||
return typeof obj; | ||
}; | ||
} else { | ||
// Otherwise report "symbol" for values whose constructor is `Symbol`
// (excluding `Symbol.prototype` itself) and fall back to `typeof` for
// everything else.
_typeof = function (obj) { | ||
return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; | ||
}; | ||
} | ||
return _typeof(obj); | ||
} | ||
function _classCallCheck(instance, Constructor) { | ||
@@ -195,6 +211,91 @@ if (!(instance instanceof Constructor)) { | ||
var phrasingConstructs = ['a', 'abbr', 'audio', 'b', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'command', 'data', 'datalist', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', 'input', 'kbd', 'keygen', 'label', 'mark', 'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q', 'ruby', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time', 'var', 'video', 'wbr', // special cases | ||
'map', 'area']; | ||
'map', 'area']; // copied from readium-cfi-js library | ||
// original function called "isElementBlacklisted" | ||
/**
 * Reports whether `element` matches any of the supplied blacklists.
 *
 * Each blacklist is optional; a missing or empty list is skipped.
 *
 * @param {Object} element - Node-like object; `className`, `tagName`, `id`
 *   and (through matchesLocalNameOrElement) its name are inspected.
 * @param {string[]} [classBlacklist] - Class names to reject.
 * @param {Array} [elementBlacklist] - Tag names (compared lower-cased) or
 *   element references to reject.
 * @param {string[]} [idBlacklist] - Element ids to reject.
 * @returns {boolean} true when the element is blacklisted, false otherwise.
 */
var isElementBlacklisted = function isElementBlacklisted(element, classBlacklist, elementBlacklist, idBlacklist) {
  if (classBlacklist && classBlacklist.length) {
    // A single intersection test covers the one-class case as well, so no
    // separate check for `classList.length === 1` is needed.
    var classList = getClassNameArray(element);
    if (intersection(classBlacklist, classList).length) {
      return true;
    }
  }

  // The tag check only applies to nodes that expose a tagName.
  if (elementBlacklist && elementBlacklist.length && element.tagName) {
    var isElementInBlacklist = elementBlacklist.some(function (blacklisted) {
      // Only strings are lower-cased; any other entry (e.g. an element
      // reference) is passed through for an identity comparison instead of
      // throwing on a missing toLowerCase method.
      var target = typeof blacklisted === 'string' ? blacklisted.toLowerCase() : blacklisted;
      return matchesLocalNameOrElement(element, target);
    });
    if (isElementInBlacklist) {
      return true;
    }
  }

  if (idBlacklist && idBlacklist.length) {
    var id = element.id;
    if (id && id.length && idBlacklist.includes(id)) {
      return true;
    }
  }

  return false;
};
/**
 * Returns the values of `array1` that also occur in `array2`.
 *
 * The result keeps the order (and any duplicates) of `array1`.
 *
 * @param {Iterable} array1 - Values to filter.
 * @param {Array} array2 - Values to look up.
 * @returns {Array} The values of array1 found in array2.
 */
var intersection = function intersection(array1, array2) {
  // Array.from keeps support for any iterable first argument without the
  // generated iterator helper and its try/catch/finally scaffolding.
  return Array.from(array1).filter(function (value) {
    return array2.includes(value);
  });
};
/**
 * Returns the class names of `element` as an array of non-empty tokens.
 *
 * Accepts a plain string `className` or an object carrying a string
 * `baseVal` property (presumably the SVGAnimatedString exposed by SVG
 * elements; only `baseVal` is checked here).
 *
 * @param {Object} element - Node-like object whose `className` is read.
 * @returns {string[]} Class tokens; empty when no usable className exists.
 */
var getClassNameArray = function getClassNameArray(element) {
  var className = element.className;
  var rawClassName = '';

  if (typeof className === 'string') {
    rawClassName = className;
  } else if (className && typeof className.baseVal === 'string') {
    // The truthiness guard keeps a null className from throwing.
    rawClassName = className.baseVal;
  }

  // Split on whitespace runs and drop empty tokens so that '' or padded
  // values never yield '' entries.
  return rawClassName.split(/\s+/).filter(Boolean);
};
/**
 * Compares `element` against either a name or another element.
 *
 * @param {Object} element - Node-like object with `localName` and/or
 *   `nodeName`.
 * @param {string|Object} otherNameOrElement - A name to compare with the
 *   element's name, or an object to compare by identity.
 * @returns {boolean} true on a match.
 */
var matchesLocalNameOrElement = function matchesLocalNameOrElement(element, otherNameOrElement) {
  if (typeof otherNameOrElement !== 'string') {
    return element === otherNameOrElement;
  }

  var localName = element.localName;
  var nodeName = element.nodeName;

  if ((localName || nodeName) === otherNameOrElement) {
    return true;
  }

  // When only nodeName is available it may be upper-case (HTML elements
  // report it that way), so also accept its lower-cased form.
  return !localName && typeof nodeName === 'string' && nodeName.toLowerCase() === otherNameOrElement;
};
var StringCollector = /*#__PURE__*/function () { | ||
function StringCollector() { | ||
var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; | ||
_classCallCheck(this, StringCollector); | ||
@@ -204,2 +305,3 @@ | ||
this.text = []; | ||
this.options = options; | ||
this.hasEncounteredFirstCell = false; | ||
@@ -270,2 +372,6 @@ this.lastBreak = null; | ||
value: function processElementNode(node, isOpening) { | ||
if (isElementBlacklisted(node, this.options.classBlacklist, this.options.elementBlacklist, this.options.idBlacklist)) { | ||
return true; | ||
} | ||
var tag = node.tagName.toLowerCase(); // Special case for Preformatted | ||
@@ -374,2 +480,4 @@ | ||
function MapCollector() { | ||
var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; | ||
_classCallCheck(this, MapCollector); | ||
@@ -379,2 +487,3 @@ | ||
this.text = []; | ||
this.options = options; | ||
this.hasEncounteredFirstCell = false; | ||
@@ -502,2 +611,6 @@ this.lastBreak = null; | ||
value: function processElementNode(node, isOpening) { | ||
if (isElementBlacklisted(node, this.options.classBlacklist, this.options.elementBlacklist, this.options.idBlacklist)) { | ||
return true; | ||
} | ||
var tag = node.tagName.toLowerCase(); // Special case for Preformatted | ||
@@ -743,6 +856,6 @@ | ||
var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; | ||
var collector = new StringCollector(); | ||
var collector = new StringCollector(options); | ||
if (options.map) { | ||
collector = new MapCollector(); | ||
collector = new MapCollector(options); | ||
} | ||
@@ -749,0 +862,0 @@ |
{ | ||
"name": "degausser", | ||
"version": "2.2.1", | ||
"version": "2.3.0", | ||
"description": "Transforms HTML to plain text by eliminating tags from a document.", | ||
@@ -32,9 +32,9 @@ "author": "FlowPub", | ||
"devDependencies": { | ||
"@babel/core": "7.12.9", | ||
"@babel/preset-env": "7.12.7", | ||
"@babel/core": "7.13.15", | ||
"@babel/preset-env": "7.13.15", | ||
"@rollup/plugin-node-resolve": "8.4.0", | ||
"glob": "7.1.6", | ||
"jest": "26.6.3", | ||
"prettier": "2.1.2", | ||
"rollup": "2.23.0", | ||
"prettier": "2.2.1", | ||
"rollup": "2.39.0", | ||
"rollup-plugin-babel": "4.4.0" | ||
@@ -41,0 +41,0 @@ }, |
@@ -6,6 +6,6 @@ import { StringCollector } from './stringCollector' | ||
export const degausser = (parentNode, options = {}) => { | ||
let collector = new StringCollector() | ||
let collector = new StringCollector(options) | ||
if (options.map) { | ||
collector = new MapCollector() | ||
collector = new MapCollector(options) | ||
} | ||
@@ -16,8 +16,3 @@ | ||
export const getRangeFromOffset = ( | ||
start, | ||
end, | ||
doc = document, | ||
map = null, | ||
) => { | ||
export const getRangeFromOffset = (start, end, doc = document, map = null) => { | ||
const docType = doc.nodeType | ||
@@ -24,0 +19,0 @@ if ( |
@@ -8,2 +8,3 @@ import { | ||
phrasingConstructs, | ||
isElementBlacklisted, | ||
} from './util' | ||
@@ -17,6 +18,8 @@ | ||
export class MapCollector { | ||
constructor() { | ||
constructor(options = {}) { | ||
this.map = [] | ||
this.text = [] | ||
this.options = options | ||
this.hasEncounteredFirstCell = false | ||
@@ -131,2 +134,13 @@ this.lastBreak = null | ||
processElementNode(node, isOpening) { | ||
if ( | ||
isElementBlacklisted( | ||
node, | ||
this.options.classBlacklist, | ||
this.options.elementBlacklist, | ||
this.options.idBlacklist, | ||
) | ||
) { | ||
return true | ||
} | ||
const tag = node.tagName.toLowerCase() | ||
@@ -252,11 +266,25 @@ | ||
if (entity.node.nodeType === Node.TEXT_NODE || entity.node.tagName === 'img') { | ||
const nodeContent = entity.node.tagName === 'img' ? | ||
entity.node.getAttribute('alt').normalize() : | ||
entity.node.textContent.normalize() | ||
if ( | ||
entity.node.nodeType === Node.TEXT_NODE || | ||
entity.node.tagName === 'img' | ||
) { | ||
const nodeContent = | ||
entity.node.tagName === 'img' | ||
? entity.node.getAttribute('alt').normalize() | ||
: entity.node.textContent.normalize() | ||
for (let charInMap = 0, charInNode = 0; charInNode < nodeContent.length; ++charInNode) { | ||
const isEqual = entity.content.charAt(charInMap) === nodeContent.charAt(charInNode) | ||
const isMapWhitespace = isCharWhitespace(entity.content.charCodeAt(charInMap)) | ||
const isNodeWhitespace = isCharWhitespace(nodeContent.charCodeAt(charInNode)) | ||
for ( | ||
let charInMap = 0, charInNode = 0; | ||
charInNode < nodeContent.length; | ||
++charInNode | ||
) { | ||
const isEqual = | ||
entity.content.charAt(charInMap) === | ||
nodeContent.charAt(charInNode) | ||
const isMapWhitespace = isCharWhitespace( | ||
entity.content.charCodeAt(charInMap), | ||
) | ||
const isNodeWhitespace = isCharWhitespace( | ||
nodeContent.charCodeAt(charInNode), | ||
) | ||
@@ -268,7 +296,9 @@ if (isEqual || (isMapWhitespace && isNodeWhitespace)) { | ||
after: charInMap - 1, | ||
position: charInNode | ||
position: charInNode, | ||
} | ||
whitespace.push(skips) | ||
} else { | ||
throw new Error(`Degauss error, character mismatch and not a whitespace`) | ||
throw new Error( | ||
`Degauss error, character mismatch and not a whitespace`, | ||
) | ||
} | ||
@@ -275,0 +305,0 @@ } |
@@ -7,8 +7,10 @@ import { | ||
phrasingConstructs, | ||
isElementBlacklisted, | ||
} from './util' | ||
export class StringCollector { | ||
constructor() { | ||
constructor(options = {}) { | ||
this.runs = [] | ||
this.text = [] | ||
this.options = options | ||
@@ -74,2 +76,13 @@ this.hasEncounteredFirstCell = false | ||
processElementNode(node, isOpening) { | ||
if ( | ||
isElementBlacklisted( | ||
node, | ||
this.options.classBlacklist, | ||
this.options.elementBlacklist, | ||
this.options.idBlacklist, | ||
) | ||
) { | ||
return true | ||
} | ||
const tag = node.tagName.toLowerCase() | ||
@@ -76,0 +89,0 @@ |
@@ -147,2 +147,72 @@ function autoBind() { | ||
// copied from readium-cfi-js library | ||
// original function called "isElementBlacklisted" | ||
/**
 * Reports whether `element` matches any of the supplied blacklists.
 *
 * Each blacklist is optional; a missing or empty list is skipped.
 *
 * @param {Object} element - Node-like object; `className`, `tagName`, `id`
 *   and (through matchesLocalNameOrElement) its name are inspected.
 * @param {string[]} [classBlacklist] - Class names to reject.
 * @param {Array} [elementBlacklist] - Tag names (compared lower-cased) or
 *   element references to reject.
 * @param {string[]} [idBlacklist] - Element ids to reject.
 * @returns {boolean} true when the element is blacklisted, false otherwise.
 */
const isElementBlacklisted = (
  element,
  classBlacklist,
  elementBlacklist,
  idBlacklist,
) => {
  if (classBlacklist && classBlacklist.length) {
    // A single intersection test covers the one-class case as well, so no
    // separate check for `classList.length === 1` is needed.
    const classList = getClassNameArray(element)
    if (intersection(classBlacklist, classList).length) {
      return true
    }
  }

  // The tag check only applies to nodes that expose a tagName.
  if (elementBlacklist && elementBlacklist.length && element.tagName) {
    const isElementInBlacklist = elementBlacklist.some((blacklisted) =>
      // Only strings are lower-cased; any other entry (e.g. an element
      // reference) is passed through for an identity comparison instead of
      // throwing on a missing toLowerCase method.
      matchesLocalNameOrElement(
        element,
        typeof blacklisted === 'string'
          ? blacklisted.toLowerCase()
          : blacklisted,
      ),
    )
    if (isElementInBlacklist) {
      return true
    }
  }

  if (idBlacklist && idBlacklist.length) {
    const { id } = element
    if (id && id.length && idBlacklist.includes(id)) {
      return true
    }
  }

  return false
}
/**
 * Returns the values of `array1` that also occur in `array2`.
 *
 * The result keeps the order (and any duplicates) of `array1`.
 *
 * @param {Iterable} array1 - Values to filter.
 * @param {Array} array2 - Values to look up.
 * @returns {Array} The values of array1 found in array2.
 */
const intersection = (array1, array2) =>
  // Array.from keeps support for any iterable first argument, as the
  // previous for...of loop did.
  Array.from(array1).filter((value) => array2.includes(value))
/**
 * Returns the class names of `element` as an array of non-empty tokens.
 *
 * Accepts a plain string `className` or an object carrying a string
 * `baseVal` property (presumably the SVGAnimatedString exposed by SVG
 * elements; only `baseVal` is checked here).
 *
 * @param {Object} element - Node-like object whose `className` is read.
 * @returns {string[]} Class tokens; empty when no usable className exists.
 */
const getClassNameArray = (element) => {
  const { className } = element
  let rawClassName = ''

  if (typeof className === 'string') {
    rawClassName = className
  } else if (
    // typeof null is 'object', so null must be excluded explicitly.
    className !== null &&
    typeof className === 'object' &&
    typeof className.baseVal === 'string'
  ) {
    rawClassName = className.baseVal
  }

  // Split on whitespace runs and drop empty tokens so that '' or padded
  // values never yield '' entries.
  return rawClassName.split(/\s+/).filter(Boolean)
}
/**
 * Compares `element` against either a name or another element.
 *
 * @param {Object} element - Node-like object with `localName` and/or
 *   `nodeName`.
 * @param {string|Object} otherNameOrElement - A name to compare with the
 *   element's name, or an object to compare by identity.
 * @returns {boolean} true on a match.
 */
const matchesLocalNameOrElement = (element, otherNameOrElement) => {
  if (typeof otherNameOrElement !== 'string') {
    return element === otherNameOrElement
  }

  const { localName, nodeName } = element
  if ((localName || nodeName) === otherNameOrElement) {
    return true
  }

  // When only nodeName is available it may be upper-case (HTML elements
  // report it that way), so also accept its lower-cased form.
  return (
    !localName &&
    typeof nodeName === 'string' &&
    nodeName.toLowerCase() === otherNameOrElement
  )
}
export { | ||
@@ -155,3 +225,4 @@ autoBind, | ||
phrasingConstructs, | ||
isCharWhitespace | ||
isElementBlacklisted, | ||
isCharWhitespace, | ||
} |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
48355
1491