Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

degausser

Package Overview
Dependencies
Maintainers
2
Versions
17
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

degausser - npm Package Compare versions

Comparing version 2.2.1 to 2.3.0

119

dist/degausser.js

@@ -7,2 +7,18 @@ (function (global, factory) {

/**
 * Babel runtime helper: a `typeof` replacement that reports "symbol" for
 * Symbol values even when Symbol is provided by a polyfill.
 *
 * On the first call the helper picks an implementation based on whether the
 * environment has native symbols, overwrites its own binding with that
 * implementation, and then delegates to it.
 *
 * @param {*} obj - Value to inspect.
 * @returns {string} The `typeof` string for `obj`.
 */
function _typeof(obj) {
  "@babel/helpers - typeof";

  var hasNativeSymbols = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";
  if (!hasNativeSymbols) {
    // Polyfilled (or missing) Symbol: recognise symbol instances by constructor.
    _typeof = function (value) {
      if (value && typeof Symbol === "function" && value.constructor === Symbol && value !== Symbol.prototype) {
        return "symbol";
      }
      return typeof value;
    };
  } else {
    // Native symbols: the built-in operator already gives the right answer.
    _typeof = function (value) {
      return typeof value;
    };
  }
  return _typeof(obj);
}
function _classCallCheck(instance, Constructor) {

@@ -195,6 +211,91 @@ if (!(instance instanceof Constructor)) {

var phrasingConstructs = ['a', 'abbr', 'audio', 'b', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'command', 'data', 'datalist', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', 'input', 'kbd', 'keygen', 'label', 'mark', 'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q', 'ruby', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time', 'var', 'video', 'wbr', // special cases
'map', 'area'];
'map', 'area']; // copied from readium-cfi-js library
// original function called "isElementBlacklisted"
/**
 * Tell whether an element matches any of the supplied blacklists.
 *
 * The three lists are checked in order (classes, elements, ids); a missing or
 * empty list is skipped.
 *
 * @param {Object} element - Node to test; its `className`, `tagName` and `id` are read.
 * @param {Array} [classBlacklist] - Class names; any shared class is a match.
 * @param {Array} [elementBlacklist] - Tag names (compared lower-cased) or
 *   element references (compared by identity).
 * @param {Array} [idBlacklist] - Element ids.
 * @returns {boolean} `true` when the element matches at least one list.
 */
var isElementBlacklisted = function isElementBlacklisted(element, classBlacklist, elementBlacklist, idBlacklist) {
  if (classBlacklist && classBlacklist.length) {
    var classList = getClassNameArray(element);
    // The single-class case is checked directly before the general intersection.
    if (classList.length === 1 && classBlacklist.includes(classList[0])) {
      return true;
    }
    if (classList.length && intersection(classBlacklist, classList).length) {
      return true;
    }
  }
  // Nodes without a tagName are never matched against the element list.
  if (elementBlacklist && elementBlacklist.length && element.tagName) {
    var isElementInBlacklist = elementBlacklist.some(function (blacklisted) {
      // Only strings are lower-cased; other entries are passed through untouched
      // so matchesLocalNameOrElement can compare them by identity instead of
      // this call throwing on a missing toLowerCase method.
      var target = typeof blacklisted === 'string' ? blacklisted.toLowerCase() : blacklisted;
      return matchesLocalNameOrElement(element, target);
    });
    if (isElementInBlacklist) {
      return true;
    }
  }
  if (idBlacklist && idBlacklist.length) {
    var id = element.id;
    if (id && id.length && idBlacklist.includes(id)) {
      return true;
    }
  }
  return false;
};
/**
 * Collect the values of `array1` that also occur in `array2`.
 *
 * Values are returned in `array1` order and duplicates in `array1` are kept.
 * Membership is tested with `array2.indexOf`.
 *
 * @param {Iterable} array1 - Values to filter.
 * @param {Array} array2 - Values to test membership against.
 * @returns {Array} The shared values.
 */
var intersection = function intersection(array1, array2) {
  var shared = [];
  var iter = _createForOfIteratorHelper(array1);
  var step;
  try {
    iter.s();
    while (!(step = iter.n()).done) {
      var candidate = step.value;
      if (array2.indexOf(candidate) === -1) {
        continue;
      }
      shared.push(candidate);
    }
  } catch (err) {
    iter.e(err);
  } finally {
    iter.f();
  }
  return shared;
};
/**
 * Return the element's class names as an array.
 *
 * Accepts a plain string `className`, or an object `className` carrying the
 * class string in `baseVal`. Any other shape (including `null`) yields an
 * empty array.
 *
 * Runs of whitespace are treated as one separator and empty tokens are
 * dropped, so an empty or padded class attribute never produces an
 * empty-string "class".
 *
 * @param {Object} element - Object whose `className` is read.
 * @returns {string[]} The class names, possibly empty.
 */
var getClassNameArray = function getClassNameArray(element) {
  var className = element.className;
  var classAttribute = '';
  if (typeof className === 'string') {
    classAttribute = className;
  } else if (className !== null && _typeof(className) === 'object' && typeof className.baseVal === 'string') {
    // The null guard matters: typeof null is "object".
    classAttribute = className.baseVal;
  }
  return classAttribute.split(/\s+/).filter(Boolean);
};
/**
 * Compare an element against either a name or another element.
 *
 * @param {Object} element - Element under test.
 * @param {string|Object} otherNameOrElement - A string is compared with the
 *   element's `localName` (falling back to `nodeName`); anything else is
 *   compared with the element by identity.
 * @returns {boolean} Whether the element matches.
 */
var matchesLocalNameOrElement = function matchesLocalNameOrElement(element, otherNameOrElement) {
  var isName = typeof otherNameOrElement === 'string';
  if (!isName) {
    return element === otherNameOrElement;
  }
  var ownName = element.localName || element.nodeName;
  return ownName === otherNameOrElement;
};
var StringCollector = /*#__PURE__*/function () {
function StringCollector() {
var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
_classCallCheck(this, StringCollector);

@@ -204,2 +305,3 @@

this.text = [];
this.options = options;
this.hasEncounteredFirstCell = false;

@@ -270,2 +372,6 @@ this.lastBreak = null;

value: function processElementNode(node, isOpening) {
if (isElementBlacklisted(node, this.options.classBlacklist, this.options.elementBlacklist, this.options.idBlacklist)) {
return true;
}
var tag = node.tagName.toLowerCase(); // Special case for Preformatted

@@ -374,2 +480,4 @@

function MapCollector() {
var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
_classCallCheck(this, MapCollector);

@@ -379,2 +487,3 @@

this.text = [];
this.options = options;
this.hasEncounteredFirstCell = false;

@@ -502,2 +611,6 @@ this.lastBreak = null;

value: function processElementNode(node, isOpening) {
if (isElementBlacklisted(node, this.options.classBlacklist, this.options.elementBlacklist, this.options.idBlacklist)) {
return true;
}
var tag = node.tagName.toLowerCase(); // Special case for Preformatted

@@ -743,6 +856,6 @@

var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
var collector = new StringCollector();
var collector = new StringCollector(options);
if (options.map) {
collector = new MapCollector();
collector = new MapCollector(options);
}

@@ -749,0 +862,0 @@

10

package.json
{
"name": "degausser",
"version": "2.2.1",
"version": "2.3.0",
"description": "Transforms HTML to plain text by eliminating tags from a document.",

@@ -32,9 +32,9 @@ "author": "FlowPub",

"devDependencies": {
"@babel/core": "7.12.9",
"@babel/preset-env": "7.12.7",
"@babel/core": "7.13.15",
"@babel/preset-env": "7.13.15",
"@rollup/plugin-node-resolve": "8.4.0",
"glob": "7.1.6",
"jest": "26.6.3",
"prettier": "2.1.2",
"rollup": "2.23.0",
"prettier": "2.2.1",
"rollup": "2.39.0",
"rollup-plugin-babel": "4.4.0"

@@ -41,0 +41,0 @@ },

@@ -6,6 +6,6 @@ import { StringCollector } from './stringCollector'

export const degausser = (parentNode, options = {}) => {
let collector = new StringCollector()
let collector = new StringCollector(options)
if (options.map) {
collector = new MapCollector()
collector = new MapCollector(options)
}

@@ -16,8 +16,3 @@

export const getRangeFromOffset = (
start,
end,
doc = document,
map = null,
) => {
export const getRangeFromOffset = (start, end, doc = document, map = null) => {
const docType = doc.nodeType

@@ -24,0 +19,0 @@ if (

@@ -8,2 +8,3 @@ import {

phrasingConstructs,
isElementBlacklisted,
} from './util'

@@ -17,6 +18,8 @@

export class MapCollector {
constructor() {
constructor(options = {}) {
this.map = []
this.text = []
this.options = options
this.hasEncounteredFirstCell = false

@@ -131,2 +134,13 @@ this.lastBreak = null

processElementNode(node, isOpening) {
if (
isElementBlacklisted(
node,
this.options.classBlacklist,
this.options.elementBlacklist,
this.options.idBlacklist,
)
) {
return true
}
const tag = node.tagName.toLowerCase()

@@ -252,11 +266,25 @@

if (entity.node.nodeType === Node.TEXT_NODE || entity.node.tagName === 'img') {
const nodeContent = entity.node.tagName === 'img' ?
entity.node.getAttribute('alt').normalize() :
entity.node.textContent.normalize()
if (
entity.node.nodeType === Node.TEXT_NODE ||
entity.node.tagName === 'img'
) {
const nodeContent =
entity.node.tagName === 'img'
? entity.node.getAttribute('alt').normalize()
: entity.node.textContent.normalize()
for (let charInMap = 0, charInNode = 0; charInNode < nodeContent.length; ++charInNode) {
const isEqual = entity.content.charAt(charInMap) === nodeContent.charAt(charInNode)
const isMapWhitespace = isCharWhitespace(entity.content.charCodeAt(charInMap))
const isNodeWhitespace = isCharWhitespace(nodeContent.charCodeAt(charInNode))
for (
let charInMap = 0, charInNode = 0;
charInNode < nodeContent.length;
++charInNode
) {
const isEqual =
entity.content.charAt(charInMap) ===
nodeContent.charAt(charInNode)
const isMapWhitespace = isCharWhitespace(
entity.content.charCodeAt(charInMap),
)
const isNodeWhitespace = isCharWhitespace(
nodeContent.charCodeAt(charInNode),
)

@@ -268,7 +296,9 @@ if (isEqual || (isMapWhitespace && isNodeWhitespace)) {

after: charInMap - 1,
position: charInNode
position: charInNode,
}
whitespace.push(skips)
} else {
throw new Error(`Degauss error, character mismatch and not a whitespace`)
throw new Error(
`Degauss error, character mismatch and not a whitespace`,
)
}

@@ -275,0 +305,0 @@ }

@@ -7,8 +7,10 @@ import {

phrasingConstructs,
isElementBlacklisted,
} from './util'
export class StringCollector {
constructor() {
constructor(options = {}) {
this.runs = []
this.text = []
this.options = options

@@ -74,2 +76,13 @@ this.hasEncounteredFirstCell = false

processElementNode(node, isOpening) {
if (
isElementBlacklisted(
node,
this.options.classBlacklist,
this.options.elementBlacklist,
this.options.idBlacklist,
)
) {
return true
}
const tag = node.tagName.toLowerCase()

@@ -76,0 +89,0 @@

@@ -147,2 +147,72 @@ function autoBind() {

// copied from readium-cfi-js library
// original function called "isElementBlacklisted"
/**
 * Tell whether an element matches any of the supplied blacklists.
 *
 * The three lists are checked in order (classes, elements, ids); a missing or
 * empty list is skipped.
 *
 * @param {Object} element - Node to test; its `className`, `tagName` and `id` are read.
 * @param {Array} [classBlacklist] - Class names; any shared class is a match.
 * @param {Array} [elementBlacklist] - Tag names (compared lower-cased) or
 *   element references (compared by identity).
 * @param {Array} [idBlacklist] - Element ids.
 * @returns {boolean} `true` when the element matches at least one list.
 */
const isElementBlacklisted = (
  element,
  classBlacklist,
  elementBlacklist,
  idBlacklist,
) => {
  if (classBlacklist && classBlacklist.length) {
    const classList = getClassNameArray(element)
    // The single-class case is checked directly before the general intersection.
    if (classList.length === 1 && classBlacklist.includes(classList[0])) {
      return true
    }
    if (classList.length && intersection(classBlacklist, classList).length) {
      return true
    }
  }
  // Nodes without a tagName are never matched against the element list.
  if (elementBlacklist && elementBlacklist.length && element.tagName) {
    const isElementInBlacklist = elementBlacklist.some((blacklisted) =>
      // Only strings are lower-cased; other entries are passed through untouched
      // so matchesLocalNameOrElement can compare them by identity instead of
      // this call throwing on a missing toLowerCase method.
      matchesLocalNameOrElement(
        element,
        typeof blacklisted === 'string'
          ? blacklisted.toLowerCase()
          : blacklisted,
      ),
    )
    if (isElementInBlacklist) {
      return true
    }
  }
  if (idBlacklist && idBlacklist.length) {
    const { id } = element
    if (id && id.length && idBlacklist.includes(id)) {
      return true
    }
  }
  return false
}
/**
 * Collect the values of `array1` that also occur in `array2`.
 *
 * Values are returned in `array1` order and duplicates in `array1` are kept.
 * Membership is tested with `array2.indexOf`.
 *
 * @param {Iterable} array1 - Values to filter.
 * @param {Array} array2 - Values to test membership against.
 * @returns {Array} The shared values.
 */
const intersection = (array1, array2) =>
  [...array1].filter((candidate) => array2.indexOf(candidate) !== -1)
/**
 * Return the element's class names as an array.
 *
 * Accepts a plain string `className`, or an object `className` carrying the
 * class string in `baseVal`. Any other shape (including `null`) yields an
 * empty array.
 *
 * Runs of whitespace are treated as one separator and empty tokens are
 * dropped, so an empty or padded class attribute never produces an
 * empty-string "class".
 *
 * @param {Object} element - Object whose `className` is read.
 * @returns {string[]} The class names, possibly empty.
 */
const getClassNameArray = (element) => {
  const { className } = element
  let classAttribute = ''
  if (typeof className === 'string') {
    classAttribute = className
  } else if (
    // The null guard matters: typeof null is "object".
    className !== null &&
    typeof className === 'object' &&
    typeof className.baseVal === 'string'
  ) {
    classAttribute = className.baseVal
  }
  return classAttribute.split(/\s+/).filter(Boolean)
}
/**
 * Compare an element against either a name or another element.
 *
 * @param {Object} element - Element under test.
 * @param {string|Object} otherNameOrElement - A string is compared with the
 *   element's `localName` (falling back to `nodeName`); anything else is
 *   compared with the element by identity.
 * @returns {boolean} Whether the element matches.
 */
const matchesLocalNameOrElement = (element, otherNameOrElement) => {
  const isName = typeof otherNameOrElement === 'string'
  if (!isName) {
    return element === otherNameOrElement
  }
  const ownName = element.localName || element.nodeName
  return ownName === otherNameOrElement
}
export {

@@ -155,3 +225,4 @@ autoBind,

phrasingConstructs,
isCharWhitespace
isElementBlacklisted,
isCharWhitespace,
}
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc