Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

degausser

Package Overview
Dependencies
Maintainers
3
Versions
17
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

degausser - npm Package Compare versions

Comparing version 2.4.0 to 2.4.1

192

dist/degausser.js

@@ -143,2 +143,6 @@ (function (global, factory) {

/**
 * Report whether a character code is a line-break character.
 * @param {number} charCode - code unit, e.g. from String.prototype.charCodeAt
 * @returns {boolean} true for line feed (10) or carriage return (13)
 */
var isCharNewLine = function isCharNewLine(charCode) {
  switch (charCode) {
    case 10: // line feed
    case 13: // carriage return
      return true;
    default:
      return false;
  }
};
var BreakType = {

@@ -149,3 +153,88 @@ NONE: 'none',

};
/**
 * Drop leading whitespace from a string, leaving the rest untouched.
 * A string with no non-whitespace character (including the empty string)
 * is returned unchanged.
 * @param {string} string
 * @returns {string}
 */
var trimBeginOnly = function trimBeginOnly(string) {
  var cursor = 0;
  // Advance past every leading character that isCharWhitespace accepts
  while (cursor < string.length && isCharWhitespace(string.charCodeAt(cursor))) {
    cursor += 1;
  }
  // Ran off the end: nothing but whitespace, so hand the input back as-is
  if (cursor === string.length) {
    return string;
  }
  return string.slice(cursor);
};
/**
 * Strip a trailing line break, and everything around it, from the end of a string.
 *
 * The string is scanned backwards from its end. If at least one newline
 * character (as reported by isCharNewLine) lies between the last
 * non-whitespace character and the end of the string, everything after that
 * last non-whitespace character is removed - the newline(s) and any other
 * whitespace on either side of them. If no newline is found in that trailing
 * region, the string is returned unchanged, so plain trailing spaces survive.
 *
 * NOTE(review): assumes isCharWhitespace also reports newline characters as
 * whitespace; that helper is defined elsewhere - confirm.
 *
 * @param {string} string
 * @returns {string|null} the trimmed string, or null when the string contains
 *   no non-whitespace character (including the empty string)
 */
var trimEndNewLine = function trimEndNewLine(string) {
  var lastNonNewLine = null;
  var foundNewLineCharacter = false;
  var foundNonWhiteSpaceCharacter = false;
  for (var index = string.length - 1; index >= 0; index--) {
    var charCode = string.charCodeAt(index);
    var isNewLine = isCharNewLine(charCode);
    if (isCharWhitespace(charCode)) {
      if (!isNewLine) {
        // Plain trailing whitespace: skip over it and keep scanning
        continue;
      } else {
        foundNewLineCharacter = true;
      }
    } else {
      foundNonWhiteSpaceCharacter = true;
    }
    if (!isNewLine) {
      // Reached the last character of real content; only cut here if a
      // newline was seen somewhere after it
      if (foundNewLineCharacter) {
        lastNonNewLine = index;
      }
      break;
    }
  }
  // Nothing but whitespace (or an empty string)
  if (!foundNonWhiteSpaceCharacter) {
    return null;
  }
  // No newline after the last content character: nothing to trim
  if (lastNonNewLine === null) {
    return string;
  }
  // Index 0 is a valid cut position, so it must not be truthiness-tested:
  // "a\n" has to become "a", just as "ab\n" becomes "ab".
  return string.slice(0, lastNonNewLine + 1);
};
/**
 * Run trimBeginOnly on the string, then pass the result to trimEndNewLine.
 * @param {string} string
 * @returns {*} whatever trimEndNewLine returns for the leading-trimmed string
 */
var trimAllExceptEndWhiteSpace = function trimAllExceptEndWhiteSpace(string) {
  var withoutLeadingWhitespace = trimBeginOnly(string);
  return trimEndNewLine(withoutLeadingWhitespace);
};
var trimBeginAndEnd = function trimBeginAndEnd(string) {

@@ -208,2 +297,6 @@ // Get the first and last non-whitespace character index

/**
 * Run collapseWhitespace on the string, then trimBeginAndEnd on the result.
 * @param {string} string
 * @returns {*} the value produced by trimBeginAndEnd
 */
var trimAndCollapseWhitespace = function trimAndCollapseWhitespace(string) {
  var collapsed = collapseWhitespace(string);
  return trimBeginAndEnd(collapsed);
};
var blacklist = ['base', 'command', 'link', 'meta', 'noscript', 'script', 'style', 'title', // special cases

@@ -374,3 +467,21 @@ // "html",

case BreakType.DOUBLE:
this.runs.push('\n\n');
var paragraphBreakAdded = false; // iterate through runs backwards:
for (var i = this.runs.length - 1; i >= 0; i--) {
var run = this.runs[i];
if (run === '\n\n') {
// found double break
paragraphBreakAdded = true;
break;
} else if (run !== '\n') {
// found text content
break;
}
}
if (!paragraphBreakAdded) {
this.runs.push('\n\n');
}
break;

@@ -382,4 +493,4 @@ }

}, {
key: "processText",
value: function processText() {
key: "processTextAndTrim",
value: function processTextAndTrim(trimmingFunction) {
if (this.text.length === 0) {

@@ -390,7 +501,7 @@ return;

var trimmed = trimBeginAndEnd(this.text.join(''));
var trimmed = trimmingFunction(this.text.join(''));
if (!trimmed) {
// Trimmed into an empty string
// Preserve all preceding breaks
// Preserve all preceding breaks
this.text = [];

@@ -404,6 +515,17 @@ return;

this.runs.push(trimBeginAndEnd(collapseWhitespace(trimmed)));
this.runs.push(trimmingFunction(trimmed));
this.text = [];
}
}, {
key: "processText",
value: function processText() {
var trimEndSpaces = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : true;
if (trimEndSpaces) {
this.processTextAndTrim(trimAndCollapseWhitespace);
} else {
this.processTextAndTrim(trimAllExceptEndWhiteSpace);
}
}
}, {
key: "processElementNode",

@@ -429,3 +551,3 @@ value: function processElementNode(node, isOpening) {

case 'br':
this.processText();
this.processText(false);
this.processBreaks();

@@ -562,6 +684,22 @@ this.runs.push('\n');

case BreakType.DOUBLE:
this.map.push({
type: MapType.BREAK,
"double": true
});
var paragraphBreakAdded = false; // iterate through map backwards:
for (var i = this.map.length - 1; i >= 0; --i) {
var map = this.map[i];
if (map.type === MapType.BREAK && map["double"]) {
paragraphBreakAdded = true;
break;
} else if (!this.isSingleBreak(map)) {
break;
}
}
if (!paragraphBreakAdded) {
this.map.push({
type: MapType.BREAK,
"double": true
});
}
break;

@@ -573,4 +711,11 @@ }

}, {
key: "processText",
value: function processText() {
key: "isSingleBreak",
value: function isSingleBreak(mapObject) {
var isSingleBreak = mapObject.type === MapType.BREAK && !mapObject["double"];
var isNewLine = mapObject.type === MapType.TEXT && mapObject.content === '\n';
return isSingleBreak || isNewLine;
}
}, {
key: "processTextAndTrim",
value: function processTextAndTrim(trimmingFunction) {
var _this$map;

@@ -586,7 +731,7 @@

var trimmed = trimBeginAndEnd(joinedText);
var trimmed = trimmingFunction(joinedText);
if (!trimmed) {
// Trimmed into an empty string
// Preserve all preceding breaks
// Preserve all preceding breaks
this.text = [];

@@ -596,3 +741,3 @@ return;

var fullText = trimBeginAndEnd(collapseWhitespace(trimmed));
var fullText = trimmingFunction(trimmed);
var blockMap = [];

@@ -607,3 +752,3 @@ var currentIndexOfString = 0;

var textMap = _step.value;
var shrunkText = trimBeginAndEnd(collapseWhitespace(textMap.string));
var shrunkText = trimmingFunction(textMap.string);

@@ -652,2 +797,13 @@ if (!shrunkText) {

}, {
key: "processText",
value: function processText() {
var trimEndSpaces = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : true;
if (trimEndSpaces) {
this.processTextAndTrim(trimAndCollapseWhitespace);
} else {
this.processTextAndTrim(trimAllExceptEndWhiteSpace);
}
}
}, {
key: "processElementNode",

@@ -678,3 +834,3 @@ value: function processElementNode(node, isOpening) {

case 'br':
this.processText();
this.processText(false);
this.processBreaks();

@@ -681,0 +837,0 @@ this.map.push({

2

package.json
{
"name": "degausser",
"version": "2.4.0",
"version": "2.4.1",
"description": "Transforms HTML to plain text by eliminating tags from a document.",

@@ -5,0 +5,0 @@ "author": "FlowPub",

@@ -5,3 +5,2 @@ import {

trimBeginAndEnd,
collapseWhitespace,
isCharWhitespace,

@@ -12,2 +11,4 @@ phrasingConstructs,

elementCanHaveAltText,
trimAllExceptEndWhiteSpace,
trimAndCollapseWhitespace,
} from './util'

@@ -59,6 +60,19 @@

case BreakType.DOUBLE:
this.map.push({
type: MapType.BREAK,
double: true,
})
let paragraphBreakAdded = false
// iterate through map backwards:
for (let i = this.map.length - 1; i >= 0; --i) {
const map = this.map[i]
if (map.type === MapType.BREAK && map.double) {
paragraphBreakAdded = true
break
} else if (!this.isSingleBreak(map)) {
break
}
}
if (!paragraphBreakAdded) {
this.map.push({
type: MapType.BREAK,
double: true,
})
}
break

@@ -70,3 +84,9 @@ }

processText() {
isSingleBreak(mapObject) {
const isSingleBreak = mapObject.type === MapType.BREAK && !mapObject.double
const isNewLine = mapObject.type === MapType.TEXT && mapObject.content === '\n'
return isSingleBreak || isNewLine
}
processTextAndTrim(trimmingFunction) {
if (this.text.length === 0) {

@@ -78,7 +98,6 @@ return

// TODO: might have to check for null string here
const trimmed = trimBeginAndEnd(joinedText)
const trimmed = trimmingFunction(joinedText)
if (!trimmed) {
// Trimmed into an empty string
// Preserve all preceding breaks
// Preserve all preceding breaks
this.text = []

@@ -88,3 +107,3 @@ return

let fullText = trimBeginAndEnd(collapseWhitespace(trimmed))
let fullText = trimmingFunction(trimmed)

@@ -95,3 +114,3 @@ let blockMap = []

for (const textMap of this.text) {
const shrunkText = trimBeginAndEnd(collapseWhitespace(textMap.string))
const shrunkText = trimmingFunction(textMap.string)
if (!shrunkText) {

@@ -140,2 +159,10 @@ continue

processText(trimEndSpaces = true) {
if (trimEndSpaces) {
this.processTextAndTrim(trimAndCollapseWhitespace)
} else {
this.processTextAndTrim(trimAllExceptEndWhiteSpace)
}
}
processElementNode(node, isOpening) {

@@ -176,3 +203,3 @@ if (

case 'br':
this.processText()
this.processText(false)
this.processBreaks()

@@ -179,0 +206,0 @@

@@ -5,3 +5,2 @@ import {

trimBeginAndEnd,
collapseWhitespace,
phrasingConstructs,

@@ -11,2 +10,4 @@ isElementBlacklisted,

elementCanHaveAltText,
trimAndCollapseWhitespace,
trimAllExceptEndWhiteSpace,
} from './util'

@@ -49,3 +50,18 @@

case BreakType.DOUBLE:
this.runs.push('\n\n')
let paragraphBreakAdded = false
// iterate through runs backwards:
for (let i = this.runs.length - 1; i >= 0; i--) {
const run = this.runs[i]
if (run === '\n\n') {
// found double break
paragraphBreakAdded = true
break
} else if (run !== '\n') {
// found text content
break
}
}
if (!paragraphBreakAdded) {
this.runs.push('\n\n')
}
break

@@ -57,3 +73,3 @@ }

processText() {
processTextAndTrim(trimmingFunction) {
if (this.text.length === 0) {

@@ -64,6 +80,6 @@ return

// Trim
const trimmed = trimBeginAndEnd(this.text.join(''))
const trimmed = trimmingFunction(this.text.join(''))
if (!trimmed) {
// Trimmed into an empty string
// Preserve all preceding breaks
// Preserve all preceding breaks
this.text = []

@@ -77,6 +93,14 @@ return

this.runs.push(trimBeginAndEnd(collapseWhitespace(trimmed)))
this.runs.push(trimmingFunction(trimmed))
this.text = []
}
processText(trimEndSpaces = true) {
if (trimEndSpaces) {
this.processTextAndTrim(trimAndCollapseWhitespace)
} else {
this.processTextAndTrim(trimAllExceptEndWhiteSpace)
}
}
processElementNode(node, isOpening) {

@@ -111,3 +135,3 @@ if (

case 'br':
this.processText()
this.processText(false)
this.processBreaks()

@@ -114,0 +138,0 @@ this.runs.push('\n')

@@ -14,2 +14,6 @@ function autoBind() {

/**
 * Report whether a character code is a line-break character.
 * @param {number} charCode - code unit, e.g. from String.prototype.charCodeAt
 * @returns {boolean} true for line feed (10) or carriage return (13)
 */
const isCharNewLine = (charCode) => {
  const isLineFeed = charCode === 10
  const isCarriageReturn = charCode === 13
  return isLineFeed || isCarriageReturn
}
const BreakType = {

@@ -21,2 +25,82 @@ NONE: 'none',

/**
 * Drop leading whitespace from a string, leaving the rest untouched.
 * A string with no non-whitespace character (including the empty string)
 * is returned unchanged.
 * @param {string} string
 * @returns {string}
 */
const trimBeginOnly = (string) => {
  for (let position = 0; position < string.length; position += 1) {
    // The first character that is not whitespace marks where the result starts
    if (!isCharWhitespace(string.charCodeAt(position))) {
      return string.slice(position)
    }
  }
  // Nothing but whitespace: hand the input back as-is
  return string
}
/**
 * Strip a trailing line break, and everything around it, from the end of a string.
 *
 * The string is scanned backwards from its end. If at least one newline
 * character (as reported by isCharNewLine) lies between the last
 * non-whitespace character and the end of the string, everything after that
 * last non-whitespace character is removed - the newline(s) and any other
 * whitespace on either side of them. If no newline is found in that trailing
 * region, the string is returned unchanged, so plain trailing spaces survive.
 *
 * NOTE(review): assumes isCharWhitespace also reports newline characters as
 * whitespace; that helper is defined elsewhere - confirm.
 *
 * @param {string} string
 * @returns {string|null} the trimmed string, or null when the string contains
 *   no non-whitespace character (including the empty string)
 */
const trimEndNewLine = (string) => {
  let lastNonNewLine = null
  let foundNewLineCharacter = false
  let foundNonWhiteSpaceCharacter = false
  for (let index = string.length - 1; index >= 0; index--) {
    const charCode = string.charCodeAt(index)
    const isNewLine = isCharNewLine(charCode)
    if (isCharWhitespace(charCode)) {
      if (!isNewLine) {
        // Plain trailing whitespace: skip over it and keep scanning
        continue
      } else {
        foundNewLineCharacter = true
      }
    } else {
      foundNonWhiteSpaceCharacter = true
    }
    if (!isNewLine) {
      // Reached the last character of real content; only cut here if a
      // newline was seen somewhere after it
      if (foundNewLineCharacter) {
        lastNonNewLine = index
      }
      break
    }
  }
  // Nothing but whitespace (or an empty string)
  if (!foundNonWhiteSpaceCharacter) {
    return null
  }
  // No newline after the last content character: nothing to trim
  if (lastNonNewLine === null) {
    return string
  }
  // Index 0 is a valid cut position, so it must not be truthiness-tested:
  // "a\n" has to become "a", just as "ab\n" becomes "ab".
  return string.slice(0, lastNonNewLine + 1)
}
/**
 * Run trimBeginOnly on the string, then pass the result to trimEndNewLine.
 * @param {string} string
 * @returns {*} whatever trimEndNewLine returns for the leading-trimmed string
 */
const trimAllExceptEndWhiteSpace = (string) => {
  const withoutLeadingWhitespace = trimBeginOnly(string)
  return trimEndNewLine(withoutLeadingWhitespace)
}
const trimBeginAndEnd = (string) => {

@@ -53,2 +137,3 @@ // Get the first and last non-whitespace character index

}
const collapseWhitespace = (string) => {

@@ -84,2 +169,6 @@ // Collapse all other sequential whitespace into a single whitespace

/**
 * Run collapseWhitespace on the string, then trimBeginAndEnd on the result.
 * @param {string} string
 * @returns {*} the value produced by trimBeginAndEnd
 */
const trimAndCollapseWhitespace = (string) => {
  const collapsed = collapseWhitespace(string)
  return trimBeginAndEnd(collapsed)
}
const blacklist = [

@@ -257,3 +346,7 @@ 'base',

BreakType,
trimBeginOnly,
trimEndNewLine,
trimBeginAndEnd,
trimAllExceptEndWhiteSpace,
trimAndCollapseWhitespace,
collapseWhitespace,

@@ -260,0 +353,0 @@ phrasingConstructs,

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc