Comparing version 2.2.0 to 2.2.1
(function (global, factory) { | ||
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : | ||
typeof define === 'function' && define.amd ? define(['exports'], factory) : | ||
(global = global || self, factory(global.degausser = {})); | ||
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.degausser = {})); | ||
}(this, (function (exports) { 'use strict'; | ||
@@ -46,5 +46,8 @@ | ||
function _createForOfIteratorHelper(o) { | ||
function _createForOfIteratorHelper(o, allowArrayLike) { | ||
var it; | ||
if (typeof Symbol === "undefined" || o[Symbol.iterator] == null) { | ||
if (Array.isArray(o) || (o = _unsupportedIterableToArray(o))) { | ||
if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { | ||
if (it) o = it; | ||
var i = 0; | ||
@@ -75,4 +78,3 @@ | ||
var it, | ||
normalCompletion = true, | ||
var normalCompletion = true, | ||
didErr = false, | ||
@@ -190,21 +192,2 @@ err; | ||
var collectTrimmedWhitespace = function collectTrimmedWhitespace(string) { | ||
var spaces = []; | ||
var encounteredWhitespace = false; | ||
for (var index = 0; index < string.length; ++index) { | ||
if (isCharWhitespace(string.charCodeAt(index))) { | ||
if (encounteredWhitespace) { | ||
spaces.push(index); | ||
} else { | ||
encounteredWhitespace = true; | ||
} | ||
} else { | ||
encounteredWhitespace = false; | ||
} | ||
} | ||
return spaces; | ||
}; | ||
var blacklist = ['base', 'command', 'link', 'meta', 'noscript', 'script', 'style', 'title', // special cases | ||
@@ -482,3 +465,2 @@ // "html", | ||
var whitespace = collectTrimmedWhitespace(textMap.node.textContent); | ||
blockMap.push({ | ||
@@ -489,4 +471,3 @@ type: MapType.TEXT, | ||
length: shrunkText.length, | ||
content: shrunkText, | ||
whitespace: whitespace | ||
content: shrunkText | ||
}); | ||
@@ -645,6 +626,31 @@ fullText = fullText.slice(index + shrunkText.length); | ||
case MapType.TEXT: | ||
// TODO: Tests | ||
var whitespace = []; | ||
if (entity.node.nodeType === Node.TEXT_NODE || entity.node.tagName === 'img') { | ||
var nodeContent = entity.node.tagName === 'img' ? entity.node.getAttribute('alt').normalize() : entity.node.textContent.normalize(); | ||
for (var charInMap = 0, charInNode = 0; charInNode < nodeContent.length; ++charInNode) { | ||
var isEqual = entity.content.charAt(charInMap) === nodeContent.charAt(charInNode); | ||
var isMapWhitespace = isCharWhitespace(entity.content.charCodeAt(charInMap)); | ||
var isNodeWhitespace = isCharWhitespace(nodeContent.charCodeAt(charInNode)); | ||
if (isEqual || isMapWhitespace && isNodeWhitespace) { | ||
++charInMap; | ||
} else if (isMapWhitespace || isNodeWhitespace) { | ||
var skips = { | ||
after: charInMap - 1, | ||
position: charInNode | ||
}; | ||
whitespace.push(skips); | ||
} else { | ||
throw new Error("Degauss error, character mismatch and not a whitespace"); | ||
} | ||
} | ||
} | ||
result.push({ | ||
node: entity.node, | ||
content: entity.content, | ||
whitespace: entity.whitespace, | ||
whitespace: whitespace, | ||
start: runningIndex, | ||
@@ -778,3 +784,3 @@ length: entity.length | ||
if (whitespaceEntry <= adjustedStart) { | ||
if (whitespaceEntry.after < adjustedStart) { | ||
++skips; | ||
@@ -807,3 +813,3 @@ } | ||
if (_whitespaceEntry <= adjustedEnd) { | ||
if (_whitespaceEntry.after < adjustedEnd) { | ||
++_skips; | ||
@@ -810,0 +816,0 @@ } |
{ | ||
"name": "degausser", | ||
"version": "2.2.0", | ||
"version": "2.2.1", | ||
"description": "Transforms HTML to plain text by eliminating tags from a document.", | ||
@@ -32,4 +32,4 @@ "author": "FlowPub", | ||
"devDependencies": { | ||
"@babel/core": "7.12.3", | ||
"@babel/preset-env": "7.12.1", | ||
"@babel/core": "7.12.9", | ||
"@babel/preset-env": "7.12.7", | ||
"@rollup/plugin-node-resolve": "8.4.0", | ||
@@ -36,0 +36,0 @@ "glob": "7.1.6", |
@@ -46,3 +46,3 @@ import { StringCollector } from './stringCollector' | ||
for (const whitespaceEntry of entry.whitespace) { | ||
if (whitespaceEntry <= adjustedStart) { | ||
if (whitespaceEntry.after < adjustedStart) { | ||
++skips | ||
@@ -64,3 +64,3 @@ } | ||
for (const whitespaceEntry of entry.whitespace) { | ||
if (whitespaceEntry <= adjustedEnd) { | ||
if (whitespaceEntry.after < adjustedEnd) { | ||
++skips | ||
@@ -67,0 +67,0 @@ } |
@@ -6,3 +6,3 @@ import { | ||
collapseWhitespace, | ||
collectTrimmedWhitespace, | ||
isCharWhitespace, | ||
phrasingConstructs, | ||
@@ -98,4 +98,2 @@ } from './util' | ||
const whitespace = collectTrimmedWhitespace(textMap.node.textContent) | ||
blockMap.push({ | ||
@@ -107,3 +105,2 @@ type: MapType.TEXT, | ||
content: shrunkText, | ||
whitespace | ||
}) | ||
@@ -251,6 +248,34 @@ | ||
case MapType.TEXT: | ||
// TODO: Tests | ||
const whitespace = [] | ||
if (entity.node.nodeType === Node.TEXT_NODE || entity.node.tagName === 'img') { | ||
const nodeContent = entity.node.tagName === 'img' ? | ||
entity.node.getAttribute('alt').normalize() : | ||
entity.node.textContent.normalize() | ||
for (let charInMap = 0, charInNode = 0; charInNode < nodeContent.length; ++charInNode) { | ||
const isEqual = entity.content.charAt(charInMap) === nodeContent.charAt(charInNode) | ||
const isMapWhitespace = isCharWhitespace(entity.content.charCodeAt(charInMap)) | ||
const isNodeWhitespace = isCharWhitespace(nodeContent.charCodeAt(charInNode)) | ||
if (isEqual || (isMapWhitespace && isNodeWhitespace)) { | ||
++charInMap | ||
} else if (isMapWhitespace || isNodeWhitespace) { | ||
const skips = { | ||
after: charInMap - 1, | ||
position: charInNode | ||
} | ||
whitespace.push(skips) | ||
} else { | ||
throw new Error(`Degauss error, character mismatch and not a whitespace`) | ||
} | ||
} | ||
} | ||
result.push({ | ||
node: entity.node, | ||
content: entity.content, | ||
whitespace: entity.whitespace, | ||
whitespace: whitespace, | ||
start: runningIndex, | ||
@@ -257,0 +282,0 @@ length: entity.length, |
@@ -81,21 +81,2 @@ function autoBind() { | ||
const collectTrimmedWhitespace = (string) => { | ||
const spaces = [] | ||
let encounteredWhitespace = false | ||
for (let index = 0; index < string.length; ++index) { | ||
if (isCharWhitespace(string.charCodeAt(index))) { | ||
if (encounteredWhitespace) { | ||
spaces.push(index) | ||
} else { | ||
encounteredWhitespace = true | ||
} | ||
} else { | ||
encounteredWhitespace = false | ||
} | ||
} | ||
return spaces | ||
} | ||
const blacklist = [ | ||
@@ -173,4 +154,4 @@ 'base', | ||
collapseWhitespace, | ||
collectTrimmedWhitespace, | ||
phrasingConstructs, | ||
isCharWhitespace | ||
} |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
42576
1307