html-to-text
Advanced tools
Comparing version 2.1.3 to 3.0.0
# Changelog | ||
## Version 3.0.0 | ||
* Switched from `htmlparser` to `htmlparser2` #113 | ||
* Treat non-numeric colspans as zero and handle them gracefully #105 | ||
## Version 2.1.1 | ||
@@ -4,0 +9,0 @@ |
@@ -8,3 +8,3 @@ var _ = require('underscore'); | ||
function formatText(elem, options) { | ||
var text = elem.raw; | ||
var text = elem.data || ""; | ||
text = he.decode(text, options.decodeOptions); | ||
@@ -86,3 +86,3 @@ | ||
return formatText({ raw: result || href, trimLeadingSpace: elem.trimLeadingSpace }, options); | ||
return formatText({ data: result || href, trimLeadingSpace: elem.trimLeadingSpace }, options); | ||
} | ||
@@ -113,3 +113,3 @@ | ||
var nonWhiteSpaceChildren = (elem.children || []).filter(function(child) { | ||
return child.type !== 'text' || !whiteSpaceRegex.test(child.raw); | ||
return child.type !== 'text' || !whiteSpaceRegex.test(child.data); | ||
}); | ||
@@ -125,3 +125,3 @@ _.each(nonWhiteSpaceChildren, function(elem) { | ||
var nonWhiteSpaceChildren = (elem.children || []).filter(function(child) { | ||
return child.type !== 'text' || !whiteSpaceRegex.test(child.raw); | ||
return child.type !== 'text' || !whiteSpaceRegex.test(child.data); | ||
}); | ||
@@ -203,3 +203,3 @@ // Make sure there are list items present | ||
if (elem.attribs && elem.attribs.colspan) { | ||
times = elem.attribs.colspan - 1; | ||
times = elem.attribs.colspan - 1 || 0; | ||
_.times(times, function() { | ||
@@ -206,0 +206,0 @@ rows.push(['']); |
@@ -6,3 +6,3 @@ var fs = require('fs'); | ||
var _s = require('underscore.string'); | ||
var htmlparser = require('htmlparser'); | ||
var htmlparser = require('htmlparser2'); | ||
@@ -160,3 +160,3 @@ var helper = require('./helper'); | ||
case 'text': | ||
if (elem.raw !== '\r\n') { | ||
if (elem.data !== '\r\n') { | ||
// Text needs its leading space to be trimmed if `result` | ||
@@ -163,0 +163,0 @@ // currently ends with whitespace |
{ | ||
"name": "html-to-text", | ||
"version": "2.1.3", | ||
"version": "3.0.0", | ||
"description": "Advanced html to plain text converter", | ||
@@ -25,3 +25,3 @@ "main": "index.js", | ||
"he": "^1.0.0", | ||
"htmlparser": "^1.7.7", | ||
"htmlparser2": "^3.9.2", | ||
"optimist": "^0.6.1", | ||
@@ -47,4 +47,4 @@ "underscore": "^1.8.3", | ||
"chai": "^3.5.0", | ||
"mocha": "^2.4.5" | ||
"mocha": "^3.0.0" | ||
} | ||
} |
@@ -171,2 +171,19 @@ var expect = require('chai').expect; | ||
}); | ||
it('does handle non-integer colspan on td element gracefully', function () { | ||
var html = 'Good morning Jacob, \ | ||
<TABLE> \ | ||
<CENTER> \ | ||
<TBODY> \ | ||
<TR> \ | ||
<TD colspan="abc">Lorem ipsum dolor sit amet.</TD> \ | ||
</TR> \ | ||
</CENTER> \ | ||
</TBODY> \ | ||
</TABLE> \ | ||
'; | ||
var resultExpected = 'Good morning Jacob, Lorem ipsum dolor sit amet.'; | ||
var result = htmlToText.fromString(html, { tables: true }); | ||
expect(result).to.equal(resultExpected); | ||
}); | ||
}); | ||
@@ -218,3 +235,3 @@ | ||
}); | ||
}); | ||
@@ -347,3 +364,3 @@ | ||
}); | ||
it('should not wrap a string if not wrapCharacters are found and forceWrapOnLimit is not set', function() { | ||
@@ -440,2 +457,18 @@ var testString = '<p>_This_string_is_meant_to_test_if_a_string_is_split_properly_across_anewlineandlong\nword_with_following_text.</p>'; | ||
}); | ||
describe('wbr', function() { | ||
it('should handle a large number of wbr tags w/o stack overflow', function() { | ||
var testString = "<!DOCTYPE html><html><head></head><body>\n"; | ||
var expectedResult = ""; | ||
for (var i = 0; i < 1000; i++){ | ||
if (i !== 0 && i % 80 === 0) { | ||
expectedResult += "\n"; | ||
} | ||
expectedResult += "n"; | ||
testString += "<wbr>n"; | ||
} | ||
testString += "</body></html>"; | ||
expect(htmlToText.fromString(testString)).to.equal(expectedResult); | ||
}); | ||
}) | ||
}); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
70679
938
+ Added htmlparser2@^3.9.2
+ Added dom-serializer@0.2.2 (transitive)
+ Added domelementtype@1.3.1, 2.3.0 (transitive)
+ Added domhandler@2.4.2 (transitive)
+ Added domutils@1.7.0 (transitive)
+ Added entities@1.1.2, 2.2.0 (transitive)
+ Added htmlparser2@3.10.1 (transitive)
+ Added inherits@2.0.4 (transitive)
+ Added readable-stream@3.6.2 (transitive)
+ Added safe-buffer@5.2.1 (transitive)
+ Added string_decoder@1.3.0 (transitive)
- Removed htmlparser@^1.7.7
- Removed htmlparser@1.7.7 (transitive)