html-to-text
Advanced tools
Comparing version 0.0.8 to 0.1.0
@@ -9,3 +9,3 @@ var _ = require('underscore'); | ||
text = helper.decodeHTMLEntities(text); | ||
return helper.wordwrap(text, options.wordwrap); | ||
return helper.wordwrap(elem.needsSpace ? ' ' + text : text, options.wordwrap); | ||
}; | ||
@@ -25,9 +25,19 @@ | ||
// If we have both href and anchor text, format it in a useful manner: | ||
// - "anchor text [href]" | ||
// Otherwise if we have only anchor text or an href, we return the part we have: | ||
// - "anchor text" or | ||
// - "href" | ||
function formatAnchor(elem, fn, options) { | ||
var href = ''; | ||
// Always get the anchor text | ||
var result = _s.strip(fn(elem.children || [], options)); | ||
// Get the href, if present | ||
if (elem.attribs && elem.attribs.href) { | ||
return elem.attribs.href.replace(/^mailto\:/, ''); | ||
} | ||
else { | ||
return helper.wordwrap(helper.decodeHTMLEntities(_s.strip(elem.raw)), options.wordwrap); | ||
} | ||
href = elem.attribs.href.replace(/^mailto\:/, ''); | ||
} | ||
if (result && href) { | ||
result += ' [' + href + ']'; | ||
} | ||
return formatText({ raw: result || href, needsSpace: elem.needsSpace }, options); | ||
}; | ||
@@ -34,0 +44,0 @@ |
@@ -53,5 +53,6 @@ var _ = require('underscore'); | ||
exports.wordwrap = function wordwrap(text, max) { | ||
var result = ''; | ||
// Preserve leading space | ||
var result = _s.startsWith(text, ' ') ? ' ' : ''; | ||
var words = _s.words(text); | ||
var length = 0; | ||
var length = result.length; | ||
var buffer = []; | ||
@@ -58,0 +59,0 @@ _.each(words, function(word) { |
@@ -76,2 +76,3 @@ var fs = require('fs'); | ||
var result = ''; | ||
var whiteSpaceRegex = /\S$/; | ||
_.each(dom, function(elem) { | ||
@@ -82,2 +83,5 @@ switch(elem.type) { | ||
case 'a': | ||
// Inline element needs a leading space if `result` currently | ||
// doesn't end with whitespace | ||
elem.needsSpace = whiteSpaceRegex.test(result); | ||
result += format.anchor(elem, walk, options); | ||
@@ -116,3 +120,8 @@ break; | ||
case 'text': | ||
if (elem.raw !== '\r\n') result += format.text(elem, options); | ||
if (elem.raw !== '\r\n') { | ||
// Text needs a leading space if `result` currently | ||
// doesn't end with whitespace | ||
elem.needsSpace = whiteSpaceRegex.test(result); | ||
result += format.text(elem, options); | ||
} | ||
break; | ||
@@ -119,0 +128,0 @@ default: |
{ | ||
"name": "html-to-text", | ||
"version": "0.0.8", | ||
"description": "Simple html to plain text converter", | ||
"version": "0.1.0", | ||
"description": "Advanced html to plain text converter", | ||
"main": "index.js", | ||
"scripts": { | ||
}, | ||
"author": "Malte Legenhausen", | ||
"license": "MIT", | ||
"author": { | ||
"name": "Malte Legenhausen", | ||
"email": "legenhausen@werk85.de" | ||
}, | ||
"homepage": "https://github.com/werk85/node-html-to-text", | ||
"licenses": [ | ||
{ | ||
"type": "MIT", | ||
"url": "https://github.com/werk85/node-html-to-text/blob/master/LICENSE-MIT" | ||
} | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "git://github.com/werk85/node-html-to-text.git" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/werk85/node-html-to-text/issues" | ||
}, | ||
"dependencies": { | ||
"htmlparser": "1.x.x", | ||
"underscore": "1.x.x", | ||
"underscore.string": "2.x.x", | ||
"htmlparser": "1.x.x", | ||
"underscore": "1.x.x", | ||
"underscore.string": "2.x.x", | ||
"optimist": "0.x.x" | ||
@@ -19,10 +34,10 @@ }, | ||
"html", | ||
"node", | ||
"text", | ||
"mail", | ||
"node", | ||
"text", | ||
"mail", | ||
"plain", | ||
"converter" | ||
"converter" | ||
], | ||
"engines": { | ||
"node": "~0.8.0" | ||
"node": "~0.8.0" | ||
}, | ||
@@ -29,0 +44,0 @@ "bin": { |
@@ -212,3 +212,3 @@ # node-html-to-text | ||
et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea | ||
takimata sanctus est Lorem ipsum dolor sit amet.www.github.com | ||
takimata sanctus est Lorem ipsum dolor sit amet. Github [www.github.com] | ||
@@ -260,3 +260,3 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd | ||
Somewhere | ||
E-Mail:test@example.com | ||
E-Mail: Click here [test@example.com] | ||
``` | ||
@@ -263,0 +263,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No bug tracker
Maintenance: Package does not have a linked bug tracker in package.json.
Found 1 instance in 1 package
No repository
Supply chain risk: Package does not have a linked source code repository. Without this field, a package will have no reference to the location of the source code used to generate the package.
Found 1 instance in 1 package
No website
Quality: Package does not have a website.
Found 1 instance in 1 package
30119
12
379
0
1
1