html-to-text
Advanced tools
Comparing version 1.1.1 to 1.2.0
@@ -7,5 +7,10 @@ var _ = require('underscore'); | ||
function formatText(elem, options) { | ||
var text = _s.strip(elem.raw); | ||
var text = (options.isInPre ? elem.raw : _s.strip(elem.raw)); | ||
text = helper.decodeHTMLEntities(text); | ||
return helper.wordwrap(elem.needsSpace ? ' ' + text : text, options.wordwrap); | ||
if (options.isInPre) { | ||
return text; | ||
} else { | ||
return helper.wordwrap(elem.needsSpace ? ' ' + text : text, options.wordwrap); | ||
} | ||
} | ||
@@ -56,3 +61,8 @@ | ||
if (href) { | ||
result += ' [' + href + ']'; | ||
if (options.linkHrefBaseUrl && href.indexOf('/') == 0) { | ||
href = options.linkHrefBaseUrl + href; | ||
} | ||
if (!options.hideLinkHrefIfSameAsText || href != result) { | ||
result += ' [' + href + ']'; | ||
} | ||
} | ||
@@ -63,3 +73,3 @@ return formatText({ raw: result || href, needsSpace: elem.needsSpace }, options); | ||
function formatHorizontalLine(elem, fn, options) { | ||
return _s.repeat('-', options.wordwrap) + '\n\n'; | ||
return '\n' + _s.repeat('-', options.wordwrap) + '\n\n'; | ||
} | ||
@@ -66,0 +76,0 @@ |
@@ -59,3 +59,3 @@ var _ = require('underscore'); | ||
_.each(words, function(word) { | ||
if (length + word.length > max) { | ||
if ((max || max === 0) && length + word.length > max) { | ||
// Concat buffer and add it to the result | ||
@@ -62,0 +62,0 @@ result += buffer.join(' ') + '\n'; |
@@ -21,3 +21,5 @@ var fs = require('fs'); | ||
wordwrap: 80, | ||
tables: [] | ||
tables: [], | ||
hideLinkHrefIfSameAsText: false, | ||
linkHrefBaseUrl: null, | ||
}); | ||
@@ -75,4 +77,6 @@ | ||
function walk(dom, options) { | ||
var result = ''; | ||
function walk(dom, options, result) { | ||
if (arguments.length < 3) { | ||
result = ''; | ||
} | ||
var whiteSpaceRegex = /\S$/; | ||
@@ -113,2 +117,7 @@ _.each(dom, function(elem) { | ||
break; | ||
case 'pre': | ||
var newOptions = _(options).clone(); | ||
newOptions.isInPre = true; | ||
result += format.paragraph(elem, walk, newOptions); | ||
break; | ||
case 'table': | ||
@@ -120,3 +129,3 @@ if (containsTable(elem.attribs, options.tables)) { | ||
default: | ||
result += walk(elem.children || [], options); | ||
result = walk(elem.children || [], options, result); | ||
} | ||
@@ -134,3 +143,3 @@ break; | ||
if (!_.include(SKIP_TYPES, elem.type)) { | ||
result += walk(elem.children || [], options); | ||
result = walk(elem.children || [], options, result); | ||
} | ||
@@ -137,0 +146,0 @@ } |
{ | ||
"name": "html-to-text", | ||
"version": "1.1.1", | ||
"version": "1.2.0", | ||
"description": "Advanced html to plain text converter", | ||
"main": "index.js", | ||
"scripts": { | ||
"test": "node_modules/.bin/mocha" | ||
}, | ||
@@ -30,3 +31,3 @@ "author": { | ||
"underscore.string": "2.x.x", | ||
"optimist": "0.x.x" | ||
"optimist": "0.x.x" | ||
}, | ||
@@ -46,3 +47,8 @@ "keywords": [ | ||
"html-to-text": "./bin/cli.js" | ||
}, | ||
"devDependencies": { | ||
"chai": "^1.10.0", | ||
"install": "^0.1.8", | ||
"mocha": "^2.1.0" | ||
} | ||
} |
@@ -55,3 +55,5 @@ # node-html-to-text | ||
* `tables` allows you to select certain tables by the `class` or `id` attribute from the HTML document. This is necessary because the majority of HTML emails use a table-based layout. Prefix your table selectors with a `.` for the `class` and with a `#` for the `id` attribute. All other tables are ignored. You can assign `true` to this attribute to select all tables. Default: `[]` | |
* `wordwrap` defines after how many chars a line break should follow in `p` elements. Default: `80` | ||
* `wordwrap` defines after how many chars a line break should follow in `p` elements. Set to `null` or `false` to disable word-wrapping. Default: `80` | ||
* `linkHrefBaseUrl` allows you to specify the server host for `href` attributes whose links start at the root (`/`). For example, with `linkHrefBaseUrl = 'http://asdf.com'` and `<a href='/dir/subdir'>...</a>`, the link in the text will be `http://asdf.com/dir/subdir`. Keep in mind that `linkHrefBaseUrl` shouldn't end with a `/`. Default: `null` | |
* `hideLinkHrefIfSameAsText` controls whether a link's href is shown when it duplicates the link text. By default, links are translated as follows: `<a href='link'>text</a>` becomes `text [link]`. If this option is set to `true` and `link` and `text` are the same, `[link]` will be hidden and only `text` will be visible. Default: `false` | |
@@ -58,0 +60,0 @@ ## Command Line Interface |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
41147
14
465
296
3
2