html-to-text
Advanced tools
Comparing version 0.0.3 to 0.0.4
@@ -14,3 +14,3 @@ var path = require('path'); | ||
htmlToText.fromFile(path.join(__dirname, 'test.html'), { | ||
tables: ['invoice', 'address'] | ||
tables: ['#invoice', '.address'] | ||
}, function(err, text) { | ||
@@ -17,0 +17,0 @@ if (err) return console.error(err); |
@@ -8,3 +8,11 @@ var fs = require('fs'); | ||
var helper = require('./helper'); | ||
function htmlToText(html, options) { | ||
options = options || {}; | ||
_.defaults(options, { | ||
wordwrap: 80, | ||
tables: [] | ||
}); | ||
var handler = new htmlparser.DefaultHandler(function (error, dom) { | ||
@@ -16,4 +24,4 @@ | ||
}); | ||
var parser = new htmlparser.Parser(handler); | ||
parser.parseComplete(html); | ||
new htmlparser.Parser(handler).parseComplete(html); | ||
var result = buildText(filterBody(handler.dom), options); | ||
@@ -43,6 +51,2 @@ return _s.strip(result); | ||
/**
 * Replace the HTML entity references handled by this converter with the
 * characters they stand for.
 *
 * NOTE(review): in this listing the patterns appear as already-decoded
 * characters (a blank and a literal euro sign), which turns both replacements
 * into no-ops; presumably the entity names were lost when the listing was
 * rendered. Confirm against the published 0.0.3 source.
 *
 * @param {string} text - Text that may contain `&nbsp;` or `&euro;`.
 * @returns {string} The text with every such reference decoded.
 */
function convertHtmlSpecials(text) {
  // Global patterns: a string or non-global pattern only replaces the first match.
  return text.replace(/&nbsp;/g, ' ').replace(/&euro;/g, '€');
}
function wordwrap(text, max) { | ||
@@ -74,4 +78,4 @@ if (text.length > max) { | ||
text = _s.strip(text); | ||
text = convertHtmlSpecials(text); | ||
text = wordwrap(text, options.wordwrap || 80); | ||
text = helper.decodeHTMLEntities(text); | ||
text = wordwrap(text, options.wordwrap); | ||
return text; | ||
@@ -96,2 +100,6 @@ } | ||
/**
 * Render a horizontal rule as a row of dashes on a line of its own.
 *
 * @param {Object} elem - The element being formatted (not read here).
 * @param {Function} fn - The walker handed in by the caller (not read here).
 * @param {Object} options - Conversion options; `wordwrap` is passed to
 *   `_s.repeat` as the number of dashes.
 * @returns {string} The dash row, preceded and followed by a newline.
 */
function formatHorizontalLine(elem, fn, options) {
  var newline = '\n';
  var rule = _s.repeat('-', options.wordwrap);
  return newline + rule + newline;
}
function tableToString(table) { | ||
@@ -160,5 +168,19 @@ // Determine space width per column | ||
/**
 * Decide whether a table element has been selected for table formatting.
 *
 * @param {Object} attr - Attribute map of the element; its `class` and `id`
 *   entries are read. May be undefined.
 * @param {(boolean|string[])} tables - `true` selects every table; otherwise a
 *   list of selectors where '.name' matches a class and '#name' matches an id.
 * @returns {boolean} true when the element matches one of the selectors.
 */
function containsTable(attr, tables) {
  if (tables === true) return true;
  if (!attr || !Array.isArray(tables)) return false;

  // Collect the selector names that start with `prefix`, with the prefix removed.
  function namesWithPrefix(prefix) {
    return tables
      .filter(function(key) {
        return typeof key === 'string' && key.charAt(0) === prefix;
      })
      .map(function(key) {
        return key.slice(1);
      });
  }

  var classes = namesWithPrefix('.');
  var ids = namesWithPrefix('#');

  // A class attribute may hold several whitespace-separated names, so each
  // name is compared on its own instead of the attribute value as a whole.
  var elementClasses = typeof attr.class === 'string' ? attr.class.split(/\s+/) : [];
  var hasClass = elementClasses.some(function(name) {
    return name !== '' && classes.indexOf(name) !== -1;
  });
  var hasId = typeof attr.id === 'string' && ids.indexOf(attr.id) !== -1;

  return hasClass || hasId;
}
function buildText(dom, options) { | ||
options = options || {}; | ||
var tables = options.tables || []; | ||
function walk(dom) { | ||
@@ -185,4 +207,7 @@ var result = ''; | ||
break; | ||
case 'hr': | ||
result += formatHorizontalLine(elem, walk, options); | ||
break; | ||
case 'table': | ||
if (elem.attribs && elem.attribs.class && _.include(tables, elem.attribs.class)) { | ||
if (containsTable(elem.attribs, options.tables)) { | ||
result += formatTable(elem, walk); | ||
@@ -189,0 +214,0 @@ break; |
{ | ||
"name": "html-to-text", | ||
"version": "0.0.3", | ||
"description": "Simple html to text converter", | ||
"version": "0.0.4", | ||
"description": "Simple html to plain text converter", | ||
"main": "index.js", | ||
@@ -14,9 +14,11 @@ "scripts": { | ||
"underscore": "1.x.x", | ||
"underscore.string": "2.x.x" | ||
"underscore.string": "2.x.x", | ||
"optimist": "0.x.x" | ||
}, | ||
"keywords": [ | ||
"html", | ||
"html", | ||
"node", | ||
"text", | ||
"mail", | ||
"plain", | ||
"converter" | ||
@@ -26,3 +28,6 @@ ], | ||
"node": "*" | ||
}, | ||
"bin": { | ||
"html-to-text": "./bin/cli.js" | ||
} | ||
} |
@@ -26,3 +26,3 @@ # node-html-to-text | ||
htmlToText.fromFile(path.join(__dirname, 'test.html'), { | ||
tables: ['invoice', 'address'] | ||
tables: ['#invoice', '.address'] | ||
}, function(err, text) { | ||
@@ -49,6 +49,23 @@ if (err) return console.error(err); | ||
* `tables` allows to select certain tables by the `class` attribute from the HTML document. This is necessary because the majority of HTML E-Mails uses a table based layout. So you have to define which tables should be treated as `table`. All other tables are ignored. | ||
* `wordwrap` defines after how many chars a line break should follow in `p` elements. | ||
* `tables` lets you select certain tables by the `class` or `id` attribute from the HTML document. This is necessary because the majority of HTML E-Mails use a table-based layout. Prefix your table selectors with a `.` for the `class` and with a `#` for the `id` attribute. All other tables are ignored. You can assign `true` to this attribute to select all tables. Default: `[]` | ||
* `wordwrap` defines after how many chars a line break should follow in `p` elements. Default: `80` | ||
## Command Line Interface | ||
It is possible to use html-to-text as a command line interface. This allows easy validation of your generated text and integration into other systems that do not run on node.js. | ||
`html-to-text` uses `stdin` and `stdout` for data input and output. So you can use `html-to-text` the following way: | ||
``` | ||
cat examples/test.html | html-to-text > test.txt | ||
``` | ||
All options described above are also available. You can use them like this: | ||
``` | ||
cat examples/test.html | html-to-text --tables=#invoice,.address --wordwrap=100 > test.txt | ||
``` | ||
The `tables` option has to be declared as a comma-separated list without whitespace. | ||
## Example | ||
@@ -81,3 +98,3 @@ | ||
<td> | ||
<table> | ||
<table id="invoice"> | ||
<tr> | ||
@@ -97,3 +114,3 @@ <th>Article</th> | ||
</td> | ||
<td align="right" valign="top">6,99€</td> | ||
<td align="right" valign="top">6,99€</td> | ||
<td align="right" valign="top">7%</td> | ||
@@ -113,3 +130,3 @@ <td align="right" valign="top">1</td> | ||
<td> </td> | ||
<td>to pay: 10,24€</td> | ||
<td colspan="3">to pay: 10,24€</td> | ||
</tr> | ||
@@ -129,3 +146,3 @@ <tr> | ||
<hr /> | ||
<table> | ||
<table class="address"> | ||
<tr> | ||
@@ -197,8 +214,10 @@ <th align="left">Invoice Address</th> | ||
ARTICLE PRICE TAXES AMOUNT TOTAL | ||
Product 1 6,99€ 7% 1 6,99€ | ||
Contains: 1x Product 1 | ||
Shipment costs 3,25€ 7% 1 3,25€ | ||
to pay: 10,24€ | ||
Taxes 7%: 0,72€ | ||
Product 1 6,99€ 7% 1 6,99€ | ||
Contains: 1x Product 1 | ||
Shipment costs 3,25€ 7% 1 3,25€ | ||
to pay: 10,24€ | ||
Taxes 7%: 0,72€ | ||
-------------------------------------------------------------------------------- | ||
INVOICE ADDRESS SHIPMENT ADDRESS | ||
@@ -210,2 +229,4 @@ Mr. Mr. | ||
-------------------------------------------------------------------------------- | ||
LAW OF REVOCATION | ||
@@ -218,2 +239,4 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd no sea | ||
-------------------------------------------------------------------------------- | ||
TERMS OF CONDITION | ||
@@ -220,0 +243,0 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd no sea |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
27390
9
290
276
4
+ Addedoptimist@0.x.x
+ Addedminimist@0.0.10(transitive)
+ Addedoptimist@0.6.1(transitive)
+ Addedwordwrap@0.0.3(transitive)