Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

html-to-text

Package Overview
Dependencies
Maintainers
1
Versions
55
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

html-to-text - npm Package Compare versions

Comparing version 0.0.3 to 0.0.4

bin/cli.js

2

example/html-to-text.js

@@ -14,3 +14,3 @@ var path = require('path');

htmlToText.fromFile(path.join(__dirname, 'test.html'), {
tables: ['invoice', 'address']
tables: ['#invoice', '.address']
}, function(err, text) {

@@ -17,0 +17,0 @@ if (err) return console.error(err);

@@ -8,3 +8,11 @@ var fs = require('fs');

var helper = require('./helper');
function htmlToText(html, options) {
options = options || {};
_.defaults(options, {
wordwrap: 80,
tables: []
});
var handler = new htmlparser.DefaultHandler(function (error, dom) {

@@ -16,4 +24,4 @@

});
var parser = new htmlparser.Parser(handler);
parser.parseComplete(html);
new htmlparser.Parser(handler).parseComplete(html);
var result = buildText(filterBody(handler.dom), options);

@@ -43,6 +51,2 @@ return _s.strip(result);

/**
 * Replace the few HTML special characters this module knows about with
 * their plain-text equivalents.
 *
 * Handles non-breaking spaces (`&nbsp;`, `&#160;`, or a literal U+00A0),
 * which become a regular space, and `&euro;`, which becomes the euro sign.
 * Every occurrence is replaced, not just the first one.
 *
 * NOTE(review): the entity names are reconstructed from the function's
 * purpose; the text seen in review had already lost them. Confirm against
 * the repository before merging.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} The text with the known entities decoded.
 */
function convertHtmlSpecials(text) {
	// Global flags are required: String#replace with a string pattern or a
	// non-global regex only touches the first match.
	return text
		.replace(/&nbsp;|&#160;|\u00a0/g, ' ')
		.replace(/&euro;/g, '€');
}
function wordwrap(text, max) {

@@ -74,4 +78,4 @@ if (text.length > max) {

text = _s.strip(text);
text = convertHtmlSpecials(text);
text = wordwrap(text, options.wordwrap || 80);
text = helper.decodeHTMLEntities(text);
text = wordwrap(text, options.wordwrap);
return text;

@@ -96,2 +100,6 @@ }

/**
 * Render a horizontal rule as a line of dashes surrounded by newlines.
 *
 * @param {Object} elem - The element being formatted (not read here).
 * @param {Function} fn - Walker callback (not called here).
 * @param {Object} options - Conversion options; `options.wordwrap` is passed
 *   to `_s.repeat` as the repeat count for the dash.
 * @returns {string} A newline, the dashed rule, and a trailing newline.
 */
function formatHorizontalLine(elem, fn, options) {
	var lineBreak = '\n';
	var rule = _s.repeat('-', options.wordwrap);
	return lineBreak + rule + lineBreak;
}
function tableToString(table) {

@@ -160,5 +168,19 @@ // Determine space width per column

/**
 * Decide whether a table element is selected by the `tables` option.
 *
 * Selectors are strings prefixed with `.` (match against the element's
 * class attribute) or `#` (match against its id attribute). Selectors
 * without one of these prefixes never match.
 *
 * @param {Object} attr - The element's attributes (`class`, `id`); may be
 *   missing when the element has no attributes.
 * @param {true|string[]} tables - `true` to select every table, otherwise a
 *   list of `.class` / `#id` selectors.
 * @returns {boolean} Whether the table should be formatted as a table.
 */
function containsTable(attr, tables) {
	// `true` selects every table, with or without attributes.
	if (tables === true) return true;
	if (!attr || !Array.isArray(tables)) return false;

	// A class attribute can hold several whitespace-separated names, so
	// compare against each name rather than the raw attribute string.
	var classNames = typeof attr.class === 'string'
		? attr.class.split(/\s+/).filter(Boolean)
		: [];

	return tables.some(function(selector) {
		if (typeof selector !== 'string' || selector.length < 2) return false;
		var name = selector.slice(1);
		switch (selector.charAt(0)) {
			case '.':
				return classNames.indexOf(name) !== -1;
			case '#':
				return attr.id === name;
			default:
				return false;
		}
	});
}
function buildText(dom, options) {
options = options || {};
var tables = options.tables || [];
function walk(dom) {

@@ -185,4 +207,7 @@ var result = '';

break;
case 'hr':
result += formatHorizontalLine(elem, walk, options);
break;
case 'table':
if (elem.attribs && elem.attribs.class && _.include(tables, elem.attribs.class)) {
if (containsTable(elem.attribs, options.tables)) {
result += formatTable(elem, walk);

@@ -189,0 +214,0 @@ break;

{
"name": "html-to-text",
"version": "0.0.3",
"description": "Simple html to text converter",
"version": "0.0.4",
"description": "Simple html to plain text converter",
"main": "index.js",

@@ -14,9 +14,11 @@ "scripts": {

"underscore": "1.x.x",
"underscore.string": "2.x.x"
"underscore.string": "2.x.x",
"optimist": "0.x.x"
},
"keywords": [
"html",
"html",
"node",
"text",
"mail",
"plain",
"converter"

@@ -26,3 +28,6 @@ ],

"node": "*"
},
"bin": {
"html-to-text": "./bin/cli.js"
}
}

@@ -26,3 +26,3 @@ # node-html-to-text

htmlToText.fromFile(path.join(__dirname, 'test.html'), {
tables: ['invoice', 'address']
tables: ['#invoice', '.address']
}, function(err, text) {

@@ -49,6 +49,23 @@ if (err) return console.error(err);

* `tables` lets you select certain tables by the `class` attribute from the HTML document. This is necessary because the majority of HTML e-mails use a table-based layout, so you have to define which tables should be treated as `table`. All other tables are ignored.
* `wordwrap` defines after how many chars a line break should follow in `p` elements.
* `tables` lets you select certain tables by the `class` or `id` attribute from the HTML document. This is necessary because the majority of HTML e-mails use a table-based layout. Prefix your table selectors with a `.` for the `class` attribute and with a `#` for the `id` attribute. All other tables are ignored. You can assign `true` to this option to select all tables. Default: `[]`
* `wordwrap` defines after how many chars a line break should follow in `p` elements. Default: `80`
## Command Line Interface
It is possible to use html-to-text as a command line interface. This allows easy validation of your generated text and integration with other systems that do not run on node.js.
`html-to-text` reads its input from `stdin` and writes its output to `stdout`, so you can use `html-to-text` the following way:
```
cat examples/test.html | html-to-text > test.txt
```
All options described above are also available. You can use them like this:
```
cat examples/test.html | html-to-text --tables=#invoice,.address --wordwrap=100 > test.txt
```
The `tables` option has to be declared as a comma-separated list without whitespace.
## Example

@@ -81,3 +98,3 @@

<td>
<table>
<table id="invoice">
<tr>

@@ -97,3 +114,3 @@ <th>Article</th>

</td>
<td align="right" valign="top">6,99€</td>
<td align="right" valign="top">6,99&euro;</td>
<td align="right" valign="top">7%</td>

@@ -113,3 +130,3 @@ <td align="right" valign="top">1</td>

<td>&nbsp;</td>
<td>to pay: 10,24€</td>
<td colspan="3">to pay: 10,24€</td>
</tr>

@@ -129,3 +146,3 @@ <tr>

<hr />
<table>
<table class="address">
<tr>

@@ -197,8 +214,10 @@ <th align="left">Invoice Address</th>

ARTICLE PRICE TAXES AMOUNT TOTAL
Product 1 6,99€ 7% 1 6,99€
Contains: 1x Product 1
Shipment costs 3,25€ 7% 1 3,25€
to pay: 10,24€
Taxes 7%: 0,72€
Product 1 6,99€ 7% 1 6,99€
Contains: 1x Product 1
Shipment costs 3,25€ 7% 1 3,25€
to pay: 10,24€
Taxes 7%: 0,72€
--------------------------------------------------------------------------------
INVOICE ADDRESS SHIPMENT ADDRESS

@@ -210,2 +229,4 @@ Mr. Mr.

--------------------------------------------------------------------------------
LAW OF REVOCATION

@@ -218,2 +239,4 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd no sea

--------------------------------------------------------------------------------
TERMS OF CONDITION

@@ -220,0 +243,0 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd no sea

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc