@@ -7,8 +7,8 @@ #!/usr/bin/env node
		// Parse the CLI flags. `tables` is forced to a string so that values such as
		// "true" or "#invoice,.address" can be interpreted by interpretTables().
		var argv = optimist
		  .string('tables')
		  .default('wordwrap', 80)
		  .default('ignore-href', false)
		  .default('ignore-image', false)
		  .default('noLinkBrackets', false)
		  .argv;

		@@ -22,21 +22,21 @@ var text = '';
		// Collect the HTML arriving on stdin; every chunk is appended exactly once.
		process.stdin.on('data', function data(data) {
		  text += data;
		});

		// When stdin ends, convert the collected HTML a single time using the CLI
		// flags and write the resulting text, followed by a newline, to stdout.
		process.stdin.on('end', function end() {
		  text = htmlToText.fromString(text, {
		    tables: interpretTables(argv.tables),
		    wordwrap: argv.wordwrap,
		    ignoreHref: argv['ignore-href'],
		    ignoreImage: argv['ignore-image'],
		    noLinkBrackets: argv['noLinkBrackets']
		  });
		  process.stdout.write(text + '\n', 'utf-8');
		});

		/**
		 * Translates the raw `--tables` flag value into the `tables` option.
		 *
		 * @param {string} [tables] Raw flag value.
		 * @returns {boolean|string[]} An empty array when the flag is unset, empty or
		 *   'false'; `true` when it is 'true'; otherwise the comma separated entries.
		 */
		function interpretTables(tables) {
		  // `!tables` already covers undefined and the empty string.
		  if (!tables || tables === 'false') {
		    return [];
		  }
		  return tables === 'true' || tables.split(',');
		}

CHANGELOG.md

		# Changelog

		## Version 3.2.0

		* Basic support for alpha ordered list types added #122
		* This includes support for the `ol` type values `1`, `a` and `A`

		## Version 3.1.0

		* Support for the ordered list start attribute added #117
		* Option to format paragraph with single new line #112
		* `noLinkBrackets` option added #119
		* Support for the ordered list start attribute added #117
		* Option to format paragraph with single new line #112
		* `noLinkBrackets` option added #119

		## Version 3.0.0

		* Switched from `htmlparser` to `htmlparser2` #113
		* Treat non-numeric colspans as zero and handle them gracefully #105
		* Switched from `htmlparser` to `htmlparser2` #113
		* Treat non-numeric colspans as zero and handle them gracefully #105

		@@ -20,40 +25,40 @@ ## Version 2.1.1

		* New option to disable `uppercaseHeadings` added. #86
		* Starting point of html to text conversion can now be defined in the options via the `baseElement` option. #83
		* Support for long words added. The behaviour can be configured via the `longWordSplit` option. #83
		* New option to disable `uppercaseHeadings` added. #86
		* Starting point of html to text conversion can now be defined in the options via the `baseElement` option. #83
		* Support for long words added. The behaviour can be configured via the `longWordSplit` option. #83

		## Version 2.0.0

		* Unicode support added. #81
		* New option `decodeOptions` added.
		* Dependencies updated.
		* Unicode support added. #81
		* New option `decodeOptions` added.
		* Dependencies updated.

		Breaking Changes:

		* Minimum node version increased to >=0.10.0
		* Minimum node version increased to >=0.10.0

		## Version 1.6.2

		* Fixed: correctly handle HTML entities for images #82
		* Fixed: correctly handle HTML entities for images #82

		## Version 1.6.1

		* Fixed: using --tables=true doesn't produce the expected results. #80
		* Fixed: using --tables=true doesn't produce the expected results. #80

		## Version 1.6.0

		* Preserve newlines in text feature added #75
		* Preserve newlines in text feature added #75

		## Version 1.5.1

		* Support for h5 and h6 tags added #74
		* Support for h5 and h6 tags added #74

		## Version 1.5.0

		* Entity regex is now less greedy #69 #70
		* Entity regex is now less greedy #69 #70

		## Version 1.4.0

		* Uppercase tag processing added. Table center support added. #56
		* Unused dependencies removed.
		* Uppercase tag processing added. Table center support added. #56
		* Unused dependencies removed.

		@@ -60,0 +65,0 @@ ## Version 1.3.2

example/html-to-text.js

		@@ -7,3 +7,3 @@ var path = require('path');
		// Convert an inline HTML snippet with the wordwrap option set to 130.
		var text = htmlToText.fromString('<h1>Hello World</h1>', {
		  wordwrap: 130
		});
		@@ -15,6 +15,6 @@ console.log(text);
		// Convert test.html next to this script, formatting the tables selected by
		// '#invoice' and '.address'; log either the error or the resulting text.
		htmlToText.fromFile(path.join(__dirname, 'test.html'), {
		  tables: ['#invoice', '.address']
		}, function(err, text) {
		  if (err) return console.error(err);
		  console.log(text);
		});

347

lib/formatter.js

		@@ -8,49 +8,49 @@ var _ = require('underscore');
		/**
		 * Formats a text node.
		 *
		 * Entities in `elem.data` are decoded with `options.decodeOptions`. When
		 * `options.isInPre` is set the decoded text is returned untouched; otherwise
		 * it is word wrapped, after stripping leading whitespace if the node is
		 * flagged with `trimLeadingSpace`.
		 *
		 * @param {Object} elem Node with optional `data` and `trimLeadingSpace`.
		 * @param {Object} options Conversion options.
		 * @returns {string} The formatted text.
		 */
		function formatText(elem, options) {
		  var text = he.decode(elem.data || "", options.decodeOptions);

		  if (options.isInPre) {
		    return text;
		  }
		  return helper.wordwrap(elem.trimLeadingSpace ? _s.lstrip(text) : text, options);
		}

		/**
		 * Formats an image element as `alt [src]`.
		 *
		 * @param {Object} elem Image node; `attribs` may be missing.
		 * @param {Object} options Conversion options (`ignoreImage`, `decodeOptions`).
		 * @returns {string} Empty string when images are ignored or the element has
		 *   neither `alt` nor `src`; otherwise the decoded alt text and/or the
		 *   bracketed source, separated by a space when both are present.
		 */
		function formatImage(elem, options) {
		  if (options.ignoreImage) {
		    return '';
		  }

		  var result = '', attribs = elem.attribs || {};
		  if (attribs.alt) {
		    result += he.decode(attribs.alt, options.decodeOptions);
		    if (attribs.src) {
		      result += ' ';
		    }
		  }
		  if (attribs.src) {
		    result += '[' + attribs.src + ']';
		  }
		  return result;
		}

		/**
		 * Formats a line break: a newline followed by the formatted children.
		 *
		 * @param {Object} elem Element node.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options, passed through to `fn`.
		 * @returns {string} '\n' plus the formatted children.
		 */
		function formatLineBreak(elem, fn, options) {
		  return '\n' + fn(elem.children, options);
		}

		/**
		 * Formats a paragraph: the formatted children followed by one newline when
		 * `options.singleNewLineParagraphs` is set, otherwise by two.
		 *
		 * @param {Object} elem Element node.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options.
		 * @returns {string} The paragraph text including its trailing newline(s).
		 */
		function formatParagraph(elem, fn, options) {
		  var paragraph = fn(elem.children, options);
		  if (options.singleNewLineParagraphs) {
		    return paragraph + '\n';
		  }
		  return paragraph + '\n\n';
		}

		/**
		 * Formats a heading: the formatted children, upper-cased when
		 * `options.uppercaseHeadings` is set, followed by a newline.
		 *
		 * @param {Object} elem Element node.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options.
		 * @returns {string} The heading text with a trailing newline.
		 */
		function formatHeading(elem, fn, options) {
		  var heading = fn(elem.children, options);
		  if (options.uppercaseHeadings) {
		    heading = heading.toUpperCase();
		  }
		  return heading + '\n';
		}
		@@ -64,52 +64,52 @@
		/**
		 * Formats an anchor as its text followed by the link target.
		 *
		 * Unless `options.ignoreHref` is set, the href (with a leading `mailto:`
		 * removed) is appended as ` [href]`, or as ` href` when
		 * `options.noLinkBrackets` is set. A href starting with '/' is prefixed with
		 * `options.linkHrefBaseUrl` when that is given, and the href is omitted when
		 * `options.hideLinkHrefIfSameAsText` is set and it equals the anchor text
		 * with newlines removed.
		 *
		 * @param {Object} elem Anchor node.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options.
		 * @returns {string} The anchor run through formatText().
		 */
		function formatAnchor(elem, fn, options) {
		  var href = '';
		  // Always get the anchor text. Formatting the children may move
		  // options.lineCharCount, so remember it and restore it below.
		  var storedCharCount = options.lineCharCount;
		  var text = fn(elem.children || [], options);
		  if (!text) {
		    text = '';
		  }

		  var result = elem.trimLeadingSpace ? _s.lstrip(text) : text;

		  if (!options.ignoreHref) {
		    // Get the href, if present
		    if (elem.attribs && elem.attribs.href) {
		      href = elem.attribs.href.replace(/^mailto\:/, '');
		    }
		    if (href) {
		      if (options.linkHrefBaseUrl && href.indexOf('/') === 0) {
		        href = options.linkHrefBaseUrl + href;
		      }
		      if (!options.hideLinkHrefIfSameAsText || href !== _s.replaceAll(result, '\n', '')) {
		        if (!options.noLinkBrackets) {
		          result += ' [' + href + ']';
		        } else {
		          result += ' ' + href;
		        }
		      }
		    }
		  }

		  options.lineCharCount = storedCharCount;

		  return formatText({ data: result || href, trimLeadingSpace: elem.trimLeadingSpace }, options);
		}

		/**
		 * Formats a horizontal rule as a line of `options.wordwrap` dashes, preceded
		 * by one newline and followed by two.
		 *
		 * @param {Object} elem Element node (unused).
		 * @param {Function} fn Child formatter (unused).
		 * @param {Object} options Conversion options.
		 * @returns {string} The rendered rule.
		 */
		function formatHorizontalLine(elem, fn, options) {
		  return '\n' + _s.repeat('-', options.wordwrap) + '\n\n';
		}

		/**
		 * Formats one list item with the given marker prefix.
		 *
		 * @param {string} prefix Marker placed in front of the item, e.g. ' * '.
		 * @param {Object} elem List item node.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options; cloned, so the caller's object
		 *   keeps its own `wordwrap`.
		 * @returns {string} Prefix, item text with continuation lines indented by the
		 *   prefix width, and a trailing newline.
		 */
		function formatListItem(prefix, elem, fn, options) {
		  options = _.clone(options);
		  // Reduce the wordwrap for sub elements (once) by the width of the prefix.
		  if (options.wordwrap) {
		    options.wordwrap -= prefix.length;
		  }
		  // Process sub elements.
		  var text = fn(elem.children, options);
		  // Replace all line breaks with line break + prefix spacing.
		  text = text.replace(/\n/g, '\n' + _s.repeat(' ', prefix.length));
		  // Add first prefix and line break at the end.
		  return prefix + text + '\n';
		}
		@@ -120,113 +120,122 @@
		/**
		 * Formats an unordered list, rendering every child that is not a text node
		 * matching `whiteSpaceRegex` as an item prefixed with ' * '.
		 *
		 * @param {Object} elem List node.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options.
		 * @returns {string} The rendered items followed by a newline.
		 */
		function formatUnorderedList(elem, fn, options) {
		  var result = '';
		  var nonWhiteSpaceChildren = (elem.children || []).filter(function(child) {
		    return child.type !== 'text' || !whiteSpaceRegex.test(child.data);
		  });
		  _.each(nonWhiteSpaceChildren, function(elem) {
		    result += formatListItem(' * ', elem, fn, options);
		  });
		  return result + '\n';
		}

		/**
		 * Formats an ordered list, honouring the `start` attribute and the `type`
		 * values `1`, `a` and `A`.
		 *
		 * Decimal items are rendered as ' N. ' and padded so that the text of all
		 * items lines up; alphabetic items are rendered as 'x. '. Any other `type`
		 * value and a missing or non-numeric `start` fall back to decimal numbering
		 * starting at 1.
		 *
		 * @param {Object} elem List node; `attribs` may be missing.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options.
		 * @returns {string} The rendered items followed by a newline.
		 */
		function formatOrderedList(elem, fn, options) {
		  var result = '';
		  var attribs = elem.attribs || {};
		  var nonWhiteSpaceChildren = (elem.children || []).filter(function(child) {
		    return child.type !== 'text' || !whiteSpaceRegex.test(child.data);
		  });
		  // Marker generators for the supported OL types
		  var typeFunctions = {
		    1: function(start, i) { return i + 1 + start; },
		    a: function(start, i) { return String.fromCharCode(i + start + 97); },
		    A: function(start, i) { return String.fromCharCode(i + start + 65); }
		  };
		  // Determine type. Only own keys count, so unsupported values (e.g. "i")
		  // use decimal numbering instead of calling an undefined generator.
		  var olType = Object.prototype.hasOwnProperty.call(typeFunctions, attribs.type) ? attribs.type : '1';
		  // Make sure there are list items present
		  if (nonWhiteSpaceChildren.length) {
		    // Calculate initial start from ol attribute; ignore values that are not integers
		    var parsedStart = parseInt(attribs.start, 10);
		    var start = (isNaN(parsedStart) ? 1 : parsedStart) - 1;
		    // Calculate the maximum length to i.
		    var maxLength = (nonWhiteSpaceChildren.length + start).toString().length;
		    _.each(nonWhiteSpaceChildren, function(elem, i) {
		      // Use different function depending on type
		      var index = typeFunctions[olType](start, i);
		      // Calculate the needed spacing for nice indentation.
		      var spacing = maxLength - index.toString().length;
		      var prefix = (olType === '1') ? ' ' + index + '. ' + _s.repeat(' ', spacing) : index + '. ';
		      result += formatListItem(prefix, elem, fn, options);
		    });
		  }
		  return result + '\n';
		}

		/**
		 * Renders a table given as an array of rows, each an array of cell strings.
		 *
		 * Every cell is stripped and right-padded to the largest `length` found in
		 * its column, followed by a single space; every row ends with a newline.
		 *
		 * @param {Array<Array<string>>} table Rows of cell strings.
		 * @returns {string} The rendered table followed by an extra newline.
		 */
		function tableToString(table) {
		  // Determine space width per column
		  // Convert all rows to lengths
		  var widths = _.map(table, function(row) {
		    return _.map(row, function(col) {
		      return col.length;
		    });
		  });
		  // Invert rows with columns
		  widths = helper.arrayZip(widths);
		  // Determine the max values for each column
		  widths = _.map(widths, function(col) {
		    return _.max(col);
		  });

		  // Build the table
		  var text = '';
		  _.each(table, function(row) {
		    var i = 0;
		    _.each(row, function(col) {
		      text += _s.rpad(_s.strip(col), widths[i++], ' ') + ' ';
		    });
		    text += '\n';
		  });
		  return text + '\n';
		}

		/**
		 * Formats a table element as column aligned text.
		 *
		 * Rows are collected from `tr` elements found directly below the table or
		 * inside `thead`, `tbody`, `tfoot` and `center` wrappers. `th` cells are
		 * formatted as headings, `td` cells with `fn`; a `colspan` on a `td` adds
		 * empty cells for the extra columns. A cell spanning several text lines
		 * contributes several table rows.
		 *
		 * @param {Object} elem Table node.
		 * @param {Function} fn Formatter invoked as `fn(children, options)`.
		 * @param {Object} options Conversion options.
		 * @returns {string} The rendered table.
		 */
		function formatTable(elem, fn, options) {
		  var table = [];
		  _.each(elem.children, tryParseRows);
		  return tableToString(table);

		  // Function declaration, hence hoisted and callable above the return.
		  function tryParseRows(elem) {
		    if (elem.type !== 'tag') {
		      return;
		    }
		    switch (elem.name.toLowerCase()) {
		      case "thead":
		      case "tbody":
		      case "tfoot":
		      case "center":
		        _.each(elem.children, tryParseRows);
		        return;

		      case 'tr':
		        var rows = [];
		        _.each(elem.children, function(elem) {
		          var tokens, times;
		          if (elem.type === 'tag') {
		            switch (elem.name.toLowerCase()) {
		              case 'th':
		                tokens = formatHeading(elem, fn, options).split('\n');
		                rows.push(_.compact(tokens));
		                break;

		              case 'td':
		                tokens = fn(elem.children, options).split('\n');
		                rows.push(_.compact(tokens));
		                // Fill colspans with empty values; a non-numeric colspan
		                // yields NaN and is treated as zero.
		                if (elem.attribs && elem.attribs.colspan) {
		                  times = elem.attribs.colspan - 1 || 0;
		                  _.times(times, function() {
		                    rows.push(['']);
		                  });
		                }
		                break;
		            }
		          }
		        });
		        // `rows` holds one array of lines per cell; zip it into text rows.
		        rows = helper.arrayZip(rows);
		        _.each(rows, function(row) {
		          row = _.map(row, function(col) {
		            return col || '';
		          });
		          table.push(row);
		        });
		        break;
		    }
		  }
		}
		@@ -233,0 +242,0 @@

198

lib/helper.js

		@@ -8,129 +8,127 @@ var _ = require('underscore');
		/**
		 * Splits a word that is longer than `options.wordwrap` into several lines.
		 *
		 * The word is preferably broken after the last occurrence of a wrap character
		 * within the limit. The characters in `longWordSplit.wrapCharacters` are
		 * tried in order. Once none of them applies, the word is cut at the limit
		 * when `longWordSplit.forceWrapOnLimit` is set; otherwise it is left at full
		 * length, with a newline appended unless `options.preserveNewlines` is set.
		 *
		 * @param {string} word The word to split.
		 * @param {Object} options Conversion options (`wordwrap`, `longWordSplit`,
		 *   `preserveNewlines`).
		 * @returns {string} The pieces joined with '\n'.
		 */
		function splitLongWord(word, options) {
		  var wrapCharacters = options.longWordSplit.wrapCharacters || [];
		  var forceWrapOnLimit = options.longWordSplit.forceWrapOnLimit || false;
		  var max = options.wordwrap;

		  var fuseWord = [];
		  var idx = 0;
		  while (word.length > max) {
		    var firstLine = word.substr(0, max);
		    var remainingChars = word.substr(max);

		    // Only search while a wrap character is left to try: lastIndexOf(undefined)
		    // would look for the literal text "undefined" inside the word.
		    var splitIndex = idx < wrapCharacters.length
		      ? firstLine.lastIndexOf(wrapCharacters[idx])
		      : -1;

		    if (splitIndex > -1) {
		      // We've found a character to split on, store before the split then check if we
		      // need to split again
		      word = firstLine.substr(splitIndex + 1) + remainingChars;
		      fuseWord.push(firstLine.substr(0, splitIndex + 1));
		    } else {
		      idx++;
		      if (idx >= wrapCharacters.length) {
		        // Cannot split on character, so either split at 'max' or preserve length.
		        // An empty first line (non-positive limit) cannot make progress, so it is
		        // handled like the preserve-length case instead of looping forever.
		        if (forceWrapOnLimit && firstLine.length > 0) {
		          fuseWord.push(firstLine);
		          word = remainingChars;
		          if (word.length > max) {
		            continue;
		          }
		        } else {
		          word = firstLine + remainingChars;
		          if (!options.preserveNewlines) {
		            word += '\n';
		          }
		        }
		        break;
		      } else {
		        word = firstLine + remainingChars;
		      }
		    }
		  }
		  fuseWord.push(word);

		  return fuseWord.join('\n');
		}

		exports.wordwrap = function wordwrap(text, options) {
		var max = options.wordwrap;
		var preserveNewlines = options.preserveNewlines;
		var length = options.lineCharCount;
		var max = options.wordwrap;
		var preserveNewlines = options.preserveNewlines;
		var length = options.lineCharCount;

		// Preserve leading space
		var result = _s.startsWith(text, ' ') ? ' ' : '';
		length += result.length;
		var buffer = [];
		// Split the text into words, decide to preserve new lines or not.
		var words = preserveNewlines
		? text.replace(/\n/g, '\n ').split(/\ +/)
		: _s.words(text);
		// Preserve leading space
		var result = _s.startsWith(text, ' ') ? ' ' : '';
		length += result.length;
		var buffer = [];
		// Split the text into words, decide to preserve new lines or not.
		var words = preserveNewlines
		? text.replace(/\n/g, '\n ').split(/\ +/)
		: _s.words(text);

		// Determine where to end line word by word.
		_.each(words, function(word) {
		// Add buffer to result if we can't fit any more words in the buffer.
		if ((max \|\| max === 0) && length > 0 &&
		((length + word.length > max) \|\| (length + word.indexOf('\n') > max)))
		{
		// Concat buffer and add it to the result
		result += buffer.join(' ') + '\n';
		// Reset buffer and length
		buffer.length = length = 0;
		}
		// Determine where to end line word by word.
		_.each(words, function(word) {
		// Add buffer to result if we can't fit any more words in the buffer.
		if ((max \|\| max === 0) && length > 0 && ((length + word.length > max) \|\| (length + word.indexOf('\n') > max))) {
		// Concat buffer and add it to the result
		result += buffer.join(' ') + '\n';
		// Reset buffer and length
		buffer.length = length = 0;
		}

		// Check if the current word is long enough to be wrapped
		if ((max \|\| max === 0) && (options.longWordSplit) && (word.length > max)) {
		word = splitLongWord(word, options);
		}
		// Check if the current word is long enough to be wrapped
		if ((max \|\| max === 0) && (options.longWordSplit) && (word.length > max)) {
		word = splitLongWord(word, options);
		}

		buffer.push(word);
		buffer.push(word);

		// If the word contains a newline then restart the count and add the buffer to the result
		if (word.indexOf('\n') != -1) {
		result += buffer.join(' ');
		// If the word contains a newline then restart the count and add the buffer to the result
		if (word.indexOf('\n') !== -1) {
		result += buffer.join(' ');

		// Reset the buffer, let the length include any characters after the last newline
		buffer.length = 0;
		length = word.length - (word.lastIndexOf('\n') + 1);
		// If there are characters after the newline, add a space and increase the length by 1
		if (length) {
		result += ' ';
		length++;
		}
		} else {
		// Add word length + one whitespace
		length += word.length + 1;
		}
		});
		// Add the rest to the result.
		result += buffer.join(' ');

		// Preserve trailing space
		if (!_s.endsWith(text, ' ')) {
		result = _s.rtrim(result);
		} else if (!_s.endsWith(result, ' ')) {
		result = result + ' ';
		// Reset the buffer, let the length include any characters after the last newline
		buffer.length = 0;
		length = word.length - (word.lastIndexOf('\n') + 1);
		// If there are characters after the newline, add a space and increase the length by 1
		if (length) {
		result += ' ';
		length++;
		}
		} else {
		// Add word length + one whitespace
		length += word.length + 1;
		}
		});
		// Add the rest to the result.
		result += buffer.join(' ');

		return result;
		// Preserve trailing space
		if (!_s.endsWith(text, ' ')) {
		result = _s.rtrim(result);
		} else if (!_s.endsWith(result, ' ')) {
		result = result + ' ';
		}

		return result;
		};

		exports.arrayZip = function arrayZip(array) {
		return _.zip.apply(_, array);
		return _.zip.apply(_, array);
		};

		exports.splitCssSearchTag = function splitCssSearchTag(tagString) {
		function getParams(re, string) {
		var captures = [], found;
		while (found = re.exec(string)) {
		captures.push(found[1]);
		}
		return captures;
		function getParams(re, string) {
		var captures = [], found;
		while ((found = re.exec(string)) !== null) {
		captures.push(found[1]);
		}
		return captures;
		}

		var splitTag = {};
		var elementRe = /(^\w*)/g;
		splitTag.element = elementRe.exec(tagString)[1];
		splitTag.classes = getParams( /\.([\d\w-]*)/g, tagString);
		splitTag.ids = getParams( /#([\d\w-]*)/g, tagString);
		var splitTag = {};
		var elementRe = /(^\w*)/g;
		splitTag.element = elementRe.exec(tagString)[1];
		splitTag.classes = getParams( /\.([\d\w-]*)/g, tagString);
		splitTag.ids = getParams( /#([\d\w-]*)/g, tagString);

		return splitTag;
		return splitTag;
		};

304

lib/html-to-text.js

		@@ -13,181 +13,181 @@ var fs = require('fs');
		// Node types that walk() skips entirely, children included.
		var SKIP_TYPES = [
		  'style',
		  'script'
		];

		/**
		 * Converts an HTML string to plain text.
		 *
		 * The caller's options are copied before defaults are applied, so the object
		 * passed in is not modified.
		 *
		 * @param {string} html HTML source.
		 * @param {Object} [options] Conversion options; missing keys get the defaults
		 *   listed below.
		 * @returns {string} The converted text with surrounding whitespace stripped.
		 */
		function htmlToText(html, options) {
		  // Shallow copy: defaults and the running lineCharCount are written to the
		  // copy instead of leaking into the caller's object.
		  options = _.clone(options || {});
		  _.defaults(options, {
		    wordwrap: 80,
		    tables: [],
		    preserveNewlines: false,
		    uppercaseHeadings: true,
		    singleNewLineParagraphs: false,
		    hideLinkHrefIfSameAsText: false,
		    linkHrefBaseUrl: null,
		    noLinkBrackets: false,
		    baseElement: 'body',
		    returnDomByDefault: true,
		    decodeOptions: {
		      isAttributeValue: false,
		      strict: false
		    },
		    longWordSplit: {
		      wrapCharacters: [],
		      forceWrapOnLimit: false
		    }
		  });

		  // The completion callback is intentionally empty; the parsed tree is read
		  // from handler.dom below.
		  var handler = new htmlparser.DefaultHandler(function (error, dom) {

		  }, {
		    verbose: true
		  });
		  new htmlparser.Parser(handler).parseComplete(html);

		  options.lineCharCount = 0;

		  // Convert every requested base element in turn and concatenate the output.
		  var result = '';
		  var baseElements = Array.isArray(options.baseElement) ? options.baseElement : [options.baseElement];
		  for (var idx = 0; idx < baseElements.length; ++idx) {
		    result += walk(filterBody(handler.dom, options, baseElements[idx]), options);
		  }
		  return _s.strip(result);
		}

		/**
		 * Finds the element the conversion should start from.
		 *
		 * The tree is searched depth first for the first element whose name equals
		 * the element part of `baseElement` and which carries all of its classes and
		 * ids.
		 *
		 * @param {Array} dom Parsed nodes.
		 * @param {Object} options Conversion options (`returnDomByDefault`).
		 * @param {string} baseElement Selector such as 'body' or 'div.main#content'.
		 * @returns {?Array} A one-element array with the match; without a match the
		 *   whole `dom` when `options.returnDomByDefault` is set, otherwise `null`.
		 */
		function filterBody(dom, options, baseElement) {
		  var result = null;

		  var splitTag = helper.splitCssSearchTag(baseElement);

		  function walk(dom) {
		    if (result) return;
		    _.each(dom, function(elem) {
		      if (result) return;
		      if (elem.name === splitTag.element) {
		        var documentClasses = elem.attribs && elem.attribs.class ? elem.attribs.class.split(" ") : [];
		        var documentIds = elem.attribs && elem.attribs.id ? elem.attribs.id.split(" ") : [];

		        if ((splitTag.classes.every(function (val) { return documentClasses.indexOf(val) >= 0; })) &&
		          (splitTag.ids.every(function (val) { return documentIds.indexOf(val) >= 0; }))) {
		          result = [elem];
		          return;
		        }
		      }
		      if (elem.children) walk(elem.children);
		    });
		  }
		  walk(dom);
		  return options.returnDomByDefault ? result || dom : result;
		}

		/**
		 * Decides whether a table element is selected by the `tables` option.
		 *
		 * @param {Object} [attr] Attributes of the table element.
		 * @param {boolean|string[]} tables `true` to select every table, otherwise a
		 *   list of '.class' and '#id' entries.
		 * @returns {*} Truthy when `tables` is `true`, or when the element's whole
		 *   `class` or `id` attribute value equals one of the entries.
		 */
		function containsTable(attr, tables) {
		  if (tables === true) return true;

		  function removePrefix(key) {
		    return key.substr(1);
		  }
		  function checkPrefix(prefix) {
		    return function(key) {
		      return _s.startsWith(key, prefix);
		    };
		  }
		  // Entries starting with `prefix`, with that prefix character removed.
		  function filterByPrefix(tables, prefix) {
		    return _(tables).chain()
		      .filter(checkPrefix(prefix))
		      .map(removePrefix)
		      .value();
		  }
		  var classes = filterByPrefix(tables, '.');
		  var ids = filterByPrefix(tables, '#');
		  return attr && (_.include(classes, attr['class']) || _.include(ids, attr['id']));
		}

		/**
		 * Recursively converts a list of nodes to text.
		 *
		 * Tags with a dedicated formatter are delegated to `format.*`; any other tag
		 * (including tables not selected by `options.tables`) is replaced by the
		 * conversion of its children. Nodes whose type is listed in SKIP_TYPES are
		 * dropped. After every node `options.lineCharCount` is set to the length of
		 * the current last line of the output.
		 *
		 * @param {Array} dom Nodes to convert.
		 * @param {Object} options Conversion options.
		 * @param {string} [result] Text produced so far; defaults to ''.
		 * @returns {string} `result` extended by the text of `dom`.
		 */
		function walk(dom, options, result) {
		  if (arguments.length < 3) {
		    result = '';
		  }
		  var whiteSpaceRegex = /\s$/;
		  _.each(dom, function(elem) {
		    switch(elem.type) {
		      case 'tag':
		        switch(elem.name.toLowerCase()) {
		          case 'img':
		            result += format.image(elem, options);
		            break;
		          case 'a':
		            // Inline element needs its leading space to be trimmed if `result`
		            // currently ends with whitespace
		            elem.trimLeadingSpace = whiteSpaceRegex.test(result);
		            result += format.anchor(elem, walk, options);
		            break;
		          case 'p':
		            result += format.paragraph(elem, walk, options);
		            break;
		          case 'h1':
		          case 'h2':
		          case 'h3':
		          case 'h4':
		          case 'h5':
		          case 'h6':
		            result += format.heading(elem, walk, options);
		            break;
		          case 'br':
		            result += format.lineBreak(elem, walk, options);
		            break;
		          case 'hr':
		            result += format.horizontalLine(elem, walk, options);
		            break;
		          case 'ul':
		            result += format.unorderedList(elem, walk, options);
		            break;
		          case 'ol':
		            result += format.orderedList(elem, walk, options);
		            break;
		          case 'pre':
		            // Format like a paragraph, but flag the content as preformatted.
		            var newOptions = _(options).clone();
		            newOptions.isInPre = true;
		            result += format.paragraph(elem, walk, newOptions);
		            break;
		          case 'table':
		            // Tables that are not selected are treated like any other container.
		            result = containsTable(elem.attribs, options.tables)
		              ? result + format.table(elem, walk, options)
		              : walk(elem.children || [], options, result);
		            break;
		          default:
		            result = walk(elem.children || [], options, result);
		        }
		        break;
		      case 'text':
		        if (elem.data !== '\r\n') {
		          // Text needs its leading space to be trimmed if `result`
		          // currently ends with whitespace
		          elem.trimLeadingSpace = whiteSpaceRegex.test(result);
		          result += format.text(elem, options);
		        }
		        break;
		      default:
		        if (!_.include(SKIP_TYPES, elem.type)) {
		          result = walk(elem.children || [], options, result);
		        }
		    }

		    options.lineCharCount = result.length - (result.lastIndexOf('\n') + 1);
		  });
		  return result;
		}

		exports.fromFile = function(file, options, callback) {
		if (!callback) {
		callback = options;
		options = {};
		}
		fs.readFile(file, 'utf8', function(err, str) {
		var result = htmlToText(str, options);
		return callback(null, result);
		});
		if (!callback) {
		callback = options;
		options = {};
		}
		fs.readFile(file, 'utf8', function (err, str) {
		if (err) return callback(err);
		return callback(null, htmlToText(str, options));
		});
		};

		exports.fromString = function(str, options) {
		return htmlToText(str, options \|\| {});
		return htmlToText(str, options \|\| {});
		};

package.json

		{
		"name": "html-to-text",
		"version": "3.1.0",
		"version": "3.2.0",
		"description": "Advanced html to plain text converter",
		"main": "index.js",
		"scripts": {
		"test": "node_modules/.bin/mocha",
		"example": "node ./example/html-to-text.js"
		"test": "istanbul cover _mocha && eslint .",
		"example": "node ./example/html-to-text.js",
		"lint": "eslint ."
		},
		@@ -46,4 +47,6 @@ "author": {
		"chai": "^3.5.0",
		"eslint": "^3.14.1",
		"istanbul": "^0.4.5",
		"mocha": "^3.0.0"
		}
		}

README.md

		# html-to-text

		[![Build Status](https://travis-ci.org/werk85/node-html-to-text.svg?branch=master)](https://travis-ci.org/werk85/node-html-to-text)
		[![Test Coverage](https://codeclimate.com/github/werk85/node-html-to-text/badges/coverage.svg)](https://codeclimate.com/github/werk85/node-html-to-text/coverage)

		@@ -5,0 +6,0 @@ An advanced converter that parses HTML and returns beautiful text. It was mainly designed to transform HTML E-Mail templates to a text representation. So it is currently optimized for table layouts.

test/html-to-text.js

		@@ -0,1 +1,3 @@
		/* eslint max-len: "off" */

		var expect = require('chai').expect;
		@@ -233,2 +235,17 @@ var htmlToText = require('..');

		it('should support the ordered list type="1" attribute', function() {
		var testString = '<ol type="1"><li>foo</li><li>bar</li></ol>';
		expect(htmlToText.fromString(testString)).to.equal('1. foo\n 2. bar');
		});

		it('should support the ordered list type="a" attribute', function() {
		var testString = '<ol type="a"><li>foo</li><li>bar</li></ol>';
		expect(htmlToText.fromString(testString)).to.equal('a. foo\nb. bar');
		});

		it('should support the ordered list type="A" attribute', function() {
		var testString = '<ol type="A"><li>foo</li><li>bar</li></ol>';
		expect(htmlToText.fromString(testString)).to.equal('A. foo\nB. bar');
		});

		it('should support the ordered list start attribute', function() {
		@@ -238,2 +255,17 @@ var testString = '<ol start="2"><li>foo</li><li>bar</li></ol>';
		});

		/*
		* The tests below currently fail; they stay commented out until the rest of the specification is implemented.
		* Spec: https://html.spec.whatwg.org/multipage/semantics.html#the-ol-element
		*
		it('should support the ordered list type="a" attribute past 26 characters', function() {
		var testString = '<ol start="26" type="a"><li>foo</li><li>bar</li></ol>';
		expect(htmlToText.fromString(testString)).to.equal('z. foo\naa. bar');
		});

		it('should support the ordered list type="A" attribute past 26 characters', function() {
		var testString = '<ol start="26" type="A"><li>foo</li><li>bar</li></ol>';
		expect(htmlToText.fromString(testString)).to.equal('Z. foo\nAA. bar');
		});
		*/
		});
		@@ -291,4 +323,4 @@
		it('should retrieve and convert the entire document under `body` by default', function(done) {
		var htmlFile = path.join(__dirname, 'test.html'),
		txtFile = path.join(__dirname, 'test.txt');
		var htmlFile = path.join(__dirname, 'test.html');
		var txtFile = path.join(__dirname, 'test.txt');

		@@ -307,4 +339,4 @@ var expectedTxt = fs.readFileSync(txtFile, 'utf8');
		it('should only retrieve and convert content under the specified base element if found', function(done) {
		var htmlFile = path.join(__dirname, 'test.html'),
		txtFile = path.join(__dirname, 'test-address.txt');
		var htmlFile = path.join(__dirname, 'test.html');
		var txtFile = path.join(__dirname, 'test-address.txt');

		@@ -324,4 +356,4 @@ var expectedTxt = fs.readFileSync(txtFile, 'utf8');
		it('should retrieve and convert content under multiple base elements', function(done) {
		var htmlFile = path.join(__dirname, 'test.html'),
		txtFile = path.join(__dirname, 'test-address-dup.txt');
		var htmlFile = path.join(__dirname, 'test.html');
		var txtFile = path.join(__dirname, 'test-address-dup.txt');

		@@ -341,4 +373,4 @@ var expectedTxt = fs.readFileSync(txtFile, 'utf8');
		it('should retrieve and convert content under multiple base elements in any order', function(done) {
		var htmlFile = path.join(__dirname, 'test.html'),
		txtFile = path.join(__dirname, 'test-any-order.txt');
		var htmlFile = path.join(__dirname, 'test.html');
		var txtFile = path.join(__dirname, 'test-any-order.txt');

		@@ -358,4 +390,4 @@ var expectedTxt = fs.readFileSync(txtFile, 'utf8');
		it('should process the first base element found when multiple exist', function(done) {
		var htmlFile = path.join(__dirname, 'test.html'),
		txtFile = path.join(__dirname, 'test-first-element.txt');
		var htmlFile = path.join(__dirname, 'test.html');
		var txtFile = path.join(__dirname, 'test-first-element.txt');

		@@ -375,4 +407,4 @@ var expectedTxt = fs.readFileSync(txtFile, 'utf8');
		it('should retrieve and convert the entire document by default if no base element is found', function(done) {
		var htmlFile = path.join(__dirname, 'test.html'),
		txtFile = path.join(__dirname, 'test.txt');
		var htmlFile = path.join(__dirname, 'test.html');
		var txtFile = path.join(__dirname, 'test.txt');

		@@ -379,0 +411,0 @@ var expectedTxt = fs.readFileSync(txtFile, 'utf8');

.npmignore

Sorry, the diff of this file is not supported yet

.travis.yml

Sorry, the diff of this file is not supported yet

html-to-text - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics