html-to-text
Advanced tools
Comparing version 3.0.0 to 3.1.0
@@ -11,2 +11,3 @@ #!/usr/bin/env node | ||
.default('ignore-image', false) | ||
.default('noLinkBrackets', false) | ||
.argv; | ||
@@ -29,3 +30,4 @@ | ||
ignoreHref: argv['ignore-href'], | ||
ignoreImage: argv['ignore-image'] | ||
ignoreImage: argv['ignore-image'], | ||
noLinkBrackets: argv['noLinkBrackets'] | ||
}); | ||
@@ -32,0 +34,0 @@ process.stdout.write(text + '\n', 'utf-8'); |
# Changelog | ||
## Version 3.1.0 | ||
* Support for the ordered list start attribute added #117 | ||
* Option to format paragraph with single new line #112 | ||
* `noLinkBrackets` option added #119 | ||
## Version 3.0.0 | ||
@@ -4,0 +10,0 @@ |
@@ -41,3 +41,8 @@ var _ = require('underscore'); | ||
function formatParagraph(elem, fn, options) { | ||
return fn(elem.children, options) + '\n\n'; | ||
var paragraph = fn(elem.children, options) | ||
if (options.singleNewLineParagraphs) { | ||
return paragraph + '\n' | ||
} else { | ||
return paragraph + '\n\n' | ||
} | ||
} | ||
@@ -79,3 +84,7 @@ | ||
if (!options.hideLinkHrefIfSameAsText || href != _s.replaceAll(result, '\n', '')) { | ||
result += ' [' + href + ']'; | ||
if (!options.noLinkBrackets) { | ||
result += ' [' + href + ']'; | ||
} else { | ||
result += ' ' + href; | ||
} | ||
} | ||
@@ -128,6 +137,8 @@ } | ||
if (nonWhiteSpaceChildren.length) { | ||
// Calculate initial start from ol attribute | ||
var start = parseInt(elem.attribs.start || '1') - 1 | ||
// Calculate the maximum length to i. | ||
var maxLength = nonWhiteSpaceChildren.length.toString().length; | ||
var maxLength = (nonWhiteSpaceChildren.length + start).toString().length; | ||
_.each(nonWhiteSpaceChildren, function(elem, i) { | ||
var index = i + 1; | ||
var index = i + 1 + start; | ||
// Calculate the needed spacing for nice indentation. | ||
@@ -134,0 +145,0 @@ var spacing = maxLength - index.toString().length; |
@@ -24,4 +24,6 @@ var fs = require('fs'); | ||
uppercaseHeadings: true, | ||
singleNewLineParagraphs: false, | ||
hideLinkHrefIfSameAsText: false, | ||
linkHrefBaseUrl: null, | ||
noLinkBrackets: false, | ||
baseElement: 'body', | ||
@@ -28,0 +30,0 @@ returnDomByDefault: true, |
{ | ||
"name": "html-to-text", | ||
"version": "3.0.0", | ||
"version": "3.1.0", | ||
"description": "Advanced html to plain text converter", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -1,2 +0,2 @@ | ||
# node-html-to-text | ||
# html-to-text | ||
@@ -61,2 +61,3 @@ [![Build Status](https://travis-ci.org/werk85/node-html-to-text.svg?branch=master)](https://travis-ci.org/werk85/node-html-to-text) | ||
* `hideLinkHrefIfSameAsText` by default, links are translated as follows: `<a href='link'>text</a>` => becomes => `text [link]`. If this option is set to `true` and `link` and `text` are the same, `[link]` will be hidden and only `text` will be visible. | ||
* `noLinkBrackets` don't print brackets around the link href if `true` (`text link` instead of `text [link]`). | ||
* `ignoreHref` ignore all document links if `true`. | ||
@@ -67,2 +68,3 @@ * `ignoreImage` ignore all document images if `true`. | ||
* `uppercaseHeadings` by default, headings (`<h1>`, `<h2>`, etc) are uppercased. Set to `false` to leave headings as they are. | ||
* `singleNewLineParagraphs` by default, paragraphs are converted with two newlines (`\n\n`). Set to `true` to convert to a single newline. | ||
* `baseElement` defines the tags whose text content will be captured from the html. All content will be captured below the baseElement tags and added to the resulting text output. This option allows the user to specify an array of elements as base elements using a single tag with css class and id parameters e.g. [`p.class1.class2#id1#id2`, `p.class1.class2#id1#id2`] . Default: `body` | ||
@@ -289,3 +291,3 @@ * `returnDomByDefault` convert the entire document if we don't find the tag we're looking for if `true`. | ||
Copyright (c) 2016 werk85 <legenhausen@werk85.de> | ||
Copyright (c) 2017 werk85 <legenhausen@werk85.de> | ||
@@ -292,0 +294,0 @@ Permission is hereby granted, free of charge, to any person obtaining |
@@ -6,3 +6,2 @@ var expect = require('chai').expect; | ||
describe('html-to-text', function() { | ||
@@ -123,2 +122,23 @@ describe('.fromString()', function() { | ||
}); | ||
describe('single line paragraph option', function() { | ||
var paragraphsString; | ||
beforeEach(function() { | ||
paragraphsString = '<p>First</p><p>Second</p>'; | ||
}); | ||
it('should not use single new line when given null', function() { | ||
expect(htmlToText.fromString(paragraphsString, { singleNewLineParagraphs: null } )).to.equal('First\n\nSecond'); | ||
}); | ||
it('should not use single new line when given false', function() { | ||
expect(htmlToText.fromString(paragraphsString, { singleNewLineParagraphs: false } )).to.equal('First\n\nSecond'); | ||
}); | ||
it('should use single new line when given true', function() { | ||
expect(htmlToText.fromString(paragraphsString, { singleNewLineParagraphs: true } )).to.equal('First\nSecond'); | ||
}); | ||
}); | ||
}); | ||
@@ -141,17 +161,2 @@ | ||
describe('li', function () { | ||
it('doesnt wrap li if wordwrap isnt', function () { | ||
var html = 'Good morning Jacob, \ | ||
<p>Lorem ipsum dolor sit amet</p> \ | ||
<p><strong>Lorem ipsum dolor sit amet.</strong></p> \ | ||
<ul> \ | ||
<li>run in the park <span style="color:#888888;">(in progress)</span></li> \ | ||
</ul> \ | ||
'; | ||
var resultExpected = 'Good morning Jacob, Lorem ipsum dolor sit amet\n\nLorem ipsum dolor sit amet.\n\n * run in the park (in progress)'; | ||
var result = htmlToText.fromString(html, { wordwrap: false }); | ||
expect(result).to.equal(resultExpected); | ||
}); | ||
}); | ||
describe('tables', function () { | ||
@@ -193,12 +198,58 @@ it('does not process tables with uppercase tags / does not process tables with center tag', function () { | ||
describe('a', function () { | ||
it('should return link with brackets', function () { | ||
var result = htmlToText.fromString('<a href="http://my.link">test</a>'); | ||
expect(result).to.equal('test [http://my.link]'); | ||
}); | ||
it('should return link without brackets', function () { | ||
var result = htmlToText.fromString('<a href="http://my.link">test</a>', { | ||
noLinkBrackets: true | ||
}); | ||
expect(result).to.equal('test http://my.link'); | ||
}); | ||
}); | ||
describe('lists', function() { | ||
it('should handle empty unordered lists', function() { | ||
var testString = '<ul></ul>'; | ||
expect(htmlToText.fromString(testString)).to.equal(''); | ||
describe('ul', function() { | ||
it('should handle empty unordered lists', function() { | ||
var testString = '<ul></ul>'; | ||
expect(htmlToText.fromString(testString)).to.equal(''); | ||
}); | ||
it('should handle an unordered list with multiple elements', function() { | ||
var testString = '<ul><li>foo</li><li>bar</li></ul>'; | ||
expect(htmlToText.fromString(testString)).to.equal('* foo\n * bar'); | ||
}); | ||
}); | ||
it('should handle empty ordered lists', function() { | ||
var testString = '<ol></ol>'; | ||
expect(htmlToText.fromString(testString)).to.equal(''); | ||
describe('ol', function() { | ||
it('should handle empty ordered lists', function() { | ||
var testString = '<ol></ol>'; | ||
expect(htmlToText.fromString(testString)).to.equal(''); | ||
}); | ||
it('should handle an ordered list with multiple elements', function() { | ||
var testString = '<ol><li>foo</li><li>bar</li></ol>'; | ||
expect(htmlToText.fromString(testString)).to.equal('1. foo\n 2. bar'); | ||
}); | ||
it('should support the ordered list start attribute', function() { | ||
var testString = '<ol start="2"><li>foo</li><li>bar</li></ol>'; | ||
expect(htmlToText.fromString(testString)).to.equal('2. foo\n 3. bar'); | ||
}); | ||
}); | ||
it('doesnt wrap li if wordwrap isnt', function () { | ||
var html = 'Good morning Jacob, \ | ||
<p>Lorem ipsum dolor sit amet</p> \ | ||
<p><strong>Lorem ipsum dolor sit amet.</strong></p> \ | ||
<ul> \ | ||
<li>run in the park <span style="color:#888888;">(in progress)</span></li> \ | ||
</ul> \ | ||
'; | ||
var resultExpected = 'Good morning Jacob, Lorem ipsum dolor sit amet\n\nLorem ipsum dolor sit amet.\n\n * run in the park (in progress)'; | ||
var result = htmlToText.fromString(html, { wordwrap: false }); | ||
expect(result).to.equal(resultExpected); | ||
}); | ||
}); | ||
@@ -238,3 +289,2 @@ | ||
}); | ||
}); | ||
@@ -241,0 +291,0 @@ |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Deprecated
Maintenance: The maintainer of the package marked it as deprecated. This could indicate that a single version should not be used, or that the package is no longer maintained and any new vulnerabilities will not be fixed.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
73530
994
309
1