html-to-text
Advanced tools
Comparing version 3.2.0 to 3.3.0
# Changelog | ||
## Version 3.3.0 | ||
* Ability to pass custom formatting via the `format` option #128 | ||
* Enhanced support for alpha ordered list types added #123 | ||
## Version 3.2.0 | ||
@@ -4,0 +9,0 @@ |
@@ -79,11 +79,13 @@ var _ = require('underscore'); | ||
if (href) { | ||
if (options.linkHrefBaseUrl && href.indexOf('/') === 0) { | ||
href = options.linkHrefBaseUrl + href; | ||
} | ||
if (!options.hideLinkHrefIfSameAsText || href !== _s.replaceAll(result, '\n', '')) { | ||
if (!options.noLinkBrackets) { | ||
result += ' [' + href + ']'; | ||
} else { | ||
result += ' ' + href; | ||
if ((!options.noAnchorUrl) || (options.noAnchorUrl && href.indexOf('#') === -1)) { | ||
if (options.linkHrefBaseUrl && href.indexOf('/') === 0) { | ||
href = options.linkHrefBaseUrl + href; | ||
} | ||
if (!options.hideLinkHrefIfSameAsText || href !== _s.replaceAll(result, '\n', '')) { | ||
if (!options.noLinkBrackets) { | ||
result += ' [' + href + ']'; | ||
} else { | ||
result += ' ' + href; | ||
} | ||
} | ||
} | ||
@@ -135,13 +137,18 @@ } | ||
// Return different functions for different OL types | ||
var typeFunctions = { | ||
1: function(start, i) { return i + 1 + start}, | ||
a: function(start, i) { return String.fromCharCode(i + start + 97)}, | ||
A: function(start, i) { return String.fromCharCode(i + start + 65)} | ||
}; | ||
// Determine type | ||
var olType = elem.attribs.type || '1' | ||
var typeFunction = (function() { | ||
// Determine type | ||
var olType = elem.attribs.type || '1'; | ||
// TODO Implement the other valid types | ||
// Fallback to type '1' function for other valid types | ||
switch(olType) { | ||
case 'a': return function(start, i) { return String.fromCharCode(i + start + 97)}; | ||
case 'A': return function(start, i) { return String.fromCharCode(i + start + 65)}; | ||
case '1': | ||
default: return function(start, i) { return i + 1 + start}; | ||
} | ||
}()) | ||
// Make sure there are list items present | ||
if (nonWhiteSpaceChildren.length) { | ||
// Calculate initial start from ol attribute | ||
var start = Number(elem.attribs.start || '1') - 1 | ||
var start = Number(elem.attribs.start || '1') - 1; | ||
// Calculate the maximum length to i. | ||
@@ -151,6 +158,6 @@ var maxLength = (nonWhiteSpaceChildren.length + start).toString().length; | ||
// Use different function depending on type | ||
var index = typeFunctions[olType](start, i); | ||
var index = typeFunction(start, i); | ||
// Calculate the needed spacing for nice indentation. | ||
var spacing = maxLength - index.toString().length; | ||
var prefix = (olType === '1') ? ' ' + index + '. ' + _s.repeat(' ', spacing) : index + '. '; | ||
var prefix = ' ' + index + '. ' + _s.repeat(' ', spacing); | ||
result += formatListItem(prefix, elem, fn, options); | ||
@@ -157,0 +164,0 @@ }); |
@@ -9,3 +9,3 @@ var fs = require('fs'); | ||
var helper = require('./helper'); | ||
var format = require('./formatter'); | ||
var defaultFormat = require('./formatter'); | ||
@@ -29,4 +29,6 @@ // Which type of tags should not be parsed | ||
noLinkBrackets: false, | ||
noAnchorUrl: true, | ||
baseElement: 'body', | ||
returnDomByDefault: true, | ||
format: {}, | ||
decodeOptions: { | ||
@@ -112,2 +114,4 @@ isAttributeValue: false, | ||
var whiteSpaceRegex = /\s$/; | ||
var format = _.assign({}, defaultFormat, options.format); | ||
_.each(dom, function(elem) { | ||
@@ -114,0 +118,0 @@ switch(elem.type) { |
{ | ||
"name": "html-to-text", | ||
"version": "3.2.0", | ||
"version": "3.3.0", | ||
"description": "Advanced html to plain text converter", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -74,3 +74,25 @@ # html-to-text | ||
* `forceWrapOnLimit` defines whether to break long words on the limit if `true`. | ||
* `format` pass an object to enable custom formatting for specific elements (see below) | ||
### Override formatting for specific elements | ||
By using the `format` option, you can specify formatting for these elements: `text`, `image`, `lineBreak`, `paragraph`, `anchor`, `heading`, `table`, `orderedList`, `unorderedList`, `listItem`, `horizontalLine`. | ||
Each key must be a function that receives `node` (the current node), `fn` (the next formatting function) and `options` (the options passed to html-to-text). | ||
```js | ||
var htmlToText = require('html-to-text'); | ||
var text = htmlToText.fromString('<h1>Hello World</h1>', { | ||
format: { | ||
heading: function (node, fn, options) { | ||
var h = fn(node.children, options); | ||
return '====\n' + h.toUpperCase() + '\n===='; | ||
} | ||
} | ||
}); | ||
console.log(text); | ||
``` | ||
## Command Line Interface | ||
@@ -77,0 +99,0 @@ |
@@ -209,2 +209,16 @@ /* eslint max-len: "off" */ | ||
}); | ||
it('should not return link for anchor if noAnchorUrl is set to true', function () { | ||
var result = htmlToText.fromString('<a href="#link">test</a>', { | ||
noAnchorUrl: true | ||
}); | ||
expect(result).to.equal('test'); | ||
}); | ||
it('should return link for anchor if noAnchorUrl is set to false', function () { | ||
var result = htmlToText.fromString('<a href="#link">test</a>', { | ||
noAnchorUrl: false | ||
}); | ||
expect(result).to.equal('test [#link]'); | ||
}); | ||
}); | ||
@@ -241,5 +255,10 @@ | ||
it('should fallback to type="!" behavior if type attribute is invalid', function() { | ||
var testString = '<ol type="1"><li>foo</li><li>bar</li></ol>'; | ||
expect(htmlToText.fromString(testString)).to.equal('1. foo\n 2. bar'); | ||
}); | ||
it('should support the ordered list type="a" attribute', function() { | ||
var testString = '<ol type="a"><li>foo</li><li>bar</li></ol>'; | ||
expect(htmlToText.fromString(testString)).to.equal('a. foo\nb. bar'); | ||
expect(htmlToText.fromString(testString)).to.equal('a. foo\n b. bar'); | ||
}); | ||
@@ -249,5 +268,19 @@ | ||
var testString = '<ol type="A"><li>foo</li><li>bar</li></ol>'; | ||
expect(htmlToText.fromString(testString)).to.equal('A. foo\nB. bar'); | ||
expect(htmlToText.fromString(testString)).to.equal('A. foo\n B. bar'); | ||
}); | ||
it('should support the ordered list type="i" attribute by falling back to type="1"', function() { | ||
var testString = '<ol type="i"><li>foo</li><li>bar</li></ol>'; | ||
// TODO Implement lowercase roman numerals | ||
// expect(htmlToText.fromString(testString)).to.equal('i. foo\nii. bar'); | ||
expect(htmlToText.fromString(testString)).to.equal('1. foo\n 2. bar'); | ||
}); | ||
it('should support the ordered list type="I" attribute by falling back to type="1"', function() { | ||
var testString = '<ol type="I"><li>foo</li><li>bar</li></ol>'; | ||
// TODO Implement uppercase roman numerals | ||
// expect(htmlToText.fromString(testString)).to.equal('I. foo\nII. bar'); | ||
expect(htmlToText.fromString(testString)).to.equal('1. foo\n 2. bar'); | ||
}); | ||
it('should support the ordered list start attribute', function() { | ||
@@ -322,2 +355,16 @@ var testString = '<ol start="2"><li>foo</li><li>bar</li></ol>'; | ||
describe('custom formatting', function () { | ||
it('should allow to pass custom formatting functions', function () { | ||
var result = htmlToText.fromString('<h1>TeSt</h1>', { | ||
format: { | ||
heading: function (elem, fn, options) { | ||
var h = fn(elem.children, options); | ||
return '====\n' + h.toLowerCase() + '\n===='; | ||
} | ||
} | ||
}); | ||
expect(result).to.equal('====\ntest\n===='); | ||
}) | ||
}); | ||
describe('Base element', function () { | ||
@@ -324,0 +371,0 @@ it('should retrieve and convert the entire document under `body` by default', function(done) { |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
84329
1078
332