Comparing version 0.0.1 to 0.0.2
49
index.js
@@ -1,8 +0,33 @@ | ||
var downsize = require('downsize'); | ||
var htmlToText = require('html-to-text'); | ||
var $ = require('cheerio'); | ||
/**
 * Build a plain-text excerpt from an HTML snippet.
 *
 * The markup is loaded into a detached `<p>` wrapper via `$` and read back
 * as text, so tags are dropped. Every run of whitespace (including line
 * breaks) is then collapsed into a single space.
 *
 * @param {*} html - HTML snippet; coerced with `String()` before use.
 * @param {Object} [opts] - Options, normalized by `prepare()` first.
 *   `characters` limits the excerpt to that many leading characters,
 *   `words` limits it to that many leading space-separated words, and
 *   `append` is the suffix added when the excerpt is shorter than the text.
 * @returns {string} The excerpt, with `opts.append` added only when the
 *   text was actually truncated.
 */
function excerpts(html, opts) {
  var source = String(html);
  var settings = prepare(opts);

  var rawText = $('<p>').html(source).text().trim();
  var text = rawText.replace(/(\r\n|\r|\n|\s)+/g, ' ');

  var excerpt = '';

  if (settings.characters != null) {
    excerpt = text.slice(0, settings.characters);
  }

  // Evaluated second on purpose: a word limit overrides a character limit.
  if (settings.words != null) {
    var leadingWords = text.split(' ').slice(0, settings.words);
    excerpt = leadingWords.join(' ');
  }

  // A shorter excerpt means something was cut off, so mark the truncation.
  var truncated = excerpt.length < text.length;
  return truncated ? excerpt + settings.append : excerpt;
}
function prepare(opts) { | ||
opts = opts || {}; | ||
opts.append = opts.append || '...'; | ||
if (opts.append == null) { | ||
opts.append = '...'; | ||
} | ||
@@ -17,17 +42,13 @@ if (!opts.words && !opts.characters) { | ||
var text; | ||
if (opts.words != null) { | ||
opts.words = parseInt(opts.words, 10); | ||
} | ||
text = htmlToText.fromString(html, { | ||
wordwrap: false, | ||
ignoreHref: true, | ||
ignoreImage: true | ||
}); | ||
if (opts.characters != null) { | ||
opts.characters = parseInt(opts.characters, 10); | ||
} | ||
text = downsize(text, opts); | ||
text = text.replace(/(\r\n|\r|\n)+/g, ' '); | ||
return text; | ||
return opts; | ||
} | ||
module.exports = excerpts; |
{ | ||
"name": "excerpts", | ||
"version": "0.0.1", | ||
"version": "0.0.2", | ||
"description": "Excerpting text of given words or characters from HTML.", | ||
@@ -22,4 +22,3 @@ "license": "MIT", | ||
"dependencies": { | ||
"downsize": "0.0.8", | ||
"html-to-text": "^1.6.0" | ||
"cheerio": "^0.20.0" | ||
}, | ||
@@ -26,0 +25,0 @@ "devDependencies": { |
# excerpts [![Build Status](https://travis-ci.org/gnowoel/excerpts.svg?branch=master)](https://travis-ci.org/gnowoel/excerpts) | ||
Excerpting text of given words or characters from HTML. | ||
Excerpting words or characters of text from an HTML snippet. | ||
@@ -13,3 +13,3 @@ ## Installation | ||
Given HTML: | ||
Given HTML snippet: | ||
@@ -22,9 +22,7 @@ ``` html | ||
Excerpting words with `words` option: | ||
Excerpting words with the `words` option: | ||
```javascript | ||
var excerpts = require('excerpts'); | ||
var text = excerpts(html, { words: 3 }); | ||
//=> Lorem ipsum dolor... | ||
@@ -35,13 +33,11 @@ ``` | ||
Excerpting characters with `characters` option: | ||
Excerpting characters with the `characters` option: | ||
```javascript | ||
var excerpts = require('excerpts'); | ||
var text = excerpts(html, { characters: 10 }); | ||
//=> Lorem ipsum dol... | ||
``` | ||
The `words` option takes precedence over the `characters` option. With missing option, 50 words would be extracted by default. | ||
The `words` option takes precedence over the `characters` option. By default, 50 words will be extracted when options are missing. | ||
@@ -54,9 +50,7 @@ ### Appendix | ||
var excerpts = require('excerpts'); | ||
var text = excerpts(html, { words: 3, append: " >>" }); | ||
var text = excerpts(html, { words: 3, append: ' >>' }); | ||
//=> Lorem ipsum dolor >> | ||
``` | ||
The appendix won't appear when full text are extracted. | ||
The appendix won't appear when full text has been extracted. | ||
@@ -63,0 +57,0 @@ ## Tests |
@@ -1,1 +0,1 @@ | ||
Lorem ipsum dolor sit amet, test link adipiscing elit. This is strong. Nullam dignissim convallis est. Quisque aliquam. This is emphasized. Donec faucibus. Nunc iaculis suscipit dui. 5 3 = 125. Water is H 2 O. Nam sit amet sem. Aliquam libero nisi, imperdiet at, tincidunt nec, gravida vehicula, nisl. The... | ||
Lorem ipsum dolor sit amet, test link adipiscing elit. This is strong. Nullam dignissim convallis est. Quisque aliquam. This is emphasized. Donec faucibus. Nunc iaculis suscipit dui. 53 = 125. Water is H2O. Nam sit amet sem. Aliquam libero nisi, imperdiet at, tincidunt nec, gravida vehicula, nisl. The New York... |
@@ -1,1 +0,1 @@ | ||
Lorem ipsum dolor sit amet, test link adipiscing elit. This is strong. Nullam dignissim convallis est. Quisque aliquam. This is emphasized. Donec faucibus. Nunc iaculis suscipit dui. 5 3 = 125. Water is H 2 O. Nam sit amet sem. Aliquam libero nisi, imperdiet at, tincidunt nec, gravida vehicula, nisl. The New York Times (That’s a citation). Underline. Maecenas ornare tortor. Donec sed tellus eget sapien fringilla nonummy. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus. HTML and CSS are our tools. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus. Praesent mattis, massa quis luctus fermentum, turpis mi volutpat justo, eu volutpat enim diam eget metus. To copy a file type COPY filename . Dinner’s at 5:00. Let’s make that 7. This text has been struck. | ||
Lorem ipsum dolor sit amet, test link adipiscing elit. This is strong. Nullam dignissim convallis est. Quisque aliquam. This is emphasized. Donec faucibus. Nunc iaculis suscipit dui. 53 = 125. Water is H2O. Nam sit amet sem. Aliquam libero nisi, imperdiet at, tincidunt nec, gravida vehicula, nisl. The New York Times (That’s a citation). Underline. Maecenas ornare tortor. Donec sed tellus eget sapien fringilla nonummy. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus. HTML and CSS are our tools. Mauris a ante. Suspendisse quam sem, consequat at, commodo vitae, feugiat in, nunc. Morbi imperdiet augue quis tellus. Praesent mattis, massa quis luctus fermentum, turpis mi volutpat justo, eu volutpat enim diam eget metus. To copy a file type COPY filename. Dinner’s at 5:00. Let’s make that 7. This text has been struck. |
@@ -6,5 +6,5 @@ var fs = require('fs'); | ||
var html = fs.readFileSync(path.join(__dirname, 'sample.html'), 'utf8'); | ||
var html = fs.readFileSync(path.join(__dirname, 'snippet.html'), 'utf8'); | ||
describe('excerpts', function() { | ||
describe('excerpts(html, opts)', function() { | ||
it('should extract 50 words by default', function(done) { | ||
@@ -28,2 +28,11 @@ var text = fs.readFileSync(path.join(__dirname, 'default.txt'), 'utf8'); | ||
it('should accept either number or string options', function(done) { | ||
var text = fs.readFileSync(path.join(__dirname, 'words.txt'), 'utf8'); | ||
var excerpt = excerpts(html, { words: '10' }); | ||
assert.equal(excerpt, text.trim()); | ||
done(); | ||
}); | ||
it('should extract specified characters', function(done) { | ||
@@ -30,0 +39,0 @@ var text = fs.readFileSync(path.join(__dirname, 'characters.txt'), 'utf8'); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
8087
1
103
63
+ Addedcheerio@^0.20.0
+ Addedabab@1.0.4(transitive)
+ Addedacorn@2.7.0(transitive)
+ Addedacorn-globals@1.0.9(transitive)
+ Addedajv@6.12.6(transitive)
+ Addedasn1@0.2.6(transitive)
+ Addedassert-plus@1.0.0(transitive)
+ Addedasynckit@0.4.0(transitive)
+ Addedaws-sign2@0.7.0(transitive)
+ Addedaws4@1.13.2(transitive)
+ Addedbcrypt-pbkdf@1.0.2(transitive)
+ Addedboolbase@1.0.0(transitive)
+ Addedcaseless@0.12.0(transitive)
+ Addedcheerio@0.20.0(transitive)
+ Addedcombined-stream@1.0.8(transitive)
+ Addedcore-util-is@1.0.2, 1.0.3(transitive)
+ Addedcss-select@1.2.0(transitive)
+ Addedcss-what@2.1.3(transitive)
+ Addedcssom@0.3.8(transitive)
+ Addedcssstyle@0.2.37(transitive)
+ Addeddashdash@1.14.1(transitive)
+ Addeddeep-is@0.1.4(transitive)
+ Addeddelayed-stream@1.0.0(transitive)
+ Addeddom-serializer@0.1.1(transitive)
+ Addeddomelementtype@1.3.1(transitive)
+ Addeddomhandler@2.3.0(transitive)
+ Addeddomutils@1.5.1(transitive)
+ Addedecc-jsbn@0.1.2(transitive)
+ Addedentities@1.0.0, 1.1.2(transitive)
+ Addedescodegen@1.14.3(transitive)
+ Addedesprima@4.0.1(transitive)
+ Addedestraverse@4.3.0(transitive)
+ Addedesutils@2.0.3(transitive)
+ Addedextend@3.0.2(transitive)
+ Addedextsprintf@1.3.0(transitive)
+ Addedfast-deep-equal@3.1.3(transitive)
+ Addedfast-json-stable-stringify@2.1.0(transitive)
+ Addedfast-levenshtein@2.0.6(transitive)
+ Addedforever-agent@0.6.1(transitive)
+ Addedform-data@2.3.3(transitive)
+ Addedgetpass@0.1.7(transitive)
+ Addedhar-schema@2.0.0(transitive)
+ Addedhar-validator@5.1.5(transitive)
+ Addedhtmlparser2@3.8.3(transitive)
+ Addedhttp-signature@1.2.0(transitive)
+ Addedinherits@2.0.4(transitive)
+ Addedis-typedarray@1.0.0(transitive)
+ Addedisarray@0.0.1(transitive)
+ Addedisstream@0.1.2(transitive)
+ Addedjsbn@0.1.1(transitive)
+ Addedjsdom@7.2.2(transitive)
+ Addedjson-schema@0.4.0(transitive)
+ Addedjson-schema-traverse@0.4.1(transitive)
+ Addedjson-stringify-safe@5.0.1(transitive)
+ Addedjsprim@1.4.2(transitive)
+ Addedlevn@0.3.0(transitive)
+ Addedlodash@4.17.21(transitive)
+ Addedmime-db@1.52.0(transitive)
+ Addedmime-types@2.1.35(transitive)
+ Addednth-check@1.0.2(transitive)
+ Addednwmatcher@1.4.4(transitive)
+ Addedoauth-sign@0.9.0(transitive)
+ Addedoptionator@0.8.3(transitive)
+ Addedparse5@1.5.1(transitive)
+ Addedperformance-now@2.1.0(transitive)
+ Addedprelude-ls@1.1.2(transitive)
+ Addedpsl@1.9.0(transitive)
+ Addedpunycode@2.3.1(transitive)
+ Addedqs@6.5.3(transitive)
+ Addedreadable-stream@1.1.14(transitive)
+ Addedrequest@2.88.2(transitive)
+ Addedsafe-buffer@5.2.1(transitive)
+ Addedsafer-buffer@2.1.2(transitive)
+ Addedsax@1.4.1(transitive)
+ Addedsource-map@0.6.1(transitive)
+ Addedsshpk@1.18.0(transitive)
+ Addedstring_decoder@0.10.31(transitive)
+ Addedsymbol-tree@3.2.4(transitive)
+ Addedtough-cookie@2.5.0(transitive)
+ Addedtr46@0.0.3(transitive)
+ Addedtunnel-agent@0.6.0(transitive)
+ Addedtweetnacl@0.14.5(transitive)
+ Addedtype-check@0.3.2(transitive)
+ Addeduri-js@4.4.1(transitive)
+ Addeduuid@3.4.0(transitive)
+ Addedverror@1.10.0(transitive)
+ Addedwebidl-conversions@2.0.1(transitive)
+ Addedwhatwg-url-compat@0.6.5(transitive)
+ Addedword-wrap@1.2.5(transitive)
+ Addedxml-name-validator@2.0.1(transitive)
- Removeddownsize@0.0.8
- Removedhtml-to-text@^1.6.0
- Removeddownsize@0.0.8(transitive)
- Removedhtml-to-text@1.6.2(transitive)
- Removedhtmlparser@1.7.7(transitive)
- Removedminimist@0.0.10(transitive)
- Removedoptimist@0.6.1(transitive)
- Removedunderscore@1.13.7(transitive)
- Removedunderscore.string@2.4.0(transitive)
- Removedwordwrap@0.0.3(transitive)
- Removedxregexp@2.0.0(transitive)