node-readability
Advanced tools
Comparing version 0.10.0 to 1.0.0
{ | ||
"name": "node-readability", | ||
"version": "0.10.0", | ||
"version": "1.0.0", | ||
"author": "Zihua Li", | ||
@@ -5,0 +5,0 @@ "description": "Turning any web page into a clean view.", |
@@ -33,3 +33,3 @@ # Readability | ||
Example | ||
```javascript | ||
var read = require('node-readability'); | ||
@@ -50,4 +50,7 @@ | ||
console.log(meta); | ||
// Close article to clean up jsdom and prevent leaks | ||
article.close(); | ||
}); | ||
``` | ||
**NB** If the page has been marked with a charset other than utf-8, it will be converted automatically. Charsets such as GBK and GB2312 are also supported. | ||
@@ -60,6 +63,9 @@ | ||
node-readability has additional option cleanRules which allow set your own validation rule for tags. | ||
node-readability has two additional options: | ||
- `cleanRulers` which allows you to set your own validation rules for tags. | ||
If a rule returns true, it is valid; otherwise it is not. | ||
options.cleanRules = [callback(obj, tagName)] | ||
``` | ||
options.cleanRulers = [callback(obj, tagName)] | ||
```javascript | ||
read(url, { | ||
@@ -77,2 +83,20 @@ cleanRulers : [ | ||
``` | ||
- `preprocess` which should be a function to check or modify downloaded source before passing it to readability. | ||
options.preprocess = callback(source, response, content_type, callback); | ||
```javascript | ||
read(url, { | ||
preprocess: function(source, response, content_type, callback) { | ||
if (source.length > maxBodySize) { | ||
return callback(new Error('too big')); | ||
} | ||
callback(null, source); | ||
} | ||
}, function(err, article, response) { | ||
//... | ||
}); | ||
``` | ||
## article object | ||
@@ -79,0 +103,0 @@ |
@@ -13,4 +13,5 @@ var jsdom = require('jsdom'); | ||
function Readability(document, options) { | ||
this._document = document; | ||
function Readability(window, options) { | ||
this._window = window; | ||
this._document = window.document; | ||
this.iframeLoads = 0; | ||
@@ -43,2 +44,8 @@ // Cache the body HTML in case we need to re-use it later | ||
Readability.prototype.close = function() { | ||
this._window && this._window.close(); | ||
this._window = null; | ||
this._document = null; | ||
} | ||
Readability.prototype.getContent = function(notDeprecated) { | ||
@@ -158,5 +165,7 @@ if (!notDeprecated) { | ||
var overrideEncoding = options.encoding; | ||
var overrideEncoding = options.encoding, | ||
preprocess = options.preprocess; | ||
options.encoding = null; | ||
delete options.preprocess; | ||
@@ -181,3 +190,12 @@ if (html.indexOf('<') === -1) { | ||
jsdomParse(null, res, buffer.toString()); | ||
buffer = buffer.toString(); | ||
if (preprocess) { | ||
preprocess(buffer, res, content_type, function(err, buffer) { | ||
if (err) return callback(err); | ||
jsdomParse(null, res, buffer); | ||
}); | ||
} else { | ||
jsdomParse(null, res, buffer); | ||
} | ||
}); | ||
@@ -213,3 +231,3 @@ } else { | ||
// add meta information to callback | ||
callback(null, new Readability(window.document, options), meta); | ||
callback(null, new Readability(window, options), meta); | ||
} | ||
@@ -216,0 +234,0 @@ }); |
require('./mock-helpers.js'); | ||
var should = require('should'); | ||
var read = require('../src/readability'); | ||
@@ -31,3 +32,47 @@ | ||
describe('preprocess', function() { | ||
it('should preprocess document', function(done) { | ||
read('http://colorlines.com/archives/2011/08/dispatch_from_angola_faith-based_slavery_in_a_louisiana_prison.html', | ||
{ | ||
preprocess: function(source, response, content_type, callback) { | ||
should.exist(source); | ||
source.length.should.equal(50734); | ||
should.exist(response); | ||
should.exist(response.headers); | ||
should.exist(content_type); | ||
should.exist(content_type.charset); | ||
callback(null, '<html><head><title>some other title</title></head><body></body></html>'); | ||
} | ||
}, | ||
function(err, read) { | ||
should.not.exist(err); | ||
should.exist(read); | ||
read.title.should.equal('some other title') | ||
read.content.should.equal(false); | ||
done(); | ||
}); | ||
}); | ||
it('should stop processing document', function(done) { | ||
read('http://www.whitehouse.gov/', { | ||
preprocess: function(source, response, content_type, callback) { | ||
should.exist(source); | ||
source.length.should.equal(71002); | ||
should.exist(response); | ||
should.exist(response.headers); | ||
should.exist(content_type); | ||
should.exist(content_type.charset); | ||
callback(new Error('stop')); | ||
} | ||
}, function(err, read) { | ||
should.not.exist(read); | ||
should.exist(err); | ||
err.message.should.equal('stop'); | ||
done(); | ||
}); | ||
}); | ||
}); | ||
}); | ||
}); |
@@ -12,2 +12,4 @@ require('./mock-helpers.js'); | ||
html.should.include('<title>Dispatch From Angola: Faith-Based Slavery in a Louisiana Prison - COLORLINES</title>'); | ||
read.close.should.be.a.Function | ||
read.close(); | ||
done(); | ||
@@ -21,2 +23,4 @@ }); | ||
read.title.should.equal('The White House'); | ||
read.close.should.be.a.Function | ||
read.close(); | ||
done(); | ||
@@ -29,2 +33,4 @@ }); | ||
read.document.body.innerHTML.should.include('Hello world!'); | ||
read.close.should.be.a.Function | ||
read.close(); | ||
done(); | ||
@@ -31,0 +37,0 @@ }); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
3690143
1318
0
134