Socket
Socket
Sign in · Demo · Install

node-readability

Package Overview
Dependencies
Maintainers
1
Versions
27
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

node-readability - npm Package Compare versions

Comparing version 2.2.0 to 3.0.0

9

CHANGELOG.md

@@ -0,1 +1,10 @@

<a name="3.0.0"></a>
## [3.0.0](https://github.com/luin/node-readability/compare/v2.2.0...v3.0.0) (2017-08-10)
Thanks to [Harold Treen](https://github.com/haroldtreen) and the other contributors for their awesome work!
* Replace text node followed by br with a p tag
* Remove lightbox from unlikelyCandidatesRe
* Fix for psychology-today + test
<a name="2.2.0"></a>

@@ -2,0 +11,0 @@ ## [2.2.0](https://github.com/luin/node-readability/compare/v2.1.5...v2.2.0) (2016-03-11)

4

package.json
{
"name": "node-readability",
"version": "2.2.0",
"version": "3.0.0",
"author": "Zihua Li",

@@ -26,3 +26,3 @@ "description": "Turning any web page into a clean view.",

"encoding": "~0.1.7",
"jsdom": "^6.3.0",
"jsdom": "^9.12.0",
"minimist": "^1.2.0",

@@ -29,0 +29,0 @@ "request": "~2.40.0"

@@ -22,3 +22,3 @@ # Readability

Note that as of our 2.0.0 release, this module only works with Node.js >= 2.0. In the meantime you are still welcome to install a release in the 1.x series(by `npm install node-readability@1`) if you use an older Node.js version.
Note that from v2.0.0, this module only works with Node.js >= 2.0. In the meantime you are still welcome to install a release in the 1.x series (by `npm install node-readability@1`) if you use an older Node.js version.

@@ -128,3 +128,3 @@ ## Usage

This lib is using jsdom to parser HTML instead of cheerio because some data such as image size and element visibility isn't able to acquire when using cheerio, which will significantly affect the result.
This lib uses jsdom to parse HTML instead of cheerio because some data, such as image size and element visibility, cannot be obtained when using cheerio, which would significantly affect the result.

@@ -131,0 +131,0 @@ ## Contributors

@@ -1,2 +0,2 @@

#!/usr/bin/env iojs
#!/usr/bin/env node
var read = require("./readability.js");

@@ -3,0 +3,0 @@ var argv = require("minimist")(process.argv.slice(2));

@@ -5,5 +5,5 @@ var url = require("url");

var regexps = {
unlikelyCandidatesRe: /combx|modal|lightbox|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor|social|teaserlist|time|tweet|twitter/i,
okMaybeItsACandidateRe: /and|article|body|column|main/i,
positiveRe: /article|body|content|entry|hentry|page|pagination|post|text/i,
unlikelyCandidatesRe: /combx|modal|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor|social|teaserlist|time|tweet|twitter/i,
okMaybeItsACandidateRe: /and|article|body|column|main|story|entry|^post/im,
positiveRe: /article|body|content|entry|hentry|page|pagination|post|section|chapter|description|main|blog|text/i,
negativeRe: /combx|comment|contact|foot|footer|footnote|link|media|meta|promo|related|scroll|shoutbox|sponsor|utility|tags|widget/i,

@@ -16,3 +16,4 @@ divToPElementsRe: /<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,

killBreaksRe: /(<br\s*\/?>(\s|&nbsp;?)*){1,}/g,
videoRe: /http:\/\/(www\.)?(youtube|vimeo|youku|tudou|56|yinyuetai)\.com/i
videoRe: /http:\/\/(www\.)?(youtube|vimeo|youku|tudou|56|yinyuetai)\.com/i,
attributeRe: /blog|post|article/i
};

@@ -69,2 +70,8 @@

}
// Strip out all <script> tags, as they *should* be useless.
// getElementsByTagName returns a live collection: it shrinks as nodes are
// removed, so walking it by index while removing would skip every other
// element. Iterate over a static snapshot instead. `scripts` itself is left
// bound to the collection, exactly as before.
var scripts = document.getElementsByTagName('script');
[].slice.call(scripts).forEach(function (node) {
  node.parentNode.removeChild(node);
});

@@ -96,3 +103,3 @@ // turn all double br's into p's

if (!preserveUnlikelyCandidates) {
var unlikelyMatchString = node.className + node.id;
var unlikelyMatchString = node.className + '\n' + node.id;
if (unlikelyMatchString.search(regexps.unlikelyCandidatesRe) !== -1 && unlikelyMatchString.search(regexps.okMaybeItsACandidateRe) == -1 && node.tagName !== 'HTML' && node.tagName !== "BODY") {

@@ -116,7 +123,16 @@ dbg("Removing unlikely candidate - " + unlikelyMatchString);

if (childNode.nodeType == 3 /*TEXT_NODE*/ ) {
// use span instead of p. Need more tests.
dbg("replacing text node with a span tag with the same content.");
var span = document.createElement('span');
span.innerHTML = childNode.nodeValue;
childNode.parentNode.replaceChild(span, childNode);
var nextSibling = childNode.nextSibling
if (nextSibling && nextSibling.tagName == 'BR') {
dbg("replacing text node followed by br with a p tag with the same content.");
var p = document.createElement('p');
p.innerHTML = childNode.nodeValue;
childNode.parentNode.removeChild(nextSibling)
childNode.parentNode.replaceChild(p, childNode);
} else {
// use span instead of p. Need more tests.
dbg("replacing text node with a span tag with the same content.");
var span = document.createElement('span');
span.innerHTML = childNode.nodeValue;
childNode.parentNode.replaceChild(span, childNode);
}
}

@@ -307,3 +323,3 @@ });

**/
getInnerText = exports.getInnerText = function(e, normalizeSpaces) {
var getInnerText = exports.getInnerText = function(e, normalizeSpaces) {
var textContent = "";

@@ -658,3 +674,3 @@

if (node.attributes.itemtype &&
/blog|post|article/i.test(node.getAttribute('itemtype'))) {
regexps.attributeRe.test(node.getAttribute('itemtype'))) {
node.readability.contentScore += 30;

@@ -661,0 +677,0 @@ }

@@ -82,3 +82,3 @@ var jsdom = require('jsdom');

var title = this._document.title;
var title = _findMetaTitle(this._document) || this._document.title;
var betterTitle;

@@ -142,2 +142,16 @@ var commonSeparatingCharacters = [' | ', ' _ ', ' - ', '«', '»', '—'];

/**
 * Look up a page title advertised through social-sharing meta tags.
 *
 * Walks the document's <meta> elements in order and returns the `content`
 * attribute of the first one that is marked `property="og:title"` or
 * `name="twitter:title"` and carries a non-empty `content`. A matching tag
 * whose `content` is missing or empty is skipped, so a later tag can still
 * supply the title instead of the search ending early with nothing usable.
 *
 * @param {Object} document - Object exposing `getElementsByTagName('meta')`,
 *   whose items expose `getAttribute(name)`.
 * @returns {?string} The first non-empty title found, or `null` when no
 *   suitable meta tag exists.
 */
function _findMetaTitle(document) {
  var metaTags = document.getElementsByTagName('meta');
  for (var i = 0; i < metaTags.length; i++) {
    var tag = metaTags[i];
    var isTitleTag = tag.getAttribute('property') === 'og:title' ||
      tag.getAttribute('name') === 'twitter:title';
    if (!isTitleTag) {
      continue;
    }
    // Only accept tags that actually carry a title; keep looking otherwise.
    var content = tag.getAttribute('content');
    if (content) {
      return content;
    }
  }
  return null;
}
function _findHTMLCharset(htmlbuffer) {

@@ -144,0 +158,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc