readabilitySAX
Advanced tools
Comparing version 1.4.0 to 1.4.1
#!/usr/bin/env node | ||
function write(){ | ||
process.stdout.write(Array.prototype.join.call(arguments, " ") + "\n"); | ||
}; | ||
if(process.argv.length < 3 || !/^https?:\/\//.test(process.argv[2])){ | ||
console.log("Usage:", "readability", "http://domain.tld/sub"); | ||
write("Usage:", "readability", "http://domain.tld/sub", "[format]"); | ||
return; | ||
@@ -9,10 +13,10 @@ } | ||
require("./getURL.js")(process.argv[2], process.argv[3] === "html" ? "html" : "text", function(result){ | ||
if(result.error) return console.log("Error:", result.text); | ||
if(result.error) return write("ERROR:", result.text); | ||
//else | ||
console.log("TITLE:", result.title); | ||
console.log("SCORE:", result.score); | ||
if(result.nextPage) console.log("NEXT PAGE:", result.nextPage); | ||
console.log("LENGTH:", result.textLength); | ||
console.log(""); | ||
write("TITLE:", result.title); | ||
write("SCORE:", result.score); | ||
if(result.nextPage) write("NEXT PAGE:", result.nextPage); | ||
write("LENGTH:", result.textLength); | ||
write(""); | ||
@@ -25,5 +29,5 @@ var text; | ||
} | ||
console.log(text); | ||
write(text); | ||
process.exit(); | ||
}); |
var WritableStream = require("./WritableStream.js"), | ||
minreq = require("minreq"), | ||
url = require("url"), | ||
processData = require("./index.js").process; | ||
processData = require("./process.js"); | ||
@@ -6,0 +6,0 @@ module.exports = function(uri, format, cb){ |
{ | ||
"name": "readabilitySAX", | ||
"version": "1.4.0", | ||
"version": "1.4.1", | ||
"description": "the readability script ported to a sax parser", | ||
@@ -5,0 +5,0 @@ "author": "Felix Boehm <me@feedic.com>", |
@@ -160,5 +160,4 @@ /* | ||
re_negative = /com(?:bx|ment|-)|contact|foot(?:er|note)?|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/, | ||
re_unlikelyCandidates = /ad-break|agegate|auth?or|com(?:bx|ment|munity)|disqus|extra|foot|header|ignore|menu|navi|pag(?:er|ination)|popup|postinfo|remark|rss|shoutbox|sidebar|sponsor|teaserlist|tweet|twitter|unrelated/, | ||
re_unlikelyCandidates = /ad-break|agegate|auth?or|bookmark|cat|com(?:bx|ment|munity)|date|disqus|extra|foot|header|ignore|info|links|menu|nav|pag(?:er|ination)|popup|related|remark|rss|shoutbox|sidebar|similar|social|sponsor|teaserlist|time|tweet|twitter/, | ||
re_okMaybeItsACandidate = /and|article|body|column|main|shadow/, | ||
re_linkLists = /bookmark|cat|date|links|nav|related|similar|social|time/, // TODO merge this with unlikelyCandidates? | ||
@@ -182,2 +181,3 @@ re_sentence = /\. |\.$/, | ||
re_closing = /\/?(?:#.*)?$/, | ||
re_imgUrl = /\.(gif|jpe?g|png|webp)$/i, | ||
@@ -201,2 +201,3 @@ re_commas = /,[\s\,]*/g; | ||
cleanAttributes: true, | ||
replaceImgs: true, | ||
searchFurtherPages: true, | ||
@@ -379,2 +380,3 @@ linksToSkip: {}, //pages that are already parsed | ||
if(!value) return; | ||
name = name.toLowerCase(); | ||
@@ -466,3 +468,2 @@ var elem = this._currentElement; | ||
} | ||
//if(re_linkLists.test(elem.elementData)) return; // TODO | ||
if(tagName === "div" | ||
@@ -514,2 +515,11 @@ && elem.children.length === 1 | ||
} | ||
if(this._settings.replaceImgs | ||
&& tagName === "a" | ||
&& elem.children.length === 1 | ||
&& elem.children[0].name === "img" | ||
&& re_imgUrl.test(elem.attributes.href) | ||
){ | ||
elem = elem.children[0]; | ||
elem.attributes.src = elem.parent.attributes.href; | ||
} | ||
@@ -516,0 +526,0 @@ elem.parent.children.push(elem); |
93482
874