New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

read-art

Package Overview
Dependencies
Maintainers
1
Versions
66
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

read-art - npm Package Compare versions

Comparing version 0.0.6 to 0.0.7

4

examples/simple.js
var read = require('../');
read('http://auto.rednet.cn/html/tupian/20145/20145124642220.html', function(err, art){
read('http://www.tianjinwe.com/rollnews/201405/t20140512_8874908.html', {
dataType: 'json'
}, function(err, art){
if(err){

@@ -5,0 +7,0 @@ console.log('[ERROR]', err.message);

@@ -27,6 +27,3 @@ // Copyright 2014 Tjatse

* @param uri uri or html
* @param options including:
* cacheable: false as default.
* killBreaks: true as default.
* lowerCaseTags: true as default.
* @param options reference to https://github.com/Tjatse/node-readability#options
* @param callback callback, have two arguments been passed:

@@ -49,4 +46,5 @@ * 1: error

}
defineBoolean(options, 'killBreaks', true);
defineBoolean(options, 'lowerCaseTags', true);
defineOption(options, 'killBreaks', true);
defineOption(options, 'lowerCaseTags', true);
defineOption(options, 'dataType', 'html');

@@ -95,3 +93,3 @@ // indicating uri is html or url.

o.html = o.html.replace(/(<br\s*\/?>(\s|&nbsp;?)*){1,}/g,'<br />');
// remove formats like \r\t\n
// remove tab symbols like \r\t\n
o.html = o.html.replace(/([\n\r\t]*){2,}/gi, '');

@@ -109,3 +107,3 @@ }

/**
* Define property of object to default boolean value.
* Define property of object to default value.
* @param options option object

@@ -115,3 +113,3 @@ * @param k key

*/
function defineBoolean(options, k, v){
function defineOption(options, k, v){
if(typeof options[k] == 'undefined'){

@@ -118,0 +116,0 @@ options[k] = v;

@@ -62,4 +62,17 @@ // Copyright 2014 Tjatse

// else read it by article reader.
var node = read(this.$, this.options);
var content = ((node && node.length > 0) ? node.html() : '' );
var node = read(this.$, this.options),
content;
if((node && node.length > 0)){
switch(this.options.dataType){
case 'text':
content = node.text();
break;
case 'html':
default:
content = node.html();
break;
}
}else{
content = '';
}
// if cacheable, cache it.

@@ -66,0 +79,0 @@ if(this.caches){

@@ -129,3 +129,2 @@ // Copyright 2014 Tjatse

node.data(scoreKey, score);
if(!topCandidate || score > topCandidate.data(scoreKey)){

@@ -148,3 +147,3 @@ topCandidate = node;

parent, siblings;
if((parent == topCandidate.parent()) && parent.length > 0 && parent.get(0).name.toLowerCase() != 'body'){
if((parent = topCandidate.parent()) && parent.length > 0 && parent.get(0).name.toLowerCase() != 'body'){
siblings = parent.children();

@@ -158,2 +157,3 @@ }else{

append = false;
if(node.is(topCandidate) || (node.data(scoreKey) || 0) > siblingScoreThreshold){

@@ -160,0 +160,0 @@ append = true;

{
"name": "read-art",
"version": "0.0.6",
"version": "0.0.7",
"description": "Scrape article from any page, automatically, make web page readability.",

@@ -5,0 +5,0 @@ "main": "index.js",

@@ -69,2 +69,5 @@ # read-art -- readability reference to Arc90's

## Options
### dataType
The data type of article content, including: html, text.
### cacheable

@@ -74,3 +77,3 @@ A value indicating whether cache body && title.

### killBreaks
Kill breaks in the HTML, and convert them to simple `<br />`.
Kill breaks, blanks, tab symbols(\r\t\n) into one <br />.

@@ -77,0 +80,0 @@ ###options from [cheerio](https://github.com/cheeriojs/cheerio)

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc