New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

confidencial-ni-node

Package Overview
Dependencies
Maintainers
1
Versions
10
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

confidencial-ni-node - npm Package Compare versions

Comparing version 0.0.9 to 0.0.10

example/test.get-all-links.js

322

lib/confidencial.js

@@ -8,3 +8,5 @@ // Load modules

var async = require('async');
var jquery = fs.readFileSync(require('path').resolve(__dirname, 'jquery.min.js')).toString();
var jquery = fs.readFileSync(
require('path').resolve(__dirname, 'jquery.min.js')
).toString();

@@ -31,196 +33,180 @@ // confidencial categories

/**
 * Article scraping configuration.
 *
 * `domain` is the site root that relative image paths are prefixed with.
 * `elements` is an ordered list of field extractors: each entry has a `name`
 * (the key written on the scraped article object) and a `sel` function that
 * receives the page's jQuery-style `$` and returns the field value. Text
 * fields fall back to '' and `images` falls back to [] when nothing is found.
 *
 * `S` is the string helper already in scope in this file.
 */
var parseArticleOptions = {
  domain: 'http://www.confidencial.com.ni/',
  elements: [
    {
      name: 'title',
      sel: function($) {
        var result = $('#articleheader h2').text().trim();
        return (!S(result).isEmpty()) ? result : '';
      }
    },
    {
      name: 'title_sub',
      sel: function($) {
        var result = $('#articleheader h3').text().trim();
        return (!S(result).isEmpty()) ? result : '';
      }
    },
    {
      name: 'title_paragraph',
      sel: function($) {
        var result = $('#articleheader p.bold').text().trim();
        return (!S(result).isEmpty()) ? result : '';
      }
    },
    {
      name: 'author',
      sel: function($) {
        // The byline is read as "<author> | <rest>"; keep the part before
        // the first '|'.
        var result = $('#articleheader p.authorname').text().
          trim().split('|')[0].trim();
        return (!S(result).isEmpty()) ? result : '';
      }
    },
    {
      name: 'date',
      sel: function($) {
        // The date lives in the same byline element as the author. Return
        // the first d/m/yyyy-shaped token in full; the earlier code cut the
        // stringified match list to 9 characters, which dropped the last
        // digit of a 10-character dd/mm/yyyy date.
        var text = $('#articleheader p.authorname').text();
        var matches = (typeof text === 'string') ?
          text.match(/\d{1,2}\/\d{1,2}\/\d{4}/) : null;
        return matches ? matches[0] : '';
      }
    },
    {
      name: 'images',
      sel: function($) {
        var result = [];
        var array = $('article img').map(function() {
          return $(this).attr('src');
        }).get();
        // NOTE(review): every src is prefixed with the site root, so this
        // assumes the page only uses site-relative image paths - confirm.
        array.forEach(function(item) {
          result.push(parseArticleOptions.domain + item);
        });
        return (!S(result).isEmpty()) ? result : [];
      }
    },
    {
      name: 'category',
      sel: function($) {
        var result = $('#quicknav').text();
        if (!S(result).isEmpty()) {
          // Strip the fixed breadcrumb words and separators, leaving only
          // the category label.
          result = S(result).trim().s;
          result = S(result).replaceAll('Confidencial', '').s;
          result = S(result).replaceAll('Leer artículo', '').s;
          result = S(result).replaceAll('»', '').s;
          result = S(result).trim().s;
        }
        return (!S(result).isEmpty()) ? S(result).trim().s : '';
      }
    },
    {
      name: 'content',
      sel: function($) {
        var newResult = '';
        var result = $('article div.content_article div.text_article').html();
        if (!S(result).isEmpty()) {
          result = result.trim();
          newResult = S(result).stripTags('p,br,strong').s;
          newResult = S(newResult).collapseWhitespace().s;
          // Turn line-break and paragraph markup into newlines.
          // NOTE(review): the two double-<br /> replacements run after every
          // single '<br />' has already been replaced - confirm whether they
          // can still match anything.
          newResult = S(newResult).replaceAll('<br />', '\n').s;
          newResult = S(newResult).replaceAll('<br /> <br />', '\n').s;
          newResult = S(newResult).replaceAll('<br /><br />', '\n').s;
          newResult = S(newResult).replaceAll('<p>', '').s;
          newResult = S(newResult).replaceAll('</p>', '\n').s;
          newResult = S(newResult).replaceAll('&nbsp;', '').s;
          // Rewrite anchors as "<link text> <href>".
          newResult = S(newResult.replace(/<a.*href="(.*?)".*>(.*?)<\/a>/gi, '$2 $1')).trim().s;
          // Drop whatever markup is left.
          newResult = newResult.replace(/<(?:.|\n)*?>/gm, '');
          // The old empty `if (endsWith('\n')) {}` check is omitted: it did
          // nothing, and the trim below removes any trailing newline.
        }
        return (!S(result).isEmpty()) ? S(newResult).trim().s : '';
      }
    }
  ]
};
/**
 * Report whether `needle` occurs in `haystack`.
 *
 * Elements are compared with loose equality (`==`), so '1' matches 1.
 *
 * @param {*} needle Value to look for.
 * @param {Array} haystack List to search; a missing list counts as empty.
 * @returns {boolean} true when some element equals `needle`.
 */
function inArray(needle, haystack) {
  if (!haystack) {
    return false;
  }
  var length = haystack.length;
  for (var i = 0; i < length; i++) {
    if (haystack[i] == needle) {
      return true;
    }
  }
  // Only report a miss after every element has been checked.
  return false;
}
/**
 * Load `site` through `jsdom.env`, asking it to also load jQuery from
 * code.jquery.com, and pass the page's `$` to `callback`.
 *
 * The page is requested once and `callback` is invoked once per call.
 *
 * @param {string} site URL of the page to load.
 * @param {function(*, *)} callback Called as `callback($, errors)`; `$` is
 *     `window.$`, or undefined when jsdom supplied no window.
 */
function parse(site, callback) {
  jsdom.env({
    url: site,
    scripts: ['http://code.jquery.com/jquery.js'],
    done: function(errors, window) {
      // Guard the window so a failed load reports `errors` to the caller
      // instead of throwing here.
      var $ = window ? window.$ : undefined;
      callback($, errors);
    }
  });
}
/**
 * Collect the article URLs linked from every category page.
 *
 * Each URL in `categories` is downloaded with `request`, the HTML is loaded
 * with cheerio, and the `href` of every headline link is appended to
 * 'http://confidencial.com.ni/'. Each URL is reported once, in first-seen
 * order.
 *
 * @param {function(Array, *=)} callback Called as `callback(urls)` on
 *     success. When any download fails it is called as `callback([], err)`
 *     so the caller is not left waiting.
 */
module.exports.getAllLinks = function(callback) {
  // Download one category page and pass its HTML body to `cb`.
  var fetch = function(url, cb) {
    request(url, function(err, response, body) {
      if (err) {
        cb(err);
      } else {
        cb(null, body); // first argument null => no error
      }
    });
  };

  async.map(categories, fetch, function(err, results) {
    if (err) {
      // `results` must not be used once any download has failed.
      console.log(err);
      callback([], err);
      return;
    }

    var result = [];
    var seen = {}; // URLs already added to `result`

    results.forEach(function(category) {
      // Keep `$` local so it does not leak as a global.
      var $ = cheerio.load(category);
      var links = $('.article h3 a,article h2 a');
      $(links).each(function(i, link) {
        var href = $(link).attr('href');
        if (!href) {
          return; // anchor without a target: nothing to record
        }
        var articleUrl = 'http://confidencial.com.ni/' + href;
        if (!Object.prototype.hasOwnProperty.call(seen, articleUrl)) {
          seen[articleUrl] = true;
          result.push(articleUrl);
        }
      });
    });

    callback(result);
  });
};
/**
 * Scrape a single article page.
 *
 * The page is loaded with `parse`, then every extractor listed in
 * `parseArticleOptions.elements` is run and its value stored under the
 * extractor's `name`. The result also carries `id` and `url`.
 *
 * @param {string} site URL of the article.
 * @param {function(Object, *)} cb Called as `cb(result, err)`. When the page
 *     gave no usable `$`, `result` holds only `id` and `url` and `err`
 *     carries whatever `parse` reported.
 */
module.exports.getArticle = function(site, cb) {
  // The id is the fifth '/'-separated piece of the URL.
  // NOTE(review): presumably the numeric article id - confirm against the
  // site's URL layout.
  function getId(url) {
    var id = url.split('/');
    return id[4];
  }

  parse(site, function($, err) {
    var result = {};
    result.id = getId(site);
    result.url = site;
    // Extractors call `$` directly, so only run them when it is usable;
    // otherwise they would throw before `cb` could see `err`.
    if (typeof $ === 'function') {
      parseArticleOptions.elements.forEach(function(elem) {
        result[elem.name] = elem.sel($);
      });
    }
    cb(result, err);
  });
};
{
"name": "confidencial-ni-node"
, "description": "Web scraping http://www.confidencial.com.ni/"
, "version": "0.0.9"
, "version": "0.0.10"
, "author": "Paulo McNally <paulomcnally@gmail.com>"

@@ -24,2 +24,2 @@ , "keywords": ["nicaragua", "diario", "news"]

}
}
}

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc