Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

indo-news-scraper

Package Overview
Dependencies
Maintainers
1
Versions
9
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

indo-news-scraper - npm Package Compare versions

Comparing version 1.3.0 to 1.4.0

lib/websites/Viva.js

4

lib/index.js

@@ -10,2 +10,3 @@ 'use strict';

const Suara = require('./websites/Suara.js');
const Viva = require('./websites/Viva.js');

@@ -19,3 +20,4 @@ module.exports = {

Tempo: Tempo,
Suara: Suara
Suara: Suara,
Viva: Viva
}

@@ -12,2 +12,4 @@ 'use_strict';

Antara.prototype.headless = true;
Antara.prototype.scrap = (query = null) => {

@@ -23,3 +25,3 @@ let url = Antara.prototype.baseUrl;

return puppeteer
.launch()
.launch({headless: Antara.prototype.headless})
.then(browser => browser.newPage())

@@ -26,0 +28,0 @@ .then(page => {

@@ -12,2 +12,3 @@ 'use_strict';

Detik.prototype.headless = true;
Detik.prototype.convertDate = (dateString) => {

@@ -50,3 +51,3 @@ dateString = dateString.replace('WIB', '');

return puppeteer
.launch()
.launch({headless: Detik.prototype.headless})
.then(browser => browser.newPage())

@@ -53,0 +54,0 @@ .then(page => {

'use_strict';
const fetch = require('node-fetch');
const moment = require('moment');
const cheerio = require("cheerio");
const puppeteer = require("puppeteer");
const moment = require("moment");

@@ -9,4 +10,6 @@ const Kompas = function(){};

Kompas.prototype.source = 'Kompas';
Kompas.prototype.baseUrl = 'https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc&gss=.com&cselibv=26b8d00a7c7a0812&cx=018212539862037696382:-xa61bkyvao&safe=off&cse_tok=AJvRUv2UpW015_VJ2w-42Op5c5w7:1598375214995&sort=&exp=csqr,cc&callback=google.search.cse.api10905';
Kompas.prototype.baseUrl = 'https://www.kompas.com/tag/';
Kompas.prototype.headless = true;
Kompas.prototype.scrap = (query) => {

@@ -19,44 +22,38 @@ let url = Kompas.prototype.baseUrl;

url+=`&q=${query}`;
url+=`${query}?sort=desc`;
return fetch(url)
.then(res => {
return res.text()
return puppeteer
.launch({headless: Kompas.prototype.headless})
.then(browser => browser.newPage())
.then(page => {
return page.goto(url).then(() => {
return page.content();
});
})
.then(body => {
return Kompas.prototype.processResult(body);
})
.catch(err => {
console.log(err);
});
}
Kompas.prototype.processResult = (rawData) => {
const preparedData = JSON.parse(rawData.substring(35).replace(");", ""));
const result = [];
if(preparedData.results){
let data = preparedData.results;
data.map(v => {
result.push(Kompas.prototype.formatResult(v));
.then(html => {
const $ = cheerio.load(html);
const newsData = [];
$('.article__list').each((e, el) => {
newsData.push({
title: $(el).find('.article__link').html(),
url: $(el).find('.article__link').attr('href'),
img: $(el).find('.article__asset').find('a').find('img').attr('src'),
// date: Kompas.prototype.convertDate($(el).find('.article__date').html())
date: Kompas.prototype.convertDate($(el).find('.article__date').html())
})
});
}
return result;
return newsData;
})
.catch(err => new Error(err));
}
Kompas.prototype.formatResult = (data) => {
return {
'title': data.richSnippet.metatags.ogTitle,
'url': data.url,
'img': !data.richSnippet.cseImage ? null : data.richSnippet.cseImage.src,
'date': Kompas.prototype.convertDate(data.richSnippet.metatags.contentPublisheddate)
};
}
Kompas.prototype.convertDate = (dateString) => {
dateString = dateString.replace(' WIB', '');
dateString = dateString.replace(',', '');
Kompas.prototype.convertDate = (dateString) => {
return moment(dateString).toISOString();
let d = moment(dateString, 'DD/MM/YYY HH:mm');
return d.toISOString();
}
module.exports = new Kompas();

@@ -13,2 +13,4 @@ 'use_strict';

Liputan6.prototype.headless = true;
Liputan6.prototype.scrap = (query = null) => {

@@ -24,5 +26,3 @@ let url = Liputan6.prototype.baseUrl;

return puppeteer
.launch({
headless: true
})
.launch({headless: Liputan6.prototype.headless})
.then(browser => browser.newPage())

@@ -29,0 +29,0 @@ .then(page => {

@@ -12,2 +12,4 @@ 'use_strict';

Republika.prototype.headless = true;
Republika.prototype.scrap = (query = null) => {

@@ -23,3 +25,3 @@ let url = Republika.prototype.baseUrl;

return puppeteer
.launch()
.launch({headless: Republika.prototype.headless})
.then(browser => browser.newPage())

@@ -26,0 +28,0 @@ .then(page => {

@@ -11,2 +11,4 @@ 'use_strict';

Suara.prototype.headless = true;
Suara.prototype.scrap = (query = null) => {

@@ -20,3 +22,3 @@ let url = Suara.prototype.baseUrl;

return puppeteer
.launch()
.launch({headless: Suara.prototype.headless})
.then(browser => browser.newPage())

@@ -23,0 +25,0 @@ .then(page => {

@@ -11,2 +11,4 @@ 'use_strict';

Tempo.prototype.headless = true;
Tempo.prototype.scrap = (query = null) => {

@@ -20,3 +22,3 @@ let url = Tempo.prototype.baseUrl;

return puppeteer
.launch()
.launch({headless: Tempo.prototype.headless})
.then(browser => browser.newPage())

@@ -23,0 +25,0 @@ .then(page => {

{
"name": "indo-news-scraper",
"version": "1.3.0",
"description": "Indonesia online news scrapper made for NodeJS",
"version": "1.4.0",
"description": "A news scraper for javascript that help to scrap news from Indonesian news portal.",
"main": "lib/index.js",

@@ -18,5 +18,13 @@ "scripts": {

"indonesia",
"berita"
"berita",
"nodejs",
"node-modules",
"news api",
"indo-news-scraper",
"news scraper"
],
"author": "Pandu Yudhistira",
"author": {
"name": "Pandu Yudhistira",
"email": "theyudhiztira@gmail.com"
},
"license": "MIT",

@@ -31,5 +39,4 @@ "bugs": {

"moment": "^2.27.0",
"node-fetch": "^2.6.0",
"puppeteer": "^5.2.1"
}
}

@@ -17,9 +17,10 @@ # Indo News Scraper [Beta]

## Available News Portal
- Antara
- Detik
- Antara
- Kompas
- Liputan6
- Republika
- Liputan6
- Suara
- Tempo
- Suara
- Viva

@@ -26,0 +27,0 @@ ## Usage

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc