indo-news-scraper
Advanced tools
Comparing version 1.3.0 to 1.4.0
@@ -10,2 +10,3 @@ 'use strict'; | ||
const Suara = require('./websites/Suara.js'); | ||
const Viva = require('./websites/Viva.js'); | ||
@@ -19,3 +20,4 @@ module.exports = { | ||
Tempo: Tempo, | ||
Suara: Suara | ||
Suara: Suara, | ||
Viva: Viva | ||
} |
@@ -12,2 +12,4 @@ 'use_strict'; | ||
Antara.prototype.headless = true; | ||
Antara.prototype.scrap = (query = null) => { | ||
@@ -23,3 +25,3 @@ let url = Antara.prototype.baseUrl; | ||
return puppeteer | ||
.launch() | ||
.launch({headless: Antara.prototype.headless}) | ||
.then(browser => browser.newPage()) | ||
@@ -26,0 +28,0 @@ .then(page => { |
@@ -12,2 +12,3 @@ 'use_strict'; | ||
Detik.prototype.headless = true; | ||
Detik.prototype.convertDate = (dateString) => { | ||
@@ -50,3 +51,3 @@ dateString = dateString.replace('WIB', ''); | ||
return puppeteer | ||
.launch() | ||
.launch({headless: Detik.prototype.headless}) | ||
.then(browser => browser.newPage()) | ||
@@ -53,0 +54,0 @@ .then(page => { |
'use_strict'; | ||
const fetch = require('node-fetch'); | ||
const moment = require('moment'); | ||
const cheerio = require("cheerio"); | ||
const puppeteer = require("puppeteer"); | ||
const moment = require("moment"); | ||
@@ -9,4 +10,6 @@ const Kompas = function(){}; | ||
Kompas.prototype.source = 'Kompas'; | ||
Kompas.prototype.baseUrl = 'https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc&gss=.com&cselibv=26b8d00a7c7a0812&cx=018212539862037696382:-xa61bkyvao&safe=off&cse_tok=AJvRUv2UpW015_VJ2w-42Op5c5w7:1598375214995&sort=&exp=csqr,cc&callback=google.search.cse.api10905'; | ||
Kompas.prototype.baseUrl = 'https://www.kompas.com/tag/'; | ||
Kompas.prototype.headless = true; | ||
Kompas.prototype.scrap = (query) => { | ||
@@ -19,44 +22,38 @@ let url = Kompas.prototype.baseUrl; | ||
url+=`&q=${query}`; | ||
url+=`${query}?sort=desc`; | ||
return fetch(url) | ||
.then(res => { | ||
return res.text() | ||
return puppeteer | ||
.launch({headless: Kompas.prototype.headless}) | ||
.then(browser => browser.newPage()) | ||
.then(page => { | ||
return page.goto(url).then(() => { | ||
return page.content(); | ||
}); | ||
}) | ||
.then(body => { | ||
return Kompas.prototype.processResult(body); | ||
}) | ||
.catch(err => { | ||
console.log(err); | ||
}); | ||
} | ||
Kompas.prototype.processResult = (rawData) => { | ||
const preparedData = JSON.parse(rawData.substring(35).replace(");", "")); | ||
const result = []; | ||
if(preparedData.results){ | ||
let data = preparedData.results; | ||
data.map(v => { | ||
result.push(Kompas.prototype.formatResult(v)); | ||
.then(html => { | ||
const $ = cheerio.load(html); | ||
const newsData = []; | ||
$('.article__list').each((e, el) => { | ||
newsData.push({ | ||
title: $(el).find('.article__link').html(), | ||
url: $(el).find('.article__link').attr('href'), | ||
img: $(el).find('.article__asset').find('a').find('img').attr('src'), | ||
// date: Kompas.prototype.convertDate($(el).find('.article__date').html()) | ||
date: Kompas.prototype.convertDate($(el).find('.article__date').html()) | ||
}) | ||
}); | ||
} | ||
return result; | ||
return newsData; | ||
}) | ||
.catch(err => new Error(err)); | ||
} | ||
Kompas.prototype.formatResult = (data) => { | ||
return { | ||
'title': data.richSnippet.metatags.ogTitle, | ||
'url': data.url, | ||
'img': !data.richSnippet.cseImage ? null : data.richSnippet.cseImage.src, | ||
'date': Kompas.prototype.convertDate(data.richSnippet.metatags.contentPublisheddate) | ||
}; | ||
} | ||
Kompas.prototype.convertDate = (dateString) => { | ||
dateString = dateString.replace(' WIB', ''); | ||
dateString = dateString.replace(',', ''); | ||
Kompas.prototype.convertDate = (dateString) => { | ||
return moment(dateString).toISOString(); | ||
let d = moment(dateString, 'DD/MM/YYY HH:mm'); | ||
return d.toISOString(); | ||
} | ||
module.exports = new Kompas(); |
@@ -13,2 +13,4 @@ 'use_strict'; | ||
Liputan6.prototype.headless = true; | ||
Liputan6.prototype.scrap = (query = null) => { | ||
@@ -24,5 +26,3 @@ let url = Liputan6.prototype.baseUrl; | ||
return puppeteer | ||
.launch({ | ||
headless: true | ||
}) | ||
.launch({headless: Liputan6.prototype.headless}) | ||
.then(browser => browser.newPage()) | ||
@@ -29,0 +29,0 @@ .then(page => { |
@@ -12,2 +12,4 @@ 'use_strict'; | ||
Republika.prototype.headless = true; | ||
Republika.prototype.scrap = (query = null) => { | ||
@@ -23,3 +25,3 @@ let url = Republika.prototype.baseUrl; | ||
return puppeteer | ||
.launch() | ||
.launch({headless: Republika.prototype.headless}) | ||
.then(browser => browser.newPage()) | ||
@@ -26,0 +28,0 @@ .then(page => { |
@@ -11,2 +11,4 @@ 'use_strict'; | ||
Suara.prototype.headless = true; | ||
Suara.prototype.scrap = (query = null) => { | ||
@@ -20,3 +22,3 @@ let url = Suara.prototype.baseUrl; | ||
return puppeteer | ||
.launch() | ||
.launch({headless: Suara.prototype.headless}) | ||
.then(browser => browser.newPage()) | ||
@@ -23,0 +25,0 @@ .then(page => { |
@@ -11,2 +11,4 @@ 'use_strict'; | ||
Tempo.prototype.headless = true; | ||
Tempo.prototype.scrap = (query = null) => { | ||
@@ -20,3 +22,3 @@ let url = Tempo.prototype.baseUrl; | ||
return puppeteer | ||
.launch() | ||
.launch({headless: Tempo.prototype.headless}) | ||
.then(browser => browser.newPage()) | ||
@@ -23,0 +25,0 @@ .then(page => { |
{ | ||
"name": "indo-news-scraper", | ||
"version": "1.3.0", | ||
"description": "Indonesia online news scrapper made for NodeJS", | ||
"version": "1.4.0", | ||
"description": "A news scraper for javascript that help to scrap news from Indonesian news portal.", | ||
"main": "lib/index.js", | ||
@@ -18,5 +18,13 @@ "scripts": { | ||
"indonesia", | ||
"berita" | ||
"berita", | ||
"nodejs", | ||
"node-modules", | ||
"news api", | ||
"indo-news-scraper", | ||
"news scraper" | ||
], | ||
"author": "Pandu Yudhistira", | ||
"author": { | ||
"name": "Pandu Yudhistira", | ||
"email": "theyudhiztira@gmail.com" | ||
}, | ||
"license": "MIT", | ||
@@ -31,5 +39,4 @@ "bugs": { | ||
"moment": "^2.27.0", | ||
"node-fetch": "^2.6.0", | ||
"puppeteer": "^5.2.1" | ||
} | ||
} |
@@ -17,9 +17,10 @@ # Indo News Scraper [Beta] | ||
## Available News Portal | ||
- Antara | ||
- Detik | ||
- Antara | ||
- Kompas | ||
- Liputan6 | ||
- Republika | ||
- Liputan6 | ||
- Suara | ||
- Tempo | ||
- Suara | ||
- Viva | ||
@@ -26,0 +27,0 @@ ## Usage |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
22408
4
12
456
43
0
- Removed node-fetch@^2.6.0