subreddit-wiki-scrapper
Advanced tools
Comparing version 0.0.1 to 0.0.2
17
index.js
@@ -12,2 +12,3 @@ const | ||
* @property {boolean} [silent] operate in silent mode (no logs are printed to the terminal) - default true | ||
* @property {number} [timeout] page loading timeout in ms - default 30000 (30s) | ||
*/ | ||
@@ -37,3 +38,3 @@ | ||
const page = await browser.newPage() | ||
// page.setDefaultTimeout(120000) | ||
typeof opts.timeout === "number" && page.setDefaultTimeout(opts.timeout) | ||
page.setRequestInterception(true) | ||
@@ -45,3 +46,3 @@ page.on("request", req => { | ||
opts.silent === false && console.log("Downloading index") | ||
await page.goto(`https://www.reddit.com/r/${subreddit}/wiki/index`) | ||
@@ -66,3 +67,3 @@ | ||
await fs.writeFile(`${outDir}/index.html`, wiki).then(() => doneScraping.add("index")) | ||
wiki = null | ||
@@ -94,3 +95,3 @@ await page.close() | ||
async function recursiveScrape(outDir, browser, subreddit, link, doneScraping, opts) { | ||
if (doneScraping.has(link)) return | ||
@@ -102,3 +103,3 @@ | ||
// page.setDefaultTimeout(120000) | ||
typeof opts.timeout === "number" && page.setDefaultTimeout(opts.timeout) | ||
page.setRequestInterception(true) | ||
@@ -110,3 +111,3 @@ page.on("request", req => { | ||
await page.goto(`https://www.reddit.com/r/${subreddit}/wiki/${link}`) | ||
const wikiError = await page.waitForSelector(".md.wiki").then(() => 0).catch(err => err) | ||
@@ -118,3 +119,3 @@ | ||
} | ||
let [wiki, links] = await page.evaluate(() => [ | ||
@@ -124,3 +125,3 @@ document.querySelector(".md.wiki").innerHTML, | ||
]) | ||
links = links.filter(link => link.indexOf(`/r/${subreddit}/wiki/`) !== -1).map(link => link.substring(9 + subreddit.length)) | ||
@@ -127,0 +128,0 @@ |
{ | ||
"name": "subreddit-wiki-scrapper", | ||
"version": "0.0.1", | ||
"version": "0.0.2", | ||
"description": "Download any subreddit's wiki and archive it.", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -35,2 +35,2 @@ # Subreddit-Wiki-Scrapper | ||
- `silent` - operate in silent mode (no logs are printed to the terminal) - default true | ||
- `timeout` - page loading timeout in ms - default 30000 (30s) |
7366
97
35