nitter-scraper
Comparing version 1.0.10 to 1.0.11
@@ -5,6 +5,5 @@ #!/usr/bin/env node
 const scraper_1 = require("./scraper");
-const storage_1 = require("./utils/storage");
 async function main() {
     try {
-        const username = "wslyvh"; // Twitter username to scrape (without @)
+        const username = "NoirLang"; // Twitter username to scrape (without @)
         console.log(`Starting Twitter scraper for @${username}`);
@@ -14,5 +13,14 @@ // Fetch tweets
         lastWeek.setDate(lastWeek.getDate() - 7);
-        const tweets = await (0, scraper_1.fetchTweets)(username, lastWeek, 3, true);
+        const promises = await Promise.all([
+            (0, scraper_1.fetchTweets)(username, lastWeek, 10, true),
+            (0, scraper_1.fetchTweets)(username, lastWeek, 10, true),
+            (0, scraper_1.fetchTweets)(username, lastWeek, 10, true),
+            (0, scraper_1.fetchTweets)(username, lastWeek, 10, true),
+            (0, scraper_1.fetchTweets)(username, lastWeek, 10, true),
+        ]);
+        for (const [i, tweets] of promises.entries()) {
+            console.log(`Call ${i + 1}: ${tweets.length} tweets`);
+        }
         // Save tweets to file
-        (0, storage_1.saveTweets)(tweets);
+        // saveTweets(tweets);
     }
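For context, here is a usage sketch in TypeScript of the call shape the entry-point diff above exercises. The positional parameters (username, sinceDate, maxPages, includeReplies) are read from the calls shown; the named import from the package's main entry is an assumption, not confirmed by this diff.

// Sketch only: assumes nitter-scraper re-exports fetchTweets from its main entry
// and that the positional parameters are (username, sinceDate, maxPages, includeReplies).
import { fetchTweets } from "nitter-scraper";

async function main(): Promise<void> {
    const username = "NoirLang"; // Twitter username to scrape (without @)
    const lastWeek = new Date();
    lastWeek.setDate(lastWeek.getDate() - 7); // only tweets from the past week

    // Fetch up to 10 pages of tweets, including replies.
    const tweets = await fetchTweets(username, lastWeek, 10, true);
    console.log(`Fetched ${tweets.length} tweets for @${username}`);
}

main().catch((error) => console.error("Scraper failed:", error));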
@@ -49,4 +49,4 @@ "use strict";
     "https://nitter.net",
-    "https://nitter.privacyredirect.com",
-    "https://nitter.tiekoetter.com",
+    // "https://nitter.privacyredirect.com",
+    // "https://nitter.tiekoetter.com",
 ];
@@ -70,3 +70,3 @@ const REFERERS = [
  */
-function extractTweetsFromHtml(html, username, existingTweets, sinceDate) {
+function extractTweetsFromHtml(html, username, sinceDate) {
    const $ = cheerio.load(html);
@@ -102,6 +102,2 @@ const tweets = [];
        }
-        // Skip if we already have this tweet
-        if (existingTweets.has(cleanId)) {
-            return;
-        }
        const text = tweetElement.find(".tweet-content").text().trim();
@@ -177,3 +173,2 @@ // Get timestamp and full date from title attribute
        tweets.push(tweet);
-        existingTweets.set(cleanId, tweet);
    }
@@ -220,2 +215,5 @@ catch (error) {
    }
+    if (response.status !== 200) {
+        console.error(`Unexpected status code: ${response.status} for ${url}`);
+    }
    const html = await response.text();
@@ -244,3 +242,2 @@ return { html, status: response.status };
    let allTweets = [];
-    const existingTweets = new Map();
    while (pageNumber <= maxPages) {
@@ -252,3 +249,3 @@ const { html, status } = await fetchTweetsPage(username, cursor, includeReplies);
        }
-        const { tweets, nextCursor } = extractTweetsFromHtml(html, username, existingTweets, sinceDate);
+        const { tweets, nextCursor } = extractTweetsFromHtml(html, username, sinceDate);
        allTweets = [...allTweets, ...tweets];
@@ -264,3 +261,10 @@ if (!nextCursor) {
    }
-    return allTweets;
+    // Deduplicate tweets by ID before returning
+    const uniqueTweetsMap = new Map();
+    for (const tweet of allTweets) {
+        if (!uniqueTweetsMap.has(tweet.id)) {
+            uniqueTweetsMap.set(tweet.id, tweet);
+        }
+    }
+    return Array.from(uniqueTweetsMap.values());
 }
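The final hunk above replaces the per-page existingTweets map with a single deduplication pass over everything collected. A minimal TypeScript sketch of that pattern follows; the Tweet shape used here is an assumption for illustration only.

// Minimal sketch of dedup-by-ID; the Tweet interface is a hypothetical shape.
interface Tweet {
    id: string;
    text: string;
}

function dedupeById(tweets: Tweet[]): Tweet[] {
    const unique = new Map<string, Tweet>();
    for (const tweet of tweets) {
        // Keep the first occurrence of each ID across all fetched pages.
        if (!unique.has(tweet.id)) {
            unique.set(tweet.id, tweet);
        }
    }
    return Array.from(unique.values());
}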
 {
     "name": "nitter-scraper",
-    "version": "1.0.10",
+    "version": "1.0.11",
     "description": "A Twitter scraper that uses Nitter to fetch tweets without authentication",
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package