nitter-scraper
Comparing version 1.0.5 to 1.0.6
@@ -11,3 +11,5 @@ #!/usr/bin/env node
 // Fetch tweets
-const tweets = await (0, scraper_1.fetchTweets)(username, 1, true);
+const lastWeek = new Date();
+lastWeek.setDate(lastWeek.getDate() - 7);
+const tweets = await (0, scraper_1.fetchTweets)(username, lastWeek, 3, true);
 // Save tweets to file
@@ -14,0 +16,0 @@ (0, storage_1.saveTweets)(tweets);
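In source form, the updated example boils down to the sketch below (the bare `nitter-scraper` import is an assumption based on the package name; the compiled output above reaches the same function through `scraper_1`). Note that `sinceDate` becomes the second positional parameter, so existing call sites like `fetchTweets(username, 1, true)` must be updated.

```typescript
import { fetchTweets } from "nitter-scraper";

async function main() {
  // Only tweets from the last 7 days, up to 3 pages, replies included
  const lastWeek = new Date();
  lastWeek.setDate(lastWeek.getDate() - 7);

  const tweets = await fetchTweets("wslyvh", lastWeek, 3, true);
  console.log(`Fetched ${tweets.length} tweets`);
}

main().catch(console.error);
```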
@@ -5,5 +5,7 @@ import type { Tweet } from "./types/Tweet";
  * @param username Twitter username to scrape (without @)
+ * @param sinceDate Optional date to start fetching tweets from (default: null)
  * @param maxPages Maximum number of pages to fetch (default: 3)
  * @param includeReplies Whether to include replies (default: false)
  * @returns Promise containing an array of tweets
  */
-export declare function fetchTweets(username: string, maxPages?: number, includeReplies?: boolean): Promise<Tweet[]>;
+export declare function fetchTweets(username: string, sinceDate?: Date | null, maxPages?: number, includeReplies?: boolean): Promise<Tweet[]>;
@@ -46,3 +46,3 @@ "use strict";
  */
-function extractTweetsFromHtml(html, username, existingTweets) {
+function extractTweetsFromHtml(html, username, existingTweets, sinceDate) {
     const $ = cheerio.load(html);
@@ -155,2 +155,9 @@ const tweets = [];
     });
+    if (sinceDate) {
+        const filtered = tweets.filter((t) => t.timestamp && t.timestamp * 1000 >= sinceDate.getTime());
+        // If any tweet was filtered out, it means we've passed sinceDate, so stop pagination
+        if (filtered.length < tweets.length) {
+            return { tweets: filtered, nextCursor: null };
+        }
+    }
     return { tweets, nextCursor };
@@ -180,3 +187,3 @@ }
         await new Promise((resolve) => setTimeout(resolve, 30000));
-        return fetchTweetsPage(username, cursor, pageNumber);
+        return fetchTweetsPage(username, cursor, pageNumber, includeReplies);
     }
@@ -194,6 +201,8 @@ const html = await response.text();
  * @param username Twitter username to scrape (without @)
+ * @param sinceDate Optional date to start fetching tweets from (default: null)
  * @param maxPages Maximum number of pages to fetch (default: 3)
  * @param includeReplies Whether to include replies (default: false)
  * @returns Promise containing an array of tweets
  */
-async function fetchTweets(username, maxPages = 3, includeReplies = false) {
+async function fetchTweets(username, sinceDate = null, maxPages = 3, includeReplies = false) {
     let cursor = null;
@@ -209,3 +218,3 @@ let pageNumber = 1;
     }
-    const { tweets, nextCursor } = extractTweetsFromHtml(html, username, existingTweets);
+    const { tweets, nextCursor } = extractTweetsFromHtml(html, username, existingTweets, sinceDate);
     allTweets = [...allTweets, ...tweets];
@@ -212,0 +221,0 @@ if (!nextCursor) {
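The core of the change is the `sinceDate` cutoff added to `extractTweetsFromHtml`. Because Nitter serves tweets newest-first, the first tweet older than the cutoff implies every subsequent page is older as well, so the scraper can drop the stale tweets and halt pagination in one step. Below is a minimal sketch of that rule; the `Tweet` and `Page` shapes and the helper name are illustrative, assuming `timestamp` holds Unix time in seconds, as the `* 1000` scaling in the diff suggests.

```typescript
// Illustrative shape; the real Tweet type lives in ./types/Tweet
interface Tweet {
  timestamp: number | null; // Unix time in seconds
}

interface Page {
  tweets: Tweet[];
  nextCursor: string | null;
}

// Hypothetical helper mirroring the logic added to extractTweetsFromHtml
function applySinceDateCutoff(page: Page, sinceDate: Date | null): Page {
  if (!sinceDate) return page;
  const cutoffMs = sinceDate.getTime();
  // timestamp is in seconds, so scale to milliseconds before comparing
  const filtered = page.tweets.filter(
    (t) => t.timestamp !== null && t.timestamp * 1000 >= cutoffMs
  );
  // Any dropped tweet means this page crossed the cutoff; clearing the
  // cursor stops pagination, because the following pages are older still.
  if (filtered.length < page.tweets.length) {
    return { tweets: filtered, nextCursor: null };
  }
  return page;
}
```

One subtlety: the shipped code uses a truthy check (`t.timestamp && ...`), so tweets with a missing or zero timestamp are also dropped once a cutoff is set, which is reasonable since an undated tweet cannot be shown to be recent.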
@@ -1,4 +1,4 @@
 {
   "name": "nitter-scraper",
-  "version": "1.0.5",
+  "version": "1.0.6",
   "description": "A Twitter scraper that uses Nitter to fetch tweets without authentication",
@@ -5,0 +5,0 @@ "author": "wslyvh",
@@ -52,3 +52,3 @@ # Nitter Scraper
 ```typescript
-fetchTweets(username: string, maxPages?: number): Promise<Tweet[]>
+fetchTweets(username: string, sinceDate?: Date, maxPages?: number, includeReplies?: boolean): Promise<Tweet[]>
 ```
@@ -59,3 +59,5 @@
 - `username`: Twitter username to scrape (without @)
+- `sinceDate`: Optional date to start fetching tweets from (default: null)
 - `maxPages`: Maximum number of pages to fetch (default: 3)
+- `includeReplies`: Whether to include replies (default: false)
@@ -62,0 +64,0 @@ ### Tweet Type
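Putting the documented parameters together, typical calls look like the sketch below (ESM top-level await; the bare package import is assumed from the `name` field above):

```typescript
import { fetchTweets } from "nitter-scraper";

// Tweets from the last 24 hours, default 3 pages, no replies
const since = new Date(Date.now() - 24 * 60 * 60 * 1000);
const recent = await fetchTweets("wslyvh", since);

// No date cutoff: pass null (the default) and page as before
const everything = await fetchTweets("wslyvh", null, 5, true);
```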