@hyperbrowser/sdk
Comparing version 0.24.0 to 0.25.0
@@ -1,4 +1,25 @@
-import { ScrapeJobResponse, StartScrapeJobParams, StartScrapeJobResponse } from "../types/scrape";
+import { BatchScrapeJobResponse, GetBatchScrapeJobParams, ScrapeJobResponse, StartBatchScrapeJobParams, StartBatchScrapeJobResponse, StartScrapeJobParams, StartScrapeJobResponse } from "../types/scrape";
 import { BaseService } from "./base";
+export declare class BatchScrapeService extends BaseService {
+    /**
+     * Start a new batch scrape job
+     * @param params The parameters for the batch scrape job
+     */
+    start(params: StartBatchScrapeJobParams): Promise<StartBatchScrapeJobResponse>;
+    /**
+     * Get the status of a batch scrape job
+     * @param id The ID of the batch scrape job to get
+     * @param params Optional parameters to filter the batch scrape job
+     */
+    get(id: string, params?: GetBatchScrapeJobParams): Promise<BatchScrapeJobResponse>;
+    /**
+     * Start a batch scrape job and wait for it to complete
+     * @param params The parameters for the batch scrape job
+     * @param returnAllPages Whether to return all pages in the batch scrape job response
+     */
+    startAndWait(params: StartBatchScrapeJobParams, returnAllPages?: boolean): Promise<BatchScrapeJobResponse>;
+}
 export declare class ScrapeService extends BaseService {
+    readonly batch: BatchScrapeService;
+    constructor(apiKey: string, baseUrl: string, timeout: number);
     /**
@@ -5,0 +26,0 @@ * Start a new scrape job
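The new `BatchScrapeService` surface hangs off `ScrapeService` via the new `readonly batch` field. A minimal usage sketch, assuming the SDK's top-level client is the `Hyperbrowser` class and that it exposes this service as `client.scrape` (neither is shown in this diff):

```ts
// Hypothetical usage of the batch scrape API added in 0.25.0.
// `Hyperbrowser` and the `client.scrape` accessor are assumptions;
// this diff only shows ScrapeService and its new `batch` field.
import { Hyperbrowser } from "@hyperbrowser/sdk";

async function main() {
  const client = new Hyperbrowser({ apiKey: "your-api-key" });

  // startAndWait polls until the job reaches a terminal state and,
  // by default (returnAllPages = true), accumulates every page batch.
  const job = await client.scrape.batch.startAndWait({
    urls: ["https://example.com", "https://example.org"],
  });

  console.log(job.status, job.totalScrapedPages);
}

main().catch(console.error);
```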
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.ScrapeService = void 0; | ||
exports.ScrapeService = exports.BatchScrapeService = void 0; | ||
const base_1 = require("./base"); | ||
const utils_1 = require("../utils"); | ||
const client_1 = require("../client"); | ||
class BatchScrapeService extends base_1.BaseService { | ||
/** | ||
* Start a new batch scrape job | ||
* @param params The parameters for the batch scrape job | ||
*/ | ||
async start(params) { | ||
try { | ||
return await this.request("/scrape/batch", { | ||
method: "POST", | ||
body: JSON.stringify(params), | ||
}); | ||
} | ||
catch (error) { | ||
if (error instanceof client_1.HyperbrowserError) { | ||
throw error; | ||
} | ||
throw new client_1.HyperbrowserError("Failed to start batch scrape job", undefined); | ||
} | ||
} | ||
/** | ||
* Get the status of a batch scrape job | ||
* @param id The ID of the batch scrape job to get | ||
* @param params Optional parameters to filter the batch scrape job | ||
*/ | ||
async get(id, params) { | ||
try { | ||
return await this.request(`/scrape/batch/${id}`, undefined, { | ||
page: params?.page, | ||
}); | ||
} | ||
catch (error) { | ||
if (error instanceof client_1.HyperbrowserError) { | ||
throw error; | ||
} | ||
throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${id}`, undefined); | ||
} | ||
} | ||
/** | ||
* Start a batch scrape job and wait for it to complete | ||
* @param params The parameters for the batch scrape job | ||
* @param returnAllPages Whether to return all pages in the batch scrape job response | ||
*/ | ||
async startAndWait(params, returnAllPages = true) { | ||
const job = await this.start(params); | ||
const jobId = job.jobId; | ||
if (!jobId) { | ||
throw new client_1.HyperbrowserError("Failed to start batch scrape job, could not get job ID"); | ||
} | ||
let jobResponse; | ||
let failures = 0; | ||
while (true) { | ||
try { | ||
jobResponse = await this.get(jobId); | ||
if (jobResponse.status === "completed" || jobResponse.status === "failed") { | ||
break; | ||
} | ||
failures = 0; | ||
} | ||
catch (error) { | ||
failures++; | ||
if (failures >= 5) { | ||
throw new client_1.HyperbrowserError(`Failed to poll batch scrape job ${jobId} after 5 attempts: ${error}`); | ||
} | ||
} | ||
await (0, utils_1.sleep)(2000); | ||
} | ||
if (!returnAllPages) { | ||
return jobResponse; | ||
} | ||
failures = 0; | ||
while (jobResponse.currentPageBatch < jobResponse.totalPageBatches) { | ||
try { | ||
const tmpJobResponse = await this.get(jobId, { | ||
page: jobResponse.currentPageBatch + 1, | ||
batchSize: 100, | ||
}); | ||
if (tmpJobResponse.data) { | ||
jobResponse.data?.push(...tmpJobResponse.data); | ||
} | ||
jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch; | ||
jobResponse.totalScrapedPages = tmpJobResponse.totalScrapedPages; | ||
jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches; | ||
jobResponse.batchSize = tmpJobResponse.batchSize; | ||
failures = 0; | ||
} | ||
catch (error) { | ||
failures++; | ||
if (failures >= 5) { | ||
throw new client_1.HyperbrowserError(`Failed to get batch page ${jobResponse.currentPageBatch + 1} for job ${jobId} after 5 attempts: ${error}`); | ||
} | ||
} | ||
await (0, utils_1.sleep)(500); | ||
} | ||
return jobResponse; | ||
} | ||
} | ||
exports.BatchScrapeService = BatchScrapeService; | ||
class ScrapeService extends base_1.BaseService { | ||
constructor(apiKey, baseUrl, timeout) { | ||
super(apiKey, baseUrl, timeout); | ||
this.batch = new BatchScrapeService(apiKey, baseUrl, timeout); | ||
} | ||
/** | ||
@@ -9,0 +110,0 @@ * Start a new scrape job
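Two details of the compiled `startAndWait` above are easy to miss: the failure counter only counts *consecutive* failed polls (it resets to 0 after any success), and status polling runs every 2000 ms while page-batch fetching runs every 500 ms, pushing each batch into `jobResponse.data`. A distilled sketch of that retry-tolerant polling loop; every name here is illustrative, not SDK API:

```ts
// Sketch of the polling pattern BatchScrapeService.startAndWait uses.
// `fetchStatus` is a hypothetical stand-in for BatchScrapeService.get.
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

async function pollUntilDone<T extends { status: string }>(
  fetchStatus: () => Promise<T>,
  intervalMs = 2000,
  maxConsecutiveFailures = 5
): Promise<T> {
  let failures = 0;
  while (true) {
    try {
      const res = await fetchStatus();
      if (res.status === "completed" || res.status === "failed") {
        return res; // terminal states end the loop
      }
      failures = 0; // any success resets the consecutive-failure counter
    } catch (error) {
      failures++;
      if (failures >= maxConsecutiveFailures) {
        throw error; // give up only after repeated consecutive failures
      }
    }
    await sleep(intervalMs);
  }
}
```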
@@ -5,2 +5,3 @@ export type ScrapeFormat = "markdown" | "html" | "links" | "screenshot";
 export type CrawlJobStatus = "pending" | "running" | "completed" | "failed";
+export type ScrapePageStatus = "completed" | "failed";
 export type CrawlPageStatus = "completed" | "failed";
@@ -7,0 +8,0 @@ export type ScrapeWaitUntil = "load" | "domcontentloaded" | "networkidle";
@@ -1,7 +1,7 @@
 export { HyperbrowserConfig } from "./config";
 export { StartCrawlJobParams, StartCrawlJobResponse, CrawledPage, CrawlJobResponse, GetCrawlJobParams, } from "./crawl";
-export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, } from "./scrape";
+export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, ScrapeOptions, } from "./scrape";
 export { BasicResponse, SessionStatus, Session, SessionDetail, SessionListParams, SessionListResponse, ScreenConfig, CreateSessionParams, } from "./session";
 export { ProfileResponse, CreateProfileResponse, ProfileListParams, ProfileListResponse, } from "./profile";
 export { CreateExtensionParams, CreateExtensionResponse, ListExtensionsResponse, } from "./extension";
-export { ScrapeJobStatus, CrawlJobStatus, Country, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, } from "./constants";
+export { ScrapeJobStatus, CrawlJobStatus, Country, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, ScrapePageStatus, CrawlPageStatus, } from "./constants";
@@ -1,2 +1,2 @@
-import { ScrapeFormat, ScrapeJobStatus, ScrapeWaitUntil } from "./constants";
+import { ScrapeFormat, ScrapeJobStatus, ScrapePageStatus, ScrapeWaitUntil } from "./constants";
 import { CreateSessionParams } from "./session";
@@ -33,1 +33,33 @@ export interface ScrapeOptions {
 }
+export interface StartBatchScrapeJobParams {
+    urls: string[];
+    sessionOptions?: CreateSessionParams;
+    scrapeOptions?: ScrapeOptions;
+}
+export interface ScrapedPage {
+    url: string;
+    status: ScrapePageStatus;
+    error?: string | null;
+    metadata?: Record<string, string | string[]>;
+    markdown?: string;
+    html?: string;
+    links?: string[];
+    screenshot?: string;
+}
+export interface GetBatchScrapeJobParams {
+    page?: number;
+    batchSize?: number;
+}
+export interface StartBatchScrapeJobResponse {
+    jobId: string;
+}
+export interface BatchScrapeJobResponse {
+    jobId: string;
+    status: ScrapeJobStatus;
+    data?: ScrapedPage[];
+    error?: string;
+    totalScrapedPages: number;
+    totalPageBatches: number;
+    currentPageBatch: number;
+    batchSize: number;
+}
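`BatchScrapeJobResponse` is paginated: a direct `get` returns at most one page batch of `batchSize` scraped pages, with `currentPageBatch`/`totalPageBatches` tracking progress (this is what `startAndWait` iterates over when `returnAllPages` is true). A small sketch against the new types; the `@hyperbrowser/sdk/types` import path is an assumption:

```ts
// Illustrative only; the import path is assumed, not confirmed by this diff.
import type {
  StartBatchScrapeJobParams,
  BatchScrapeJobResponse,
} from "@hyperbrowser/sdk/types";

const params: StartBatchScrapeJobParams = {
  urls: ["https://example.com/a", "https://example.com/b"],
  // sessionOptions and scrapeOptions are optional; the ScrapeOptions
  // body is elided in this diff, so no fields are shown here.
};

// Summarize one page batch from a BatchScrapeJobResponse.
function summarize(job: BatchScrapeJobResponse): string {
  const completed = job.data?.filter((p) => p.status === "completed").length ?? 0;
  return (
    `job ${job.jobId} [${job.status}]: ${completed} completed in this batch, ` +
    `${job.totalScrapedPages} scraped total ` +
    `(batch ${job.currentPageBatch}/${job.totalPageBatches}, size ${job.batchSize})`
  );
}
```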
@@ -1,4 +1,4 @@
 {
   "name": "@hyperbrowser/sdk",
-  "version": "0.24.0",
+  "version": "0.25.0",
   "description": "Node SDK for Hyperbrowser API",
@@ -5,0 +5,0 @@ "author": "",