@hyperbrowser/sdk
Comparing version 0.24.0 to 0.25.0
@@ -1,4 +1,25 @@
-import { ScrapeJobResponse, StartScrapeJobParams, StartScrapeJobResponse } from "../types/scrape";
+import { BatchScrapeJobResponse, GetBatchScrapeJobParams, ScrapeJobResponse, StartBatchScrapeJobParams, StartBatchScrapeJobResponse, StartScrapeJobParams, StartScrapeJobResponse } from "../types/scrape";
 import { BaseService } from "./base";
+export declare class BatchScrapeService extends BaseService {
+    /**
+     * Start a new batch scrape job
+     * @param params The parameters for the batch scrape job
+     */
+    start(params: StartBatchScrapeJobParams): Promise<StartBatchScrapeJobResponse>;
+    /**
+     * Get the status of a batch scrape job
+     * @param id The ID of the batch scrape job to get
+     * @param params Optional parameters to filter the batch scrape job
+     */
+    get(id: string, params?: GetBatchScrapeJobParams): Promise<BatchScrapeJobResponse>;
+    /**
+     * Start a batch scrape job and wait for it to complete
+     * @param params The parameters for the batch scrape job
+     * @param returnAllPages Whether to return all pages in the batch scrape job response
+     */
+    startAndWait(params: StartBatchScrapeJobParams, returnAllPages?: boolean): Promise<BatchScrapeJobResponse>;
+}
 export declare class ScrapeService extends BaseService {
+    readonly batch: BatchScrapeService;
+    constructor(apiKey: string, baseUrl: string, timeout: number);
     /**
@@ -5,0 +26,0 @@ * Start a new scrape job
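The new `BatchScrapeService` surface hangs off `ScrapeService` via the new `readonly batch` field. A minimal usage sketch, assuming the SDK's top-level client is the `Hyperbrowser` class and that it exposes this service as `client.scrape` (neither is shown in this diff):

```ts
// Hypothetical usage of the batch scrape API added in 0.25.0.
// `Hyperbrowser` and the `client.scrape` accessor are assumptions;
// this diff only shows ScrapeService and its new `batch` field.
import { Hyperbrowser } from "@hyperbrowser/sdk";

async function main() {
  const client = new Hyperbrowser({ apiKey: "your-api-key" });

  // startAndWait polls until the job reaches a terminal state and,
  // by default (returnAllPages = true), accumulates every page batch.
  const job = await client.scrape.batch.startAndWait({
    urls: ["https://example.com", "https://example.org"],
  });

  console.log(job.status, job.totalScrapedPages);
}

main().catch(console.error);
```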
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.ScrapeService = void 0; | ||
exports.ScrapeService = exports.BatchScrapeService = void 0; | ||
const base_1 = require("./base"); | ||
const utils_1 = require("../utils"); | ||
const client_1 = require("../client"); | ||
class BatchScrapeService extends base_1.BaseService { | ||
/** | ||
* Start a new batch scrape job | ||
* @param params The parameters for the batch scrape job | ||
*/ | ||
async start(params) { | ||
try { | ||
return await this.request("/scrape/batch", { | ||
method: "POST", | ||
body: JSON.stringify(params), | ||
}); | ||
} | ||
catch (error) { | ||
if (error instanceof client_1.HyperbrowserError) { | ||
throw error; | ||
} | ||
throw new client_1.HyperbrowserError("Failed to start batch scrape job", undefined); | ||
} | ||
} | ||
/** | ||
* Get the status of a batch scrape job | ||
* @param id The ID of the batch scrape job to get | ||
* @param params Optional parameters to filter the batch scrape job | ||
*/ | ||
async get(id, params) { | ||
try { | ||
return await this.request(`/scrape/batch/${id}`, undefined, { | ||
page: params?.page, | ||
}); | ||
} | ||
catch (error) { | ||
if (error instanceof client_1.HyperbrowserError) { | ||
throw error; | ||
} | ||
throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${id}`, undefined); | ||
} | ||
} | ||
/** | ||
* Start a batch scrape job and wait for it to complete | ||
* @param params The parameters for the batch scrape job | ||
* @param returnAllPages Whether to return all pages in the batch scrape job response | ||
*/ | ||
async startAndWait(params, returnAllPages = true) { | ||
const job = await this.start(params); | ||
const jobId = job.jobId; | ||
if (!jobId) { | ||
throw new client_1.HyperbrowserError("Failed to start batch scrape job, could not get job ID"); | ||
} | ||
let jobResponse; | ||
let failures = 0; | ||
while (true) { | ||
try { | ||
jobResponse = await this.get(jobId); | ||
if (jobResponse.status === "completed" || jobResponse.status === "failed") { | ||
break; | ||
} | ||
failures = 0; | ||
} | ||
catch (error) { | ||
failures++; | ||
if (failures >= 5) { | ||
throw new client_1.HyperbrowserError(`Failed to poll batch scrape job ${jobId} after 5 attempts: ${error}`); | ||
} | ||
} | ||
await (0, utils_1.sleep)(2000); | ||
} | ||
if (!returnAllPages) { | ||
return jobResponse; | ||
} | ||
failures = 0; | ||
while (jobResponse.currentPageBatch < jobResponse.totalPageBatches) { | ||
try { | ||
const tmpJobResponse = await this.get(jobId, { | ||
page: jobResponse.currentPageBatch + 1, | ||
batchSize: 100, | ||
}); | ||
if (tmpJobResponse.data) { | ||
jobResponse.data?.push(...tmpJobResponse.data); | ||
} | ||
jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch; | ||
jobResponse.totalScrapedPages = tmpJobResponse.totalScrapedPages; | ||
jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches; | ||
jobResponse.batchSize = tmpJobResponse.batchSize; | ||
failures = 0; | ||
} | ||
catch (error) { | ||
failures++; | ||
if (failures >= 5) { | ||
throw new client_1.HyperbrowserError(`Failed to get batch page ${jobResponse.currentPageBatch + 1} for job ${jobId} after 5 attempts: ${error}`); | ||
} | ||
} | ||
await (0, utils_1.sleep)(500); | ||
} | ||
return jobResponse; | ||
} | ||
} | ||
exports.BatchScrapeService = BatchScrapeService; | ||
class ScrapeService extends base_1.BaseService { | ||
constructor(apiKey, baseUrl, timeout) { | ||
super(apiKey, baseUrl, timeout); | ||
this.batch = new BatchScrapeService(apiKey, baseUrl, timeout); | ||
} | ||
/** | ||
@@ -9,0 +110,0 @@ * Start a new scrape job
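Two details of the compiled `startAndWait` above are easy to miss: the failure counter only counts *consecutive* failed polls (it resets to 0 after any success), and status polling runs every 2000 ms while page-batch fetching runs every 500 ms, pushing each batch into `jobResponse.data`. A distilled sketch of that retry-tolerant polling loop; every name here is illustrative, not SDK API:

```ts
// Sketch of the polling pattern BatchScrapeService.startAndWait uses.
// `fetchStatus` is a hypothetical stand-in for BatchScrapeService.get.
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

async function pollUntilDone<T extends { status: string }>(
  fetchStatus: () => Promise<T>,
  intervalMs = 2000,
  maxConsecutiveFailures = 5
): Promise<T> {
  let failures = 0;
  while (true) {
    try {
      const res = await fetchStatus();
      if (res.status === "completed" || res.status === "failed") {
        return res; // terminal states end the loop
      }
      failures = 0; // any success resets the consecutive-failure counter
    } catch (error) {
      failures++;
      if (failures >= maxConsecutiveFailures) {
        throw error; // give up only after repeated consecutive failures
      }
    }
    await sleep(intervalMs);
  }
}
```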
@@ -5,2 +5,3 @@ export type ScrapeFormat = "markdown" | "html" | "links" | "screenshot";
 export type CrawlJobStatus = "pending" | "running" | "completed" | "failed";
+export type ScrapePageStatus = "completed" | "failed";
 export type CrawlPageStatus = "completed" | "failed";
@@ -7,0 +8,0 @@ export type ScrapeWaitUntil = "load" | "domcontentloaded" | "networkidle";
@@ -1,7 +1,7 @@
 export { HyperbrowserConfig } from "./config";
 export { StartCrawlJobParams, StartCrawlJobResponse, CrawledPage, CrawlJobResponse, GetCrawlJobParams, } from "./crawl";
-export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, } from "./scrape";
+export { StartScrapeJobParams, StartScrapeJobResponse, ScrapeJobData, ScrapeJobResponse, ScrapeOptions, } from "./scrape";
 export { BasicResponse, SessionStatus, Session, SessionDetail, SessionListParams, SessionListResponse, ScreenConfig, CreateSessionParams, } from "./session";
 export { ProfileResponse, CreateProfileResponse, ProfileListParams, ProfileListResponse, } from "./profile";
 export { CreateExtensionParams, CreateExtensionResponse, ListExtensionsResponse, } from "./extension";
-export { ScrapeJobStatus, CrawlJobStatus, Country, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, } from "./constants";
+export { ScrapeJobStatus, CrawlJobStatus, Country, ISO639_1, OperatingSystem, Platform, ScrapeFormat, ScrapeWaitUntil, ScrapePageStatus, CrawlPageStatus, } from "./constants";
@@ -1,2 +1,2 @@
-import { ScrapeFormat, ScrapeJobStatus, ScrapeWaitUntil } from "./constants";
+import { ScrapeFormat, ScrapeJobStatus, ScrapePageStatus, ScrapeWaitUntil } from "./constants";
 import { CreateSessionParams } from "./session";
@@ -33,1 +33,33 @@ export interface ScrapeOptions {
 }
+export interface StartBatchScrapeJobParams {
+    urls: string[];
+    sessionOptions?: CreateSessionParams;
+    scrapeOptions?: ScrapeOptions;
+}
+export interface ScrapedPage {
+    url: string;
+    status: ScrapePageStatus;
+    error?: string | null;
+    metadata?: Record<string, string | string[]>;
+    markdown?: string;
+    html?: string;
+    links?: string[];
+    screenshot?: string;
+}
+export interface GetBatchScrapeJobParams {
+    page?: number;
+    batchSize?: number;
+}
+export interface StartBatchScrapeJobResponse {
+    jobId: string;
+}
+export interface BatchScrapeJobResponse {
+    jobId: string;
+    status: ScrapeJobStatus;
+    data?: ScrapedPage[];
+    error?: string;
+    totalScrapedPages: number;
+    totalPageBatches: number;
+    currentPageBatch: number;
+    batchSize: number;
+}
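`BatchScrapeJobResponse` is paginated: a direct `get` returns at most one page batch of `batchSize` scraped pages, with `currentPageBatch`/`totalPageBatches` tracking progress (this is what `startAndWait` iterates over when `returnAllPages` is true). A small sketch against the new types; the `@hyperbrowser/sdk/types` import path is an assumption:

```ts
// Illustrative only; the import path is assumed, not confirmed by this diff.
import type {
  StartBatchScrapeJobParams,
  BatchScrapeJobResponse,
} from "@hyperbrowser/sdk/types";

const params: StartBatchScrapeJobParams = {
  urls: ["https://example.com/a", "https://example.com/b"],
  // sessionOptions and scrapeOptions are optional; the ScrapeOptions
  // body is elided in this diff, so no fields are shown here.
};

// Summarize one page batch from a BatchScrapeJobResponse.
function summarize(job: BatchScrapeJobResponse): string {
  const completed = job.data?.filter((p) => p.status === "completed").length ?? 0;
  return (
    `job ${job.jobId} [${job.status}]: ${completed} completed in this batch, ` +
    `${job.totalScrapedPages} scraped total ` +
    `(batch ${job.currentPageBatch}/${job.totalPageBatches}, size ${job.batchSize})`
  );
}
```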
@@ -1,4 +1,4 @@
 {
   "name": "@hyperbrowser/sdk",
-  "version": "0.24.0",
+  "version": "0.25.0",
   "description": "Node SDK for Hyperbrowser API",
@@ -5,0 +5,0 @@ "author": "",