@@ -25,3 +25,4 @@ import {CrawlRequest, Job, JobId, ScrapeRequest, ScrapeResponse} from "./model";
		'Content-Type': 'application/json',
		'Authorization': `Bearer ${this.apiKey}`
		'Authorization': `Bearer ${this.apiKey}`,
		"User-Agent": "WebcrawlerAPI-NodeJS-Client"
		},
		@@ -45,3 +46,3 @@ 'body': JSON.stringify(scrapeRequest),

		public async scrapeWithMeta(scrapeRequest: ScrapeRequest): Promise<ScrapeResponse> {
		public async scrapeWithMeta(scrapeRequest: ScrapeRequest, maxPollingRetries: number = MaxPullRetries): Promise<ScrapeResponse> {
		const url = `${this.basePath}/${this.apiVersion}/scrape`;
		@@ -65,3 +66,3 @@
		let delayIntervalMs = initialPullDelayMs;
		for (let i = 0; i < MaxPullRetries; i++) {
		for (let i = 0; i < maxPollingRetries; i++) {
		await new Promise(resolve => setTimeout(resolve, delayIntervalMs));
		@@ -79,7 +80,7 @@ const scrapeResult = await this.getScrapeResult(jobIdResponse.id);
		}
		throw new Error("Scraping took too long, please retry");
		throw new Error("Scraping took too long, please retry or increase the number of polling retries");
		}

		public async scrape(scrapeRequest: ScrapeRequest): Promise<ScrapeResponse> {
		const scrapeResult = await this.scrapeWithMeta(scrapeRequest);
		public async scrape(scrapeRequest: ScrapeRequest, maxPollingRetries: number = MaxPullRetries): Promise<any> {
		const scrapeResult = await this.scrapeWithMeta(scrapeRequest, maxPollingRetries);
		return scrapeResult.structured_data;
		@@ -94,2 +95,3 @@ }
		'Authorization': `Bearer ${this.apiKey}`,
		"User-Agent": "WebcrawlerAPI-NodeJS-Client"
		},
		@@ -121,3 +123,4 @@ };
		'Content-Type': 'application/json',
		'Authorization': `Bearer ${this.apiKey}`
		'Authorization': `Bearer ${this.apiKey}`,
		"User-Agent": "WebcrawlerAPI-NodeJS-Client"
		},
		@@ -167,3 +170,4 @@ 'body': JSON.stringify(crawlRequest),
		'Content-Type': 'application/json',
		'Authorization': `Bearer ${this.apiKey}`
		'Authorization': `Bearer ${this.apiKey}`,
		"User-Agent": "WebcrawlerAPI-NodeJS-Client"
		}
		@@ -170,0 +174,0 @@ }

dist/api.js

		@@ -152,3 +152,2 @@ "use strict";
		const url = `${this.basePath}/${this.apiVersion}/job/${jobID}`;
		console.log(url);
		const requestOptions = {
		@@ -155,0 +154,0 @@ 'method': 'GET',

package.json

		{
		"name": "webcrawlerapi-js",
		"version": "1.0.4",
		"version": "1.0.5",
		"description": "JS client for WebcrawlerAPI",
		@@ -20,3 +20,3 @@ "main": "./dist/index.js",
		},
		"author": "Andrew <support@webcrawlerapi.com>",
		"author": "Andrew <sdk@webcrawlerapi.com>",
		"license": "MIT",
		@@ -23,0 +23,0 @@ "dependencies": {},

README.md

		@@ -1,5 +0,11 @@
		# JS client for WebcrawlerAPI scrapers
		# JS client for WebcrawlerAPI

		Official client for [WebcrawlerAPI](https://webcrawlerapi.com/) scrapers.
		Official client for [WebcrawlerAPI](https://webcrawlerapi.com/).

		WebcrawlerAPI allows you to extract data from any website with just a simple API call.

		## Preparation
		1. Register at the [dashboard](https://dash.webcrawlerapi.com/).
		2. Get an [Access Key](https://dash.webcrawlerapi.com/access).

		## Installation
		@@ -10,22 +16,33 @@ Install WebcrawlerAPI js package:

		## Preparation
		1. Register at the [dashboard](https://dash.webcrawlerapi.com/).
		2. Get an [Access Key](https://dash.webcrawlerapi.com/access).

		## Request example

		```javascript
		const webcrawlerapi = require('webcrawlerapi-js');
		import webcrawlerapi from "webcrawlerapi-js";

		async function main() {
		const client = new webcrawlerapi.WebcrawlerClient(
		"YOUR API KEY HERE"
		"YOUR API ACCESS KEY HERE",
		)
		const response = await client.scrape({
		input: {
		"url": "https://www.funda.nl/detail/koop/heerhugowaard/huis-govert-flinckplantsoen-1/89968455/"
		},
		crawler_id: "webcrawler/funda",
		})
		console.log(response)
		// sync way - the promise will be resolved with all the data
		const syncJob = await client.crawl({
		"items_limit": 10,
		"url": "https://stripe.com/",
		"scrape_type": "markdown"
		}
		)
		console.log(syncJob);

		// or async - get the job id and then poll the job status and get the data

		const jobWithId = await client.crawlAsync({
		"items_limit": 10,
		"url": "https://stripe.com/",
		"scrape_type": "markdown"
		}
		)
		// wait for job to complete
		const jobId = jobWithId.id;
		let asyncJob = await client.getJob(jobId);

		console.log(asyncJob);
		}
		@@ -37,26 +54,34 @@
		## Response example
		```json
		```javascript
		{
		city: 'Heerhugowaard',
		price: 325000,
		images: [ 'https://cloud.funda.nl/valentina_media/191/215/183_2160.jpg' ],
		status: 'inonderhandeling',
		videos: [],
		address: 'Govert Flinckplantsoen 1',
		country: 'Nederland',
		province: 'Noord-Holland',
		plot_area: '183 m²',
		post_code: '1701NH',
		description: 'De woning is met liefde en zorg 53 jaar bewoond door...',
		living_area: 127,
		house_number: 1,
		energie_label: 'd',
		property_type: 'woonhuis',
		publication_date: '2024-05-28T00:00:00',
		number_of_bedrooms: 4,
		coordinates_latitude: 52.67685,
		year_of_construction: 1971,
		coordinates_longitude: 4.8560443,
		house_number_extension: ''
		id: '49c4942b-b7d9-4d62-94b5-b54a3016ac51',
		org_id: 'clxsnorta00075wuuqxgzzvxm',
		url: 'https://stripe.com/',
		scrape_type: 'markdown',
		whitelist_regexp: '',
		blacklist_regexp: '',
		allow_subdomains: false,
		items_limit: 10,
		created_at: '2024-12-28T21:36:04.417Z',
		finished_at: null,
		updated_at: '2024-12-28T21:36:04.383Z',
		webhook_url: '',
		status: 'in_progress',
		job_items: [
		{
		id: 'f26cefe1-09d1-4d4c-8b74-b65e075e230d',
		job_id: '49c4942a-b7d9-4d62-94b5-b54a3016ac51',
		original_url: 'https://stripe.com/',
		page_status_code: 0,
		status: 'new',
		title: '',
		last_error: '',
		created_at: '2024-12-28T21:36:04.468Z',
		updated_at: '2024-12-28T21:36:04.435Z',
		cost: 0,
		referred_url: ''
		}
		],
		recommended_pull_delay_ms: 5000
		}
		```

webcrawlerapi-js - npm Package Compare versions

Improved metrics