🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team. Learn more
Socket
DemoInstallSign in
Socket

webcrawlerapi-js

Package Overview
Dependencies
Maintainers
1
Versions
19
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

webcrawlerapi-js - npm Package Compare versions

Comparing version

to
1.0.5

20

api.ts

@@ -25,3 +25,4 @@ import {CrawlRequest, Job, JobId, ScrapeRequest, ScrapeResponse} from "./model";

'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`
'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
},

@@ -45,3 +46,3 @@ 'body': JSON.stringify(scrapeRequest),

public async scrapeWithMeta(scrapeRequest: ScrapeRequest): Promise<ScrapeResponse> {
public async scrapeWithMeta(scrapeRequest: ScrapeRequest, maxPollingRetries: number = MaxPullRetries): Promise<ScrapeResponse> {
const url = `${this.basePath}/${this.apiVersion}/scrape`;

@@ -65,3 +66,3 @@

let delayIntervalMs = initialPullDelayMs;
for (let i = 0; i < MaxPullRetries; i++) {
for (let i = 0; i < maxPollingRetries; i++) {
await new Promise(resolve => setTimeout(resolve, delayIntervalMs));

@@ -79,7 +80,7 @@ const scrapeResult = await this.getScrapeResult(jobIdResponse.id);

}
throw new Error("Scraping took too long, please retry");
throw new Error("Scraping took too long, please retry or increase the number of polling retries");
}
public async scrape(scrapeRequest: ScrapeRequest): Promise<ScrapeResponse> {
const scrapeResult = await this.scrapeWithMeta(scrapeRequest);
public async scrape(scrapeRequest: ScrapeRequest, maxPollingRetries: number = MaxPullRetries): Promise<any> {
const scrapeResult = await this.scrapeWithMeta(scrapeRequest, maxPollingRetries);
return scrapeResult.structured_data;

@@ -94,2 +95,3 @@ }

'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
},

@@ -121,3 +123,4 @@ };

'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`
'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
},

@@ -167,3 +170,4 @@ 'body': JSON.stringify(crawlRequest),

'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`
'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
}

@@ -170,0 +174,0 @@ }

1

dist/api.js

@@ -152,3 +152,2 @@ "use strict";

const url = `${this.basePath}/${this.apiVersion}/job/${jobID}`;
console.log(url);
const requestOptions = {

@@ -155,0 +154,0 @@ 'method': 'GET',

{
"name": "webcrawlerapi-js",
"version": "1.0.4",
"version": "1.0.5",
"description": "JS client for WebcrawlerAPI",

@@ -20,3 +20,3 @@ "main": "./dist/index.js",

},
"author": "Andrew <support@webcrawlerapi.com>",
"author": "Andrew <sdk@webcrawlerapi.com>",
"license": "MIT",

@@ -23,0 +23,0 @@ "dependencies": {},

@@ -1,5 +0,11 @@

# JS client for WebcrawlerAPI scrapers
# JS client for WebcrawlerAPI
Official client for [WebcrawlerAPI](https://webcrawlerapi.com/) scrapers.
Official client for [WebcrawlerAPI](https://webcrawlerapi.com/).
WebcrawlerAPI allows you to extract data from any website with just a simple API call.
## Preparation
1. Register to the [dashboard](https://dash.webcrawlerapi.com/).
2. Get an [Access Key](https://dash.webcrawlerapi.com/access).
## Installation

@@ -10,22 +16,33 @@ Install WebcrawlerAPI js package:

## Preparation
1. Register to the [dashboard](https://dash.webcrawlerapi.com/).
2. Get an [Access Key](https://dash.webcrawlerapi.com/access).
## Request example
```javascript
const webcrawlerapi = require('webcrawlerapi-js');
import webcrawlerapi from "webcrawlerapi-js";
async function main() {
const client = new webcrawlerapi.WebcrawlerClient(
"YOUR API KEY HERE"
"YOUR API ACCESS KEY HERE",
)
const response = await client.scrape({
input: {
"url": "https://www.funda.nl/detail/koop/heerhugowaard/huis-govert-flinckplantsoen-1/89968455/"
},
crawler_id: "webcrawler/funda",
})
console.log(response)
// sync way - promise will be resolved with the all the data
const syncJob = await client.crawl({
"items_limit": 10,
"url": "https://stripe.com/",
"scrape_type": "markdown"
}
)
console.log(syncJob);
// or async - get the job id and then poll the job status and get the data
const jobWithId = await client.crawlAsync({
"items_limit": 10,
"url": "https://stripe.com/",
"scrape_type": "markdown"
}
)
// wait for job to complete
const jobId = jobWithId.id;
let asyncJob = await client.getJob(jobId);
console.log(asyncJob);
}

@@ -37,26 +54,34 @@

## Response example
```json
```javascript
{
city: 'Heerhugowaard',
price: 325000,
images: [ 'https://cloud.funda.nl/valentina_media/191/215/183_2160.jpg' ],
status: 'inonderhandeling',
videos: [],
address: 'Govert Flinckplantsoen 1',
country: 'Nederland',
province: 'Noord-Holland',
plot_area: '183 m²',
post_code: '1701NH',
description: 'De woning is met liefde en zorg 53 jaar bewoond door...',
living_area: 127,
house_number: 1,
energie_label: 'd',
property_type: 'woonhuis',
publication_date: '2024-05-28T00:00:00',
number_of_bedrooms: 4,
coordinates_latitude: 52.67685,
year_of_construction: 1971,
coordinates_longitude: 4.8560443,
house_number_extension: ''
id: '49c4942b-b7d9-4d62-94b5-b54a3016ac51',
org_id: 'clxsnorta00075wuuqxgzzvxm',
url: 'https://stripe.com/',
scrape_type: 'markdown',
whitelist_regexp: '',
blacklist_regexp: '',
allow_subdomains: false,
items_limit: 10,
created_at: '2024-12-28T21:36:04.417Z',
finished_at: null,
updated_at: '2024-12-28T21:36:04.383Z',
webhook_url: '',
status: 'in_progress',
job_items: [
{
id: 'f26cefe1-09d1-4d4c-8b74-b65e075e230d',
job_id: '49c4942a-b7d9-4d62-94b5-b54a3016ac51',
original_url: 'https://stripe.com/',
page_status_code: 0,
status: 'new',
title: '',
last_error: '',
created_at: '2024-12-28T21:36:04.468Z',
updated_at: '2024-12-28T21:36:04.435Z',
cost: 0,
referred_url: ''
}
],
recommended_pull_delay_ms: 5000
}
```