🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team. Learn more
Socket
Demo | Install | Sign in
Socket

webcrawlerapi-js

Package Overview
Dependencies
Maintainers
1
Versions
15
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

webcrawlerapi-js - npm Package Compare versions

Comparing version 1.0.7 to 1.0.8

59

api.ts

@@ -90,4 +90,8 @@ import {CrawlRequest, Job, JobId, ScrapeRequest, ScrapeResponse} from "./model";

'headers': {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
"User-Agent": "WebcrawlerAPI-NodeJS-Client",
'Cache-Control': 'no-cache, no-store, must-revalidate',
'Pragma': 'no-cache',
'Expires': '0'
},

@@ -120,3 +124,6 @@ };

'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
"User-Agent": "WebcrawlerAPI-NodeJS-Client",
'Cache-Control': 'no-cache, no-store, must-revalidate',
'Pragma': 'no-cache',
'Expires': '0'
},

@@ -135,4 +142,44 @@ 'body': JSON.stringify(crawlRequest),

await new Promise(resolve => setTimeout(resolve, delayIntervalMs));
const job = await this.getJob(jobIdResponse.id);
const timestamp = new Date().getTime();
const job = await this.getJob(`${jobIdResponse.id}?t=${timestamp}`);
if (job.status !== 'in_progress' && job.status !== 'new') {
// Transform each job item to include getContent method
job.job_items = job.job_items.map(item => ({
...item,
/**
 * Downloads the stored content for this job item.
 *
 * Resolves to `null` when the item's status is not 'done', or when no
 * content URL is present for the job's `scrape_type` ('html' reads
 * `raw_content_url`, 'cleaned' reads `cleaned_content_url`, 'markdown'
 * reads `markdown_content_url`). Otherwise the URL is fetched and the
 * response body is returned as text.
 *
 * @returns The content as a string, or `null` if nothing can be fetched.
 * @throws Error when the content request answers with a non-OK status.
 */
getContent: async function(): Promise<string | null> {
  // Only finished items have content to download.
  if (this.status !== 'done') {
    return null;
  }

  // Read the scrape type once, then select the matching URL field.
  const scrapeType = job.scrape_type;
  let url: string | undefined;
  if (scrapeType === 'html') {
    url = this.raw_content_url;
  } else if (scrapeType === 'cleaned') {
    url = this.cleaned_content_url;
  } else if (scrapeType === 'markdown') {
    url = this.markdown_content_url;
  }

  // Unknown scrape type or a missing/empty URL: nothing to fetch.
  if (!url) {
    return null;
  }

  const requestHeaders = {
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': '*/*'
  };
  const res = await fetch(url, { headers: requestHeaders });
  if (!res.ok) {
    throw new Error(`Failed to fetch content: ${res.statusText}`);
  }

  return await res.text();
}
}));
return job;

@@ -144,2 +191,3 @@ }

}
throw new Error("Crawling took too long, please retry or increase the number of polling retries");
}

@@ -169,3 +217,6 @@

'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
"User-Agent": "WebcrawlerAPI-NodeJS-Client",
'Cache-Control': 'no-cache, no-store, must-revalidate',
'Pragma': 'no-cache',
'Expires': '0'
}

@@ -172,0 +223,0 @@ }

@@ -92,4 +92,8 @@ "use strict";

'headers': {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
"User-Agent": "WebcrawlerAPI-NodeJS-Client",
'Cache-Control': 'no-cache, no-store, must-revalidate',
'Pragma': 'no-cache',
'Expires': '0'
},

@@ -118,3 +122,6 @@ };

'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
"User-Agent": "WebcrawlerAPI-NodeJS-Client",
'Cache-Control': 'no-cache, no-store, must-revalidate',
'Pragma': 'no-cache',
'Expires': '0'
},

@@ -130,4 +137,38 @@ 'body': JSON.stringify(crawlRequest),

yield new Promise(resolve => setTimeout(resolve, delayIntervalMs));
const job = yield this.getJob(jobIdResponse.id);
const timestamp = new Date().getTime();
const job = yield this.getJob(`${jobIdResponse.id}?t=${timestamp}`);
if (job.status !== 'in_progress' && job.status !== 'new') {
// Transform each job item to include getContent method
job.job_items = job.job_items.map(item => (Object.assign(Object.assign({}, item), { getContent: function () {
return __awaiter(this, void 0, void 0, function* () {
if (this.status !== 'done') {
return null;
}
let contentUrl;
switch (job.scrape_type) {
case 'html':
contentUrl = this.raw_content_url;
break;
case 'cleaned':
contentUrl = this.cleaned_content_url;
break;
case 'markdown':
contentUrl = this.markdown_content_url;
break;
}
if (!contentUrl) {
return null;
}
const response = yield fetch(contentUrl, {
headers: {
'Accept-Encoding': 'gzip, deflate, br',
'Accept': '*/*'
}
});
if (!response.ok) {
throw new Error(`Failed to fetch content: ${response.statusText}`);
}
return yield response.text();
});
} })));
return job;

@@ -139,2 +180,3 @@ }

}
throw new Error("Crawling took too long, please retry or increase the number of polling retries");
});

@@ -164,3 +206,6 @@ }

'Authorization': `Bearer ${this.apiKey}`,
"User-Agent": "WebcrawlerAPI-NodeJS-Client"
"User-Agent": "WebcrawlerAPI-NodeJS-Client",
'Cache-Control': 'no-cache, no-store, must-revalidate',
'Pragma': 'no-cache',
'Expires': '0'
}

@@ -167,0 +212,0 @@ };

@@ -59,2 +59,5 @@ export interface ScrapeRequest {

referred_url: string;
raw_content_url?: string;
cleaned_content_url?: string;
getContent(): Promise<string | null>;
}

@@ -64,2 +64,5 @@ export interface ScrapeRequest {

referred_url: string;
raw_content_url?: string;
cleaned_content_url?: string;
getContent(): Promise<string | null>;
}

2

package.json
{
"name": "webcrawlerapi-js",
"version": "1.0.7",
"version": "1.0.8",
"description": "JS client for WebcrawlerAPI",

@@ -5,0 +5,0 @@ "main": "./dist/index.js",

@@ -32,2 +32,7 @@ # JS client for WebcrawlerAPI

)
for (const item of syncJob.job_items) {
item.getContent().then((content) => {
console.log(content.slice(0, 100));
})
}
console.log(syncJob);

@@ -34,0 +39,0 @@