🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team. Learn more
Socket
Demo | Install | Sign in
Socket

@harvestapi/scraper

Package Overview
Dependencies
Maintainers
1
Versions
22
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@harvestapi/scraper - npm Package Compare versions

Comparing version

to
1.3.11

1

dist/base/listing.scraper.d.ts

@@ -20,2 +20,3 @@ import { ListingScraperOptions } from './types';

private scrapedItems;
private paginationToken;
private undefinedPagination;

@@ -22,0 +23,0 @@ constructor(options: ListingScraperOptions<TItemShort, TItemDetail>);

3

dist/base/types.d.ts

@@ -19,4 +19,5 @@ import { ApiItemResponse, ApiListResponse } from '../types';

export type ListingScraperOptions<TItemShot, TItemDetails> = ListingScraperConfig<TItemShot, TItemDetails> & {
fetchList: ({ page }: {
fetchList: (args: {
page: number;
paginationToken?: string | null;
}) => Promise<ApiListResponse<TItemShot>>;

@@ -23,0 +24,0 @@ fetchItem: ({ item, }: {

@@ -815,2 +815,3 @@ 'use strict';

this.scrapedItems = {};
this.paginationToken = null;
this.undefinedPagination = false;

@@ -861,3 +862,3 @@ this.onItemScraped = async ({ item }) => {

async scrapeStart() {
var _a, _b, _c, _d, _e;
var _a, _b, _c, _d, _e, _f;
this.stats = {

@@ -871,8 +872,11 @@ pages: 0,

};
this.paginationToken = null;
this.scrapePagesDone = false;
const firstPage = await this.fetchPage({ page: 1 });
let totalPages = ((_a = firstPage === null || firstPage === void 0 ? void 0 : firstPage.pagination) === null || _a === void 0 ? void 0 : _a.totalPages) || 0;
this.paginationToken = ((_b = firstPage === null || firstPage === void 0 ? void 0 : firstPage.pagination) === null || _b === void 0 ? void 0 : _b.paginationToken) || null;
if (this.options.maxPages && totalPages > this.options.maxPages) {
totalPages = this.options.maxPages;
}
if (!totalPages && ((_b = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _b === void 0 ? void 0 : _b.length)) {
if (!totalPages && ((_c = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _c === void 0 ? void 0 : _c.length)) {
totalPages = this.options.maxPages;

@@ -884,5 +888,5 @@ this.undefinedPagination = true;

}
const concurrency = ((_c = this.options) === null || _c === void 0 ? void 0 : _c.overrideConcurrency) || ((_d = firstPage === null || firstPage === void 0 ? void 0 : firstPage.user) === null || _d === void 0 ? void 0 : _d.requestsConcurrency) || 1;
const concurrency = ((_d = this.options) === null || _d === void 0 ? void 0 : _d.overrideConcurrency) || ((_e = firstPage === null || firstPage === void 0 ? void 0 : firstPage.user) === null || _e === void 0 ? void 0 : _e.requestsConcurrency) || 1;
this.log(`Scraping ${this.options.entityName} with ${concurrency} concurrent ${concurrency === 1 ? 'worker' : 'workers'}... Total pages: ${totalPages}`);
if (!((_e = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _e === void 0 ? void 0 : _e.length)) {
if (!((_f = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _f === void 0 ? void 0 : _f.length)) {
this.done = true;

@@ -956,3 +960,5 @@ if (this.error) {

this.log(`Scraping page ${page} of ${this.options.entityName}...`);
const result = await this.options.fetchList({ page }).catch((error) => {
const result = await this.options
.fetchList({ page, paginationToken: this.paginationToken })
.catch((error) => {
this.errorLog('Error fetching page', page, error);

@@ -1105,2 +1111,5 @@ return null;

}
async getPostComments(params) {
return this.scraper.fetchApi({ path: 'linkedin/post-comments', params });
}
async searchCompanyAssociatedProfiles(params) {

@@ -1182,2 +1191,14 @@ return this.scraper.fetchApi({

}
async scrapePostComments({ query, ...options }) {
return new ListingScraper({
fetchList: (fetchArgs) => this.getPostComments({ ...query, ...fetchArgs }),
fetchItem: async ({ item }) => (item === null || item === void 0 ? void 0 : item.id)
? { entityId: item === null || item === void 0 ? void 0 : item.id, element: item }
: { skipped: true },
scrapeDetails: false,
entityName: 'post-comments',
...options,
maxPages: 100,
}).scrapeStart();
}
async test() {

@@ -1184,0 +1205,0 @@ return this.scraper.fetchApi({ path: 'linkedin/test' });

@@ -813,2 +813,3 @@ import { randomUUID } from 'crypto';

this.scrapedItems = {};
this.paginationToken = null;
this.undefinedPagination = false;

@@ -859,3 +860,3 @@ this.onItemScraped = async ({ item }) => {

async scrapeStart() {
var _a, _b, _c, _d, _e;
var _a, _b, _c, _d, _e, _f;
this.stats = {

@@ -869,8 +870,11 @@ pages: 0,

};
this.paginationToken = null;
this.scrapePagesDone = false;
const firstPage = await this.fetchPage({ page: 1 });
let totalPages = ((_a = firstPage === null || firstPage === void 0 ? void 0 : firstPage.pagination) === null || _a === void 0 ? void 0 : _a.totalPages) || 0;
this.paginationToken = ((_b = firstPage === null || firstPage === void 0 ? void 0 : firstPage.pagination) === null || _b === void 0 ? void 0 : _b.paginationToken) || null;
if (this.options.maxPages && totalPages > this.options.maxPages) {
totalPages = this.options.maxPages;
}
if (!totalPages && ((_b = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _b === void 0 ? void 0 : _b.length)) {
if (!totalPages && ((_c = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _c === void 0 ? void 0 : _c.length)) {
totalPages = this.options.maxPages;

@@ -882,5 +886,5 @@ this.undefinedPagination = true;

}
const concurrency = ((_c = this.options) === null || _c === void 0 ? void 0 : _c.overrideConcurrency) || ((_d = firstPage === null || firstPage === void 0 ? void 0 : firstPage.user) === null || _d === void 0 ? void 0 : _d.requestsConcurrency) || 1;
const concurrency = ((_d = this.options) === null || _d === void 0 ? void 0 : _d.overrideConcurrency) || ((_e = firstPage === null || firstPage === void 0 ? void 0 : firstPage.user) === null || _e === void 0 ? void 0 : _e.requestsConcurrency) || 1;
this.log(`Scraping ${this.options.entityName} with ${concurrency} concurrent ${concurrency === 1 ? 'worker' : 'workers'}... Total pages: ${totalPages}`);
if (!((_e = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _e === void 0 ? void 0 : _e.length)) {
if (!((_f = firstPage === null || firstPage === void 0 ? void 0 : firstPage.elements) === null || _f === void 0 ? void 0 : _f.length)) {
this.done = true;

@@ -954,3 +958,5 @@ if (this.error) {

this.log(`Scraping page ${page} of ${this.options.entityName}...`);
const result = await this.options.fetchList({ page }).catch((error) => {
const result = await this.options
.fetchList({ page, paginationToken: this.paginationToken })
.catch((error) => {
this.errorLog('Error fetching page', page, error);

@@ -1103,2 +1109,5 @@ return null;

}
async getPostComments(params) {
return this.scraper.fetchApi({ path: 'linkedin/post-comments', params });
}
async searchCompanyAssociatedProfiles(params) {

@@ -1180,2 +1189,14 @@ return this.scraper.fetchApi({

}
async scrapePostComments({ query, ...options }) {
return new ListingScraper({
fetchList: (fetchArgs) => this.getPostComments({ ...query, ...fetchArgs }),
fetchItem: async ({ item }) => (item === null || item === void 0 ? void 0 : item.id)
? { entityId: item === null || item === void 0 ? void 0 : item.id, element: item }
: { skipped: true },
scrapeDetails: false,
entityName: 'post-comments',
...options,
maxPages: 100,
}).scrapeStart();
}
async test() {

@@ -1182,0 +1203,0 @@ return this.scraper.fetchApi({ path: 'linkedin/test' });

import { ScraperOptions } from '../base';
import { ApiItemResponse, ApiListResponse } from '../types';
import { Company, CompanyShort, GetLinkedinCompanyParams, GetLinkedinJobParams, GetLinkedinPostReactionsParams, GetLinkedInProfileParams, Job, JobShort, PostReaction, PostShort, Profile, ProfileShort, ScrapeLinkedinCompaniesParams, ScrapeLinkedinJobsParams, ScrapeLinkedinPostReactionsParams, ScrapeLinkedinPostsParams, ScrapeLinkedinProfilesParams, SearchLinkedinCompaniesParams, SearchLinkedInCompanyAssociatedProfilesParams, SearchLinkedinJobsParams, SearchLinkedinPostsParams, SearchLinkedInProfilesParams, SearchLinkedInProfilesParamsV2 } from './types';
import { Company, CompanyShort, GetLinkedinCompanyParams, GetLinkedinJobParams, GetLinkedinPostCommentsParams, GetLinkedinPostReactionsParams, GetLinkedInProfileParams, Job, JobShort, PostComment, PostReaction, PostShort, Profile, ProfileShort, ScrapeLinkedinCompaniesParams, ScrapeLinkedinJobsParams, ScrapeLinkedinPostCommentsParams, ScrapeLinkedinPostReactionsParams, ScrapeLinkedinPostsParams, ScrapeLinkedinProfilesParams, SearchLinkedinCompaniesParams, SearchLinkedInCompanyAssociatedProfilesParams, SearchLinkedinJobsParams, SearchLinkedinPostsParams, SearchLinkedInProfilesParams, SearchLinkedInProfilesParamsV2 } from './types';
export declare class LinkedinScraper {

@@ -23,2 +23,3 @@ private options;

getPostReactions(params: GetLinkedinPostReactionsParams): Promise<ApiListResponse<PostReaction>>;
/**
 * Fetches a list response whose elements are `PostComment` objects.
 * @param params Request parameters; see `GetLinkedinPostCommentsParams`.
 * @returns A promise of the list response.
 */
getPostComments(params: GetLinkedinPostCommentsParams): Promise<ApiListResponse<PostComment>>;
searchCompanyAssociatedProfiles(params: SearchLinkedInCompanyAssociatedProfilesParams): Promise<ApiListResponse<ProfileShort>>;

@@ -73,3 +74,11 @@ scrapeJobs({ query, ...options }: ScrapeLinkedinJobsParams): Promise<{

} | undefined>;
/**
 * Runs a post-comments scrape.
 * `query` holds the `GetLinkedinPostCommentsParams`; the remaining properties
 * are the `ListingScraperConfig` part of `ScrapeLinkedinPostCommentsParams`.
 * @returns A promise of an object with numeric counters (`pages`, `pagesSuccess`,
 * `items`, `itemsSuccess`, `requests`) and a `requestsStartTime` date, or `undefined`.
 */
scrapePostComments({ query, ...options }: ScrapeLinkedinPostCommentsParams): Promise<{
pages: number;
pagesSuccess: number;
items: number;
itemsSuccess: number;
requests: number;
requestsStartTime: Date;
} | undefined>;
test(): Promise<any>;
}

@@ -97,2 +97,8 @@ import { ListingScraperConfig } from '../base';

}
/**
 * Parameters accepted by `getPostComments()`.
 */
export interface GetLinkedinPostCommentsParams {
/** The post to query, as a string or a number. NOTE(review): presumably a post ID or URL; confirm the accepted formats against the API docs. */
post: string | number;
/** Optional page number. */
page?: number;
/** Optional pagination token; may be `null`. */
paginationToken?: string | null;
/** Optional sort key: `'date'` or `'relevance'`. */
sortBy?: 'date' | 'relevance';
}
export type Profile = {

@@ -464,2 +470,26 @@ id: string;

};
/**
 * Element type of the list response returned by `getPostComments()`.
 * This declaration fixes only the field types; notes marked "presumably" are
 * inferred from field names and should be confirmed against the API docs.
 */
export type PostComment = {
id: string;
linkedinUrl: string;
/** Presumably the text of the comment. */
commentary: string;
/** Creation time as a string; the exact format is not specified here. */
createdAt: string;
postId: string;
/** Presumably the author of the comment. */
actor: {
id: string;
name: string;
linkedinUrl: string;
position: string;
pictureUrl: string;
picture: {
url: string;
width: number;
height: number;
/** Numeric expiry value; units are not specified here. */
expiresAt: number;
};
};
/** Creation time as a number; units are not specified here. */
createdAtTimestamp: number;
/** Optional flag; may be `null`. */
pinned?: boolean | null;
/** Optional flag; may be `null`. */
contributed?: boolean | null;
/** Optional flag; may be `null`. */
edited?: boolean | null;
};
export type ScrapeLinkedinJobsParams = {

@@ -481,2 +511,5 @@ query: SearchLinkedinJobsParams;

} & ListingScraperConfig<PostReaction, PostReaction>;
/**
 * Argument type of `scrapePostComments()`: a `query` with the post-comments
 * request parameters, combined with a `ListingScraperConfig` whose short and
 * detailed item types are both `PostComment`.
 */
export type ScrapeLinkedinPostCommentsParams = {
/** Request parameters for the comments being scraped. */
query: GetLinkedinPostCommentsParams;
} & ListingScraperConfig<PostComment, PostComment>;
export type ErrorResponse = {

@@ -483,0 +516,0 @@ error: string;

@@ -25,4 +25,5 @@ export interface BaseApiResponse {

pageSize: number;
paginationToken?: string | null;
} | null;
elements: TItem[];
};
{
"name": "@harvestapi/scraper",
"version": "1.3.10",
"version": "1.3.11",
"main": "dist/index.cjs.js",

@@ -5,0 +5,0 @@ "module": "dist/index.esm.js",

@@ -232,2 +232,16 @@ # HarvestAPI scraping tools

##### getPostComments()
> **getPostComments**(`params`): `Promise`\<[`ApiListResponse`](#apilistresponsetitem)\<[`PostComment`](#postcomment)\>\>
###### Parameters
###### params
[`GetLinkedinPostCommentsParams`](#getlinkedinpostcommentsparams)
###### Returns
`Promise`\<[`ApiListResponse`](#apilistresponsetitem)\<[`PostComment`](#postcomment)\>\>
##### searchCompanyAssociatedProfiles()

@@ -331,2 +345,16 @@

##### scrapePostComments()
> **scrapePostComments**(`__namedParameters`): `Promise`\<`undefined` \| \{ `pages`: `number`; `pagesSuccess`: `number`; `items`: `number`; `itemsSuccess`: `number`; `requests`: `number`; `requestsStartTime`: `Date`; \}\>
###### Parameters
###### \_\_namedParameters
[`ScrapeLinkedinPostCommentsParams`](#scrapelinkedinpostcommentsparams)
###### Returns
`Promise`\<`undefined` \| \{ `pages`: `number`; `pagesSuccess`: `number`; `items`: `number`; `itemsSuccess`: `number`; `requests`: `number`; `requestsStartTime`: `Date`; \}\>
## Interfaces

@@ -674,2 +702,24 @@

### GetLinkedinPostCommentsParams
#### Properties
##### post
> **post**: `string` \| `number`
##### page?
> `optional` **page**: `number`
##### paginationToken?
> `optional` **paginationToken**: `null` \| `string`
##### sortBy?
> `optional` **sortBy**: `"date"` \| `"relevance"`
***
### BaseApiResponse

@@ -1815,2 +1865,90 @@

### PostComment
> **PostComment**: `object`
#### Type declaration
##### id
> **id**: `string`
##### linkedinUrl
> **linkedinUrl**: `string`
##### commentary
> **commentary**: `string`
##### createdAt
> **createdAt**: `string`
##### postId
> **postId**: `string`
##### actor
> **actor**: `object`
###### actor.id
> **id**: `string`
###### actor.name
> **name**: `string`
###### actor.linkedinUrl
> **linkedinUrl**: `string`
###### actor.position
> **position**: `string`
###### actor.pictureUrl
> **pictureUrl**: `string`
###### actor.picture
> **picture**: `object`
###### actor.picture.url
> **url**: `string`
###### actor.picture.width
> **width**: `number`
###### actor.picture.height
> **height**: `number`
###### actor.picture.expiresAt
> **expiresAt**: `number`
##### createdAtTimestamp
> **createdAtTimestamp**: `number`
##### pinned?
> `optional` **pinned**: `boolean` \| `null`
##### contributed?
> `optional` **contributed**: `boolean` \| `null`
##### edited?
> `optional` **edited**: `boolean` \| `null`
***
### ScrapeLinkedinJobsParams

@@ -1880,2 +2018,14 @@

### ScrapeLinkedinPostCommentsParams
> **ScrapeLinkedinPostCommentsParams**: `object` & [`ListingScraperConfig`](#listingscraperconfigtitemshot-titemdetails)\<[`PostComment`](#postcomment), [`PostComment`](#postcomment)\>
#### Type declaration
##### query
> **query**: [`GetLinkedinPostCommentsParams`](#getlinkedinpostcommentsparams)
***
### ErrorResponse

@@ -1925,3 +2075,3 @@

> **pagination**: \{ `totalPages`: `number`; `totalElements`: `number`; `pageNumber`: `number`; `previousElements`: `number`; `pageSize`: `number`; \} \| `null`
> **pagination**: \{ `totalPages`: `number`; `totalElements`: `number`; `pageNumber`: `number`; `previousElements`: `number`; `pageSize`: `number`; `paginationToken`: `string` \| `null`; \} \| `null`

@@ -1928,0 +2078,0 @@ ##### elements

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet