Socket
Socket
Sign inDemoInstall

@crawlee/core

Package Overview
Dependencies
Maintainers
0
Versions
1207
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@crawlee/core - npm Package Compare versions

Comparing version 3.11.5-beta.14 to 3.11.5-beta.15

10

package.json
{
"name": "@crawlee/core",
"version": "3.11.5-beta.14",
"version": "3.11.5-beta.15",
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",

@@ -62,5 +62,5 @@ "engines": {

"@apify/utilities": "^2.7.10",
"@crawlee/memory-storage": "3.11.5-beta.14",
"@crawlee/types": "3.11.5-beta.14",
"@crawlee/utils": "3.11.5-beta.14",
"@crawlee/memory-storage": "3.11.5-beta.15",
"@crawlee/types": "3.11.5-beta.15",
"@crawlee/utils": "3.11.5-beta.15",
"@sapphire/async-queue": "^1.5.1",

@@ -87,3 +87,3 @@ "@vladfrangu/async_event_emitter": "^2.2.2",

},
"gitHead": "bd1b4b94090e77278d7a71a91a05629582ce25a5"
"gitHead": "e7a2e8bd055eec66d79109b6e66ddae15dfc8544"
}

@@ -22,2 +22,10 @@ import { ListDictionary, LruCache } from '@apify/datastructures';

assumedHandledCount: number;
/**
* Counts enqueued `forefront` requests. This is used to invalidate the local head cache.
* We can trust these numbers only in a case that queue is used by a single client.
*
* Note that this number does not have to be persisted, as the local head cache is dropped
* on a migration event.
*/
protected assumedForefrontCount: number;
private initialCount;

@@ -248,2 +256,7 @@ protected queueHeadIds: ListDictionary<string>;

* By default, it's put to the end of the queue.
*
* In case the request is already present in the queue, this option has no effect.
*
* If more requests are added with this option at once, their order in the following `fetchNextRequest` call
* is arbitrary.
* @default false

@@ -250,0 +263,0 @@ */

@@ -92,2 +92,15 @@ "use strict";

});
/**
* Counts enqueued `forefront` requests. This is used to invalidate the local head cache.
* We can trust these numbers only in a case that queue is used by a single client.
*
* Note that this number does not have to be persisted, as the local head cache is dropped
* on a migration event.
*/
Object.defineProperty(this, "assumedForefrontCount", {
enumerable: true,
configurable: true,
writable: true,
value: 0
});
Object.defineProperty(this, "initialCount", {

@@ -209,2 +222,3 @@ enumerable: true,

this.assumedTotalCount++;
this.assumedForefrontCount += forefront ? 1 : 0;
// Performance optimization: add request straight to head if possible

@@ -299,2 +313,3 @@ this._maybeAddRequestToQueueHead(requestId, forefront);

this.assumedTotalCount++;
this.assumedForefrontCount += forefront ? 1 : 0;
// Performance optimization: add request straight to head if possible

@@ -460,2 +475,3 @@ this._maybeAddRequestToQueueHead(requestId, forefront);

queueOperationInfo.uniqueKey = request.uniqueKey;
this.assumedForefrontCount += forefront ? 1 : 0;
this._cacheRequest((0, utils_2.getRequestId)(request.uniqueKey), queueOperationInfo);

@@ -462,0 +478,0 @@ return queueOperationInfo;

@@ -42,2 +42,9 @@ import type { Dictionary } from '@crawlee/types';

private _listHeadAndLockPromise;
/**
* Returns `true` if there are any requests in the queue that were enqueued to the forefront.
*
* Can return false negatives, but never false positives.
* @returns `true` if there are `forefront` requests in the queue.
*/
private hasPendingForefrontRequests;
constructor(options: RequestProviderOptions, config?: Configuration);

@@ -44,0 +51,0 @@ /**

@@ -51,2 +51,12 @@ "use strict";

class RequestQueue extends request_provider_1.RequestProvider {
/**
* Returns `true` if there are any requests in the queue that were enqueued to the forefront.
*
* Can return false negatives, but never false positives.
* @returns `true` if there are `forefront` requests in the queue.
*/
async hasPendingForefrontRequests() {
const queueInfo = await this.client.get();
return this.assumedForefrontCount > 0 && !queueInfo?.hadMultipleClients;
}
constructor(options, config = configuration_1.Configuration.getGlobalConfig()) {

@@ -155,3 +165,3 @@ super({

// We want to fetch ahead of time to minimize dead time
if (this.queueHeadIds.length() > 1) {
if (this.queueHeadIds.length() > 1 && !(await this.hasPendingForefrontRequests())) {
return;

@@ -165,10 +175,21 @@ }

async _listHeadAndLock() {
const headData = await this.client.listAndLockHead({ limit: 25, lockSecs: this.requestLockSecs });
const forefront = await this.hasPendingForefrontRequests();
const headData = await this.client.listAndLockHead({
limit: Math.min(forefront ? this.assumedForefrontCount : 25, 25),
lockSecs: this.requestLockSecs,
});
const headIdBuffer = [];
for (const { id, uniqueKey } of headData.items) {
// Queue head index might be behind the main table, so ensure we don't recycle requests
if (!id || !uniqueKey || this.recentlyHandledRequestsCache.get(id)) {
this.log.debug(`Skipping request from queue head as it's invalid or recently handled`, {
if (!id ||
!uniqueKey ||
this.recentlyHandledRequestsCache.get(id) ||
// If we tried to read new forefront requests, but another client appeared in the meantime, we can't be sure we'll only read our requests.
// To retain the correct queue ordering, we rollback this head read.
(forefront && headData.hadMultipleClients)) {
this.log.debug(`Skipping request from queue head as it's potentially invalid or recently handled`, {
id,
uniqueKey,
recentlyHandled: !!this.recentlyHandledRequestsCache.get(id),
inconsistentForefrontRead: forefront && headData.hadMultipleClients,
});

@@ -185,3 +206,4 @@ // Remove the lock from the request for now, so that it can be picked up later

}
this.queueHeadIds.add(id, id, false);
headIdBuffer.push(id);
this.assumedForefrontCount = Math.max(0, this.assumedForefrontCount - 1);
this._cacheRequest((0, utils_1.getRequestId)(uniqueKey), {

@@ -194,2 +216,5 @@ requestId: id,

}
for (const id of forefront ? headIdBuffer.reverse() : headIdBuffer) {
this.queueHeadIds.add(id, id, forefront);
}
}

@@ -196,0 +221,0 @@ async getOrHydrateRequest(requestId) {

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc