@crawlee/core
Advanced tools
Comparing version 3.11.5-beta.14 to 3.11.5-beta.15
{ | ||
"name": "@crawlee/core", | ||
"version": "3.11.5-beta.14", | ||
"version": "3.11.5-beta.15", | ||
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", | ||
@@ -62,5 +62,5 @@ "engines": { | ||
"@apify/utilities": "^2.7.10", | ||
"@crawlee/memory-storage": "3.11.5-beta.14", | ||
"@crawlee/types": "3.11.5-beta.14", | ||
"@crawlee/utils": "3.11.5-beta.14", | ||
"@crawlee/memory-storage": "3.11.5-beta.15", | ||
"@crawlee/types": "3.11.5-beta.15", | ||
"@crawlee/utils": "3.11.5-beta.15", | ||
"@sapphire/async-queue": "^1.5.1", | ||
@@ -87,3 +87,3 @@ "@vladfrangu/async_event_emitter": "^2.2.2", | ||
}, | ||
"gitHead": "bd1b4b94090e77278d7a71a91a05629582ce25a5" | ||
"gitHead": "e7a2e8bd055eec66d79109b6e66ddae15dfc8544" | ||
} |
@@ -22,2 +22,10 @@ import { ListDictionary, LruCache } from '@apify/datastructures'; | ||
assumedHandledCount: number; | ||
/** | ||
* Counts enqueued `forefront` requests. This is used to invalidate the local head cache. | ||
* We can trust these numbers only if the queue is used by a single client. | ||
* | ||
* Note that this number does not have to be persisted, as the local head cache is dropped | ||
* on a migration event. | ||
*/ | ||
protected assumedForefrontCount: number; | ||
private initialCount; | ||
@@ -248,2 +256,7 @@ protected queueHeadIds: ListDictionary<string>; | ||
* By default, it's put to the end of the queue. | ||
* | ||
* In case the request is already present in the queue, this option has no effect. | ||
* | ||
* If multiple requests are added with this option at once, their order in the following `fetchNextRequest` call | ||
* is arbitrary. | ||
* @default false | ||
@@ -250,0 +263,0 @@ */ |
@@ -92,2 +92,15 @@ "use strict"; | ||
}); | ||
/** | ||
* Counts enqueued `forefront` requests. This is used to invalidate the local head cache. | ||
* We can trust these numbers only if the queue is used by a single client. | ||
* | ||
* Note that this number does not have to be persisted, as the local head cache is dropped | ||
* on a migration event. | ||
*/ | ||
Object.defineProperty(this, "assumedForefrontCount", { | ||
enumerable: true, | ||
configurable: true, | ||
writable: true, | ||
value: 0 | ||
}); | ||
Object.defineProperty(this, "initialCount", { | ||
@@ -209,2 +222,3 @@ enumerable: true, | ||
this.assumedTotalCount++; | ||
this.assumedForefrontCount += forefront ? 1 : 0; | ||
// Performance optimization: add request straight to head if possible | ||
@@ -299,2 +313,3 @@ this._maybeAddRequestToQueueHead(requestId, forefront); | ||
this.assumedTotalCount++; | ||
this.assumedForefrontCount += forefront ? 1 : 0; | ||
// Performance optimization: add request straight to head if possible | ||
@@ -460,2 +475,3 @@ this._maybeAddRequestToQueueHead(requestId, forefront); | ||
queueOperationInfo.uniqueKey = request.uniqueKey; | ||
this.assumedForefrontCount += forefront ? 1 : 0; | ||
this._cacheRequest((0, utils_2.getRequestId)(request.uniqueKey), queueOperationInfo); | ||
@@ -462,0 +478,0 @@ return queueOperationInfo; |
@@ -42,2 +42,9 @@ import type { Dictionary } from '@crawlee/types'; | ||
private _listHeadAndLockPromise; | ||
/** | ||
* Returns `true` if there are any requests in the queue that were enqueued to the forefront. | ||
* | ||
* Can return false negatives, but never false positives. | ||
* @returns `true` if there are `forefront` requests in the queue. | ||
*/ | ||
private hasPendingForefrontRequests; | ||
constructor(options: RequestProviderOptions, config?: Configuration); | ||
@@ -44,0 +51,0 @@ /** |
@@ -51,2 +51,12 @@ "use strict"; | ||
class RequestQueue extends request_provider_1.RequestProvider { | ||
/** | ||
* Returns `true` if there are any requests in the queue that were enqueued to the forefront. | ||
* | ||
* Can return false negatives, but never false positives. | ||
* @returns `true` if there are `forefront` requests in the queue. | ||
*/ | ||
async hasPendingForefrontRequests() { | ||
const queueInfo = await this.client.get(); | ||
return this.assumedForefrontCount > 0 && !queueInfo?.hadMultipleClients; | ||
} | ||
constructor(options, config = configuration_1.Configuration.getGlobalConfig()) { | ||
@@ -155,3 +165,3 @@ super({ | ||
// We want to fetch ahead of time to minimize dead time | ||
if (this.queueHeadIds.length() > 1) { | ||
if (this.queueHeadIds.length() > 1 && !(await this.hasPendingForefrontRequests())) { | ||
return; | ||
@@ -165,10 +175,21 @@ } | ||
async _listHeadAndLock() { | ||
const headData = await this.client.listAndLockHead({ limit: 25, lockSecs: this.requestLockSecs }); | ||
const forefront = await this.hasPendingForefrontRequests(); | ||
const headData = await this.client.listAndLockHead({ | ||
limit: Math.min(forefront ? this.assumedForefrontCount : 25, 25), | ||
lockSecs: this.requestLockSecs, | ||
}); | ||
const headIdBuffer = []; | ||
for (const { id, uniqueKey } of headData.items) { | ||
// Queue head index might be behind the main table, so ensure we don't recycle requests | ||
if (!id || !uniqueKey || this.recentlyHandledRequestsCache.get(id)) { | ||
this.log.debug(`Skipping request from queue head as it's invalid or recently handled`, { | ||
if (!id || | ||
!uniqueKey || | ||
this.recentlyHandledRequestsCache.get(id) || | ||
// If we tried to read new forefront requests, but another client appeared in the meantime, we can't be sure we'll only read our requests. | ||
// To retain the correct queue ordering, we rollback this head read. | ||
(forefront && headData.hadMultipleClients)) { | ||
this.log.debug(`Skipping request from queue head as it's potentially invalid or recently handled`, { | ||
id, | ||
uniqueKey, | ||
recentlyHandled: !!this.recentlyHandledRequestsCache.get(id), | ||
inconsistentForefrontRead: forefront && headData.hadMultipleClients, | ||
}); | ||
@@ -185,3 +206,4 @@ // Remove the lock from the request for now, so that it can be picked up later | ||
} | ||
this.queueHeadIds.add(id, id, false); | ||
headIdBuffer.push(id); | ||
this.assumedForefrontCount = Math.max(0, this.assumedForefrontCount - 1); | ||
this._cacheRequest((0, utils_1.getRequestId)(uniqueKey), { | ||
@@ -194,2 +216,5 @@ requestId: id, | ||
} | ||
for (const id of forefront ? headIdBuffer.reverse() : headIdBuffer) { | ||
this.queueHeadIds.add(id, id, forefront); | ||
} | ||
} | ||
@@ -196,0 +221,0 @@ async getOrHydrateRequest(requestId) { |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
1070078
14835
+ Added@crawlee/memory-storage@3.11.5-beta.15(transitive)
+ Added@crawlee/types@3.11.5-beta.15(transitive)
+ Added@crawlee/utils@3.11.5-beta.15(transitive)
- Removed@crawlee/memory-storage@3.11.5-beta.14(transitive)
- Removed@crawlee/types@3.11.5-beta.14(transitive)
- Removed@crawlee/utils@3.11.5-beta.14(transitive)