@crawlee/core
Advanced tools
Comparing version 3.0.0-beta.33 to 3.0.0-beta.34
@@ -37,2 +37,3 @@ /// <reference types="node" /> | ||
emit(event: EventTypeName, ...args: unknown[]): void; | ||
isInitialized(): boolean; | ||
/** | ||
@@ -39,0 +40,0 @@ * @internal |
@@ -90,2 +90,5 @@ "use strict"; | ||
} | ||
isInitialized() { | ||
return this.initialized; | ||
} | ||
/** | ||
@@ -92,0 +95,0 @@ * @internal |
@@ -15,2 +15,3 @@ export * from './autoscaling'; | ||
export * from './validators'; | ||
export { Dictionary, Awaitable, Constructor } from '@crawlee/types'; | ||
//# sourceMappingURL=index.d.ts.map |
{ | ||
"name": "@crawlee/core", | ||
"version": "3.0.0-beta.33", | ||
"version": "3.0.0-beta.34", | ||
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", | ||
@@ -58,6 +58,6 @@ "engines": { | ||
"@apify/utilities": "^1.2.12", | ||
"@crawlee/browser-pool": "^3.0.0-beta.33", | ||
"@crawlee/memory-storage": "^3.0.0-beta.33", | ||
"@crawlee/types": "^3.0.0-beta.33", | ||
"@crawlee/utils": "^3.0.0-beta.33", | ||
"@crawlee/browser-pool": "^3.0.0-beta.34", | ||
"@crawlee/memory-storage": "^3.0.0-beta.34", | ||
"@crawlee/types": "^3.0.0-beta.34", | ||
"@crawlee/utils": "^3.0.0-beta.34", | ||
"@types/tough-cookie": "^4.0.2", | ||
@@ -64,0 +64,0 @@ "iconv-lite": "^0.6.3", |
import type { DatasetClient, DatasetInfo, PaginatedList, StorageClient, Dictionary } from '@crawlee/types'; | ||
import { Configuration } from '../configuration'; | ||
import { Awaitable } from '../typedefs'; | ||
@@ -133,2 +134,3 @@ import { StorageManagerOptions } from './storage_manager'; | ||
export declare class Dataset<Data extends Dictionary = Dictionary> { | ||
readonly config: Configuration; | ||
id: string; | ||
@@ -141,3 +143,3 @@ name?: string; | ||
*/ | ||
constructor(options: DatasetOptions); | ||
constructor(options: DatasetOptions, config?: Configuration); | ||
/** | ||
@@ -144,0 +146,0 @@ * Stores an object or an array of objects to the dataset. |
@@ -133,3 +133,9 @@ "use strict"; | ||
*/ | ||
constructor(options) { | ||
constructor(options, config = configuration_1.Configuration.getGlobalConfig()) { | ||
Object.defineProperty(this, "config", { | ||
enumerable: true, | ||
configurable: true, | ||
writable: true, | ||
value: config | ||
}); | ||
Object.defineProperty(this, "id", { | ||
@@ -341,3 +347,3 @@ enumerable: true, | ||
await this.client.delete(); | ||
const manager = new storage_manager_1.StorageManager(Dataset); | ||
const manager = storage_manager_1.StorageManager.getManager(Dataset, this.config); | ||
manager.closeStorage(this); | ||
@@ -364,4 +370,5 @@ } | ||
})); | ||
const manager = new storage_manager_1.StorageManager(Dataset, options.config); | ||
return manager.openStorage(datasetIdOrName); | ||
options.config ?? (options.config = configuration_1.Configuration.getGlobalConfig()); | ||
const manager = storage_manager_1.StorageManager.getManager(Dataset, options.config); | ||
return manager.openStorage(datasetIdOrName, options.config.getStorageClient()); | ||
} | ||
@@ -368,0 +375,0 @@ /** |
@@ -83,3 +83,6 @@ /// <reference types="node" /> | ||
readonly name?: string; | ||
private client; | ||
private readonly client; | ||
private persistStateEventStarted; | ||
/** Cache for persistent (auto-saved) values. When we try to set such a value, the cache will be updated automatically. */ | ||
private readonly cache; | ||
/** | ||
@@ -114,2 +117,4 @@ * @internal | ||
* of the following characters: `a`-`z`, `A`-`Z`, `0`-`9` and `!-_.'()` | ||
* @param defaultValue | ||
* Fallback that will be returned if no value is present in the storage. | ||
* @returns | ||
@@ -120,3 +125,5 @@ * Returns a promise that resolves to an object, string | ||
*/ | ||
getValue<T = unknown>(key: string): Promise<T | null>; | ||
getValue<T = unknown>(key: string, defaultValue?: T): Promise<T | null>; | ||
getAutoSavedValue<T extends Dictionary = Dictionary>(key: string, defaultValue?: T): Promise<T>; | ||
private ensurePersistStateEvent; | ||
/** | ||
@@ -176,2 +183,4 @@ * Saves or deletes a record in the key-value store. | ||
getPublicUrl(key: string): string; | ||
/** @internal */ | ||
clearCache(): void; | ||
/** | ||
@@ -236,2 +245,3 @@ * Iterates over key-value store keys, yielding each in turn to an `iteratee` function. | ||
* @param key Unique record key. | ||
* @param defaultValue Fallback that will be returned if no value is present in the storage. | ||
* @returns | ||
@@ -244,3 +254,4 @@ * Returns a promise that resolves to an object, string | ||
*/ | ||
static getValue<T = unknown>(key: string): Promise<T | null>; | ||
static getValue<T = unknown>(key: string, defaultValue?: T): Promise<T | null>; | ||
static getAutoSavedValue<T extends Dictionary = Dictionary>(key: string, defaultValue?: T): Promise<T>; | ||
/** | ||
@@ -247,0 +258,0 @@ * Stores or deletes a value in the default {@link KeyValueStore} associated with the current crawler run. |
@@ -133,2 +133,15 @@ "use strict"; | ||
}); | ||
Object.defineProperty(this, "persistStateEventStarted", { | ||
enumerable: true, | ||
configurable: true, | ||
writable: true, | ||
value: false | ||
}); | ||
/** Cache for persistent (auto-saved) values. When we try to set such a value, the cache will be updated automatically. */ | ||
Object.defineProperty(this, "cache", { | ||
enumerable: true, | ||
configurable: true, | ||
writable: true, | ||
value: new Map() | ||
}); | ||
this.id = options.id; | ||
@@ -163,2 +176,4 @@ this.name = options.name; | ||
* of the following characters: `a`-`z`, `A`-`Z`, `0`-`9` and `!-_.'()` | ||
* @param defaultValue | ||
* Fallback that will be returned if no value is present in the storage. | ||
* @returns | ||
@@ -169,7 +184,27 @@ * Returns a promise that resolves to an object, string | ||
*/ | ||
async getValue(key) { | ||
async getValue(key, defaultValue) { | ||
(0, ow_1.default)(key, ow_1.default.string.nonEmpty); | ||
const record = await this.client.getRecord(key); | ||
return record?.value ?? null; | ||
return record?.value ?? defaultValue ?? null; | ||
} | ||
async getAutoSavedValue(key, defaultValue = {}) { | ||
if (this.cache.has(key)) { | ||
return this.cache.get(key); | ||
} | ||
const value = await this.getValue(key, defaultValue); | ||
this.cache.set(key, value); | ||
this.ensurePersistStateEvent(); | ||
return value; | ||
} | ||
ensurePersistStateEvent() { | ||
if (this.persistStateEventStarted) { | ||
return; | ||
} | ||
this.config.getEventManager().on('persistState', async () => { | ||
for (const [key, value] of this.cache) { | ||
await this.setValue(key, value); | ||
} | ||
}); | ||
this.persistStateEventStarted = true; | ||
} | ||
/** | ||
@@ -233,2 +268,18 @@ * Saves or deletes a record in the key-value store. | ||
const optionsCopy = { ...options }; | ||
// If we try to set the value of a cached state to a different reference, we need to update the cache accordingly. | ||
const cachedValue = this.cache.get(key); | ||
if (this.cache.has(key) && cachedValue !== value) { | ||
if (value === null) { | ||
// Cached state can be only object, so a propagation of `null` means removing all its properties. | ||
Object.keys(cachedValue).forEach((k) => this.cache.delete(k)); | ||
} | ||
else if (typeof value === 'object') { | ||
// We need to remove the keys that are no longer present in the new value. | ||
Object.keys(cachedValue) | ||
.filter((k) => !(k in value)) | ||
.forEach((k) => this.cache.delete(k)); | ||
// And update the existing ones + add new ones. | ||
Object.assign(cachedValue, value); | ||
} | ||
} | ||
// In this case delete the record. | ||
@@ -250,3 +301,3 @@ if (value === null) | ||
await this.client.delete(); | ||
const manager = new storage_manager_1.StorageManager(KeyValueStore, this.config); | ||
const manager = storage_manager_1.StorageManager.getManager(KeyValueStore, this.config); | ||
manager.closeStorage(this); | ||
@@ -262,2 +313,6 @@ } | ||
} | ||
/** @internal */ | ||
clearCache() { | ||
this.cache.clear(); | ||
} | ||
/** | ||
@@ -317,7 +372,7 @@ * Iterates over key-value store keys, yielding each in turn to an `iteratee` function. | ||
static async open(storeIdOrName, options = {}) { | ||
(0, ow_1.default)(storeIdOrName, ow_1.default.optional.string); | ||
(0, ow_1.default)(storeIdOrName, ow_1.default.optional.any(ow_1.default.string, ow_1.default.null)); | ||
(0, ow_1.default)(options, ow_1.default.object.exactShape({ | ||
config: ow_1.default.optional.object.instanceOf(configuration_1.Configuration), | ||
})); | ||
const manager = new storage_manager_1.StorageManager(KeyValueStore, options.config); | ||
const manager = storage_manager_1.StorageManager.getManager(KeyValueStore, options.config); | ||
return manager.openStorage(storeIdOrName); | ||
@@ -346,2 +401,3 @@ } | ||
* @param key Unique record key. | ||
* @param defaultValue Fallback that will be returned if no value is present in the storage. | ||
* @returns | ||
@@ -354,6 +410,10 @@ * Returns a promise that resolves to an object, string | ||
*/ | ||
static async getValue(key) { | ||
static async getValue(key, defaultValue) { | ||
const store = await this.open(); | ||
return store.getValue(key); | ||
return store.getValue(key, defaultValue); | ||
} | ||
static async getAutoSavedValue(key, defaultValue = {}) { | ||
const store = await this.open(); | ||
return store.getAutoSavedValue(key, defaultValue); | ||
} | ||
/** | ||
@@ -360,0 +420,0 @@ * Stores or deletes a value in the default {@link KeyValueStore} associated with the current crawler run. |
@@ -704,3 +704,3 @@ "use strict"; | ||
static async open(listName, sources, options = {}) { | ||
(0, ow_1.default)(listName, ow_1.default.any(ow_1.default.string, ow_1.default.null)); | ||
(0, ow_1.default)(listName, ow_1.default.optional.any(ow_1.default.string, ow_1.default.null)); | ||
(0, ow_1.default)(sources, ow_1.default.array); | ||
@@ -707,0 +707,0 @@ (0, ow_1.default)(options, ow_1.default.object.is((v) => !Array.isArray(v))); |
import { LruCache } from '@apify/datastructures'; | ||
import { BatchAddRequestsResult, Dictionary, RequestQueueClient, RequestQueueInfo, StorageClient } from '@crawlee/types'; | ||
import { StorageManagerOptions } from './storage_manager'; | ||
import { Request, RequestOptions } from '../request'; | ||
import { Configuration } from '../configuration'; | ||
/** | ||
@@ -109,2 +111,3 @@ * When requesting queue head we always fetch requestsInProgressCount * QUERY_HEAD_BUFFER number of requests. | ||
export declare class RequestQueue { | ||
readonly config: Configuration; | ||
log: import("@apify/log/log").Log; | ||
@@ -142,3 +145,3 @@ id: string; | ||
*/ | ||
constructor(options: RequestQueueOptions); | ||
constructor(options: RequestQueueOptions, config?: Configuration); | ||
/** | ||
@@ -306,3 +309,3 @@ * @ignore | ||
*/ | ||
static open(queueIdOrName?: string | null): Promise<RequestQueue>; | ||
static open(queueIdOrName?: string | null, options?: StorageManagerOptions): Promise<RequestQueue>; | ||
} | ||
@@ -309,0 +312,0 @@ export interface RequestQueueOptions { |
@@ -14,2 +14,3 @@ "use strict"; | ||
const request_1 = require("../request"); | ||
const configuration_1 = require("../configuration"); | ||
const MAX_CACHED_REQUESTS = 1000000; | ||
@@ -114,3 +115,9 @@ /** | ||
*/ | ||
constructor(options) { | ||
constructor(options, config = configuration_1.Configuration.getGlobalConfig()) { | ||
Object.defineProperty(this, "config", { | ||
enumerable: true, | ||
configurable: true, | ||
writable: true, | ||
value: config | ||
}); | ||
Object.defineProperty(this, "log", { | ||
@@ -665,3 +672,3 @@ enumerable: true, | ||
await this.client.delete(); | ||
const manager = new storage_manager_1.StorageManager(RequestQueue); | ||
const manager = storage_manager_1.StorageManager.getManager(RequestQueue, this.config); | ||
manager.closeStorage(this); | ||
@@ -726,5 +733,8 @@ } | ||
*/ | ||
static async open(queueIdOrName) { | ||
static async open(queueIdOrName, options = {}) { | ||
(0, ow_1.default)(queueIdOrName, ow_1.default.optional.string); | ||
const manager = new storage_manager_1.StorageManager(RequestQueue); | ||
(0, ow_1.default)(options, ow_1.default.object.exactShape({ | ||
config: ow_1.default.optional.object.instanceOf(configuration_1.Configuration), | ||
})); | ||
const manager = storage_manager_1.StorageManager.getManager(RequestQueue, options.config); | ||
return manager.openStorage(queueIdOrName); | ||
@@ -731,0 +741,0 @@ } |
@@ -1,2 +0,2 @@ | ||
import { StorageClient } from '@crawlee/types'; | ||
import { Dictionary, StorageClient } from '@crawlee/types'; | ||
import { Configuration } from '../configuration'; | ||
@@ -14,3 +14,3 @@ import { Constructor } from '../typedefs'; | ||
private readonly config; | ||
private static readonly MAX_OPENED_STORAGES; | ||
private static readonly storageManagers; | ||
private readonly name; | ||
@@ -20,3 +20,7 @@ private readonly StorageConstructor; | ||
constructor(StorageConstructor: Constructor<T>, config?: Configuration); | ||
openStorage(idOrName?: string, client?: StorageClient): Promise<T>; | ||
static openStorage<T extends IStorage>(storageClass: Constructor<T>, idOrName?: string, client?: StorageClient, config?: Configuration): Promise<T>; | ||
static getManager<T extends IStorage>(storageClass: Constructor<T>, config?: Configuration): StorageManager<T>; | ||
/** @internal */ | ||
static clearCache(): void; | ||
openStorage(idOrName?: string | null, client?: StorageClient): Promise<T>; | ||
closeStorage(storage: { | ||
@@ -31,3 +35,3 @@ id: string; | ||
protected _getStorageClientFactories(client: StorageClient, storageConstructorName: string): { | ||
createStorageClient: ((id: string) => import("@crawlee/types").DatasetClient<import("@crawlee/types").Dictionary<any>>) | ((id: string) => import("@crawlee/types").KeyValueStoreClient) | ((id: string, options?: import("@crawlee/types").RequestQueueOptions | undefined) => import("@crawlee/types").RequestQueueClient); | ||
createStorageClient: ((id: string) => import("@crawlee/types").DatasetClient<Dictionary<any>>) | ((id: string) => import("@crawlee/types").KeyValueStoreClient) | ((id: string, options?: import("@crawlee/types").RequestQueueOptions | undefined) => import("@crawlee/types").RequestQueueClient); | ||
createStorageCollectionClient: (() => import("@crawlee/types").DatasetCollectionClient) | (() => import("@crawlee/types").KeyValueStoreCollectionClient) | (() => import("@crawlee/types").RequestQueueCollectionClient); | ||
@@ -34,0 +38,0 @@ }; |
@@ -5,3 +5,2 @@ "use strict"; | ||
const consts_1 = require("@apify/consts"); | ||
const datastructures_1 = require("@apify/datastructures"); | ||
const configuration_1 = require("../configuration"); | ||
@@ -46,8 +45,28 @@ const DEFAULT_ID_ENV_VAR_NAMES = { | ||
writable: true, | ||
value: void 0 | ||
value: new Map() | ||
}); | ||
this.StorageConstructor = StorageConstructor; | ||
this.name = this.StorageConstructor.name; | ||
this.cache = new datastructures_1.LruCache({ maxLength: StorageManager.MAX_OPENED_STORAGES }); | ||
} | ||
static openStorage(storageClass, idOrName, client, config = configuration_1.Configuration.getGlobalConfig()) { | ||
return this.getManager(storageClass, config).openStorage(idOrName, client); | ||
} | ||
static getManager(storageClass, config = configuration_1.Configuration.getGlobalConfig()) { | ||
if (!this.storageManagers.has(storageClass)) { | ||
const manager = new StorageManager(storageClass, config); | ||
this.storageManagers.set(storageClass, manager); | ||
} | ||
return this.storageManagers.get(storageClass); | ||
} | ||
/** @internal */ | ||
static clearCache() { | ||
this.storageManagers.forEach((manager) => { | ||
if (manager.name === 'KeyValueStore') { | ||
manager.cache.forEach((item) => { | ||
item.clearCache?.(); | ||
}); | ||
} | ||
}); | ||
this.storageManagers.clear(); | ||
} | ||
async openStorage(idOrName, client) { | ||
@@ -77,6 +96,6 @@ if (!idOrName) { | ||
const idKey = storage.id; | ||
this.cache.remove(idKey); | ||
this.cache.delete(idKey); | ||
if (storage.name) { | ||
const nameKey = storage.name; | ||
this.cache.remove(nameKey); | ||
this.cache.delete(nameKey); | ||
} | ||
@@ -108,6 +127,6 @@ } | ||
const idKey = storage.id; | ||
this.cache.add(idKey, storage); | ||
this.cache.set(idKey, storage); | ||
if (storage.name) { | ||
const nameKey = storage.name; | ||
this.cache.add(nameKey, storage); | ||
this.cache.set(nameKey, storage); | ||
} | ||
@@ -117,8 +136,8 @@ } | ||
exports.StorageManager = StorageManager; | ||
Object.defineProperty(StorageManager, "MAX_OPENED_STORAGES", { | ||
Object.defineProperty(StorageManager, "storageManagers", { | ||
enumerable: true, | ||
configurable: true, | ||
writable: true, | ||
value: 1000 | ||
value: new Map() | ||
}); | ||
//# sourceMappingURL=storage_manager.js.map |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
726583
10600