Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

@crawlee/core

Package Overview
Dependencies
Maintainers
1
Versions
1229
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@crawlee/core - npm Package Compare versions

Comparing version 3.0.0-beta.33 to 3.0.0-beta.34

1

events/event_manager.d.ts

@@ -37,2 +37,3 @@ /// <reference types="node" />

emit(event: EventTypeName, ...args: unknown[]): void;
isInitialized(): boolean;
/**

@@ -39,0 +40,0 @@ * @internal

@@ -90,2 +90,5 @@ "use strict";

}
isInitialized() {
return this.initialized;
}
/**

@@ -92,0 +95,0 @@ * @internal

@@ -15,2 +15,3 @@ export * from './autoscaling';

export * from './validators';
export { Dictionary, Awaitable, Constructor } from '@crawlee/types';
//# sourceMappingURL=index.d.ts.map

10

package.json
{
"name": "@crawlee/core",
"version": "3.0.0-beta.33",
"version": "3.0.0-beta.34",
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",

@@ -58,6 +58,6 @@ "engines": {

"@apify/utilities": "^1.2.12",
"@crawlee/browser-pool": "^3.0.0-beta.33",
"@crawlee/memory-storage": "^3.0.0-beta.33",
"@crawlee/types": "^3.0.0-beta.33",
"@crawlee/utils": "^3.0.0-beta.33",
"@crawlee/browser-pool": "^3.0.0-beta.34",
"@crawlee/memory-storage": "^3.0.0-beta.34",
"@crawlee/types": "^3.0.0-beta.34",
"@crawlee/utils": "^3.0.0-beta.34",
"@types/tough-cookie": "^4.0.2",

@@ -64,0 +64,0 @@ "iconv-lite": "^0.6.3",

import type { DatasetClient, DatasetInfo, PaginatedList, StorageClient, Dictionary } from '@crawlee/types';
import { Configuration } from '../configuration';
import { Awaitable } from '../typedefs';

@@ -133,2 +134,3 @@ import { StorageManagerOptions } from './storage_manager';

export declare class Dataset<Data extends Dictionary = Dictionary> {
readonly config: Configuration;
id: string;

@@ -141,3 +143,3 @@ name?: string;

*/
constructor(options: DatasetOptions);
constructor(options: DatasetOptions, config?: Configuration);
/**

@@ -144,0 +146,0 @@ * Stores an object or an array of objects to the dataset.

@@ -133,3 +133,9 @@ "use strict";

*/
constructor(options) {
constructor(options, config = configuration_1.Configuration.getGlobalConfig()) {
Object.defineProperty(this, "config", {
enumerable: true,
configurable: true,
writable: true,
value: config
});
Object.defineProperty(this, "id", {

@@ -341,3 +347,3 @@ enumerable: true,

await this.client.delete();
const manager = new storage_manager_1.StorageManager(Dataset);
const manager = storage_manager_1.StorageManager.getManager(Dataset, this.config);
manager.closeStorage(this);

@@ -364,4 +370,5 @@ }

}));
const manager = new storage_manager_1.StorageManager(Dataset, options.config);
return manager.openStorage(datasetIdOrName);
options.config ?? (options.config = configuration_1.Configuration.getGlobalConfig());
const manager = storage_manager_1.StorageManager.getManager(Dataset, options.config);
return manager.openStorage(datasetIdOrName, options.config.getStorageClient());
}

@@ -368,0 +375,0 @@ /**

@@ -83,3 +83,6 @@ /// <reference types="node" />

readonly name?: string;
private client;
private readonly client;
private persistStateEventStarted;
/** Cache for persistent (auto-saved) values. When we try to set such value, the cache will be updated automatically. */
private readonly cache;
/**

@@ -114,2 +117,4 @@ * @internal

* of the following characters: `a`-`z`, `A`-`Z`, `0`-`9` and `!-_.'()`
* @param defaultValue
* Fallback that will be returned if no value is present in the storage.
* @returns

@@ -120,3 +125,5 @@ * Returns a promise that resolves to an object, string

*/
getValue<T = unknown>(key: string): Promise<T | null>;
getValue<T = unknown>(key: string, defaultValue?: T): Promise<T | null>;
getAutoSavedValue<T extends Dictionary = Dictionary>(key: string, defaultValue?: T): Promise<T>;
private ensurePersistStateEvent;
/**

@@ -176,2 +183,4 @@ * Saves or deletes a record in the key-value store.

getPublicUrl(key: string): string;
/** @internal */
clearCache(): void;
/**

@@ -236,2 +245,3 @@ * Iterates over key-value store keys, yielding each in turn to an `iteratee` function.

* @param key Unique record key.
* @param defaultValue Fallback that will be returned if no value is present in the storage.
* @returns

@@ -244,3 +254,4 @@ * Returns a promise that resolves to an object, string

*/
static getValue<T = unknown>(key: string): Promise<T | null>;
static getValue<T = unknown>(key: string, defaultValue?: T): Promise<T | null>;
static getAutoSavedValue<T extends Dictionary = Dictionary>(key: string, defaultValue?: T): Promise<T>;
/**

@@ -247,0 +258,0 @@ * Stores or deletes a value in the default {@link KeyValueStore} associated with the current crawler run.

@@ -133,2 +133,15 @@ "use strict";

});
Object.defineProperty(this, "persistStateEventStarted", {
enumerable: true,
configurable: true,
writable: true,
value: false
});
/** Cache for persistent (auto-saved) values. When we try to set such value, the cache will be updated automatically. */
Object.defineProperty(this, "cache", {
enumerable: true,
configurable: true,
writable: true,
value: new Map()
});
this.id = options.id;

@@ -163,2 +176,4 @@ this.name = options.name;

* of the following characters: `a`-`z`, `A`-`Z`, `0`-`9` and `!-_.'()`
* @param defaultValue
* Fallback that will be returned if no value is present in the storage.
* @returns

@@ -169,7 +184,27 @@ * Returns a promise that resolves to an object, string

*/
async getValue(key) {
async getValue(key, defaultValue) {
(0, ow_1.default)(key, ow_1.default.string.nonEmpty);
const record = await this.client.getRecord(key);
return record?.value ?? null;
return record?.value ?? defaultValue ?? null;
}
async getAutoSavedValue(key, defaultValue = {}) {
if (this.cache.has(key)) {
return this.cache.get(key);
}
const value = await this.getValue(key, defaultValue);
this.cache.set(key, value);
this.ensurePersistStateEvent();
return value;
}
ensurePersistStateEvent() {
if (this.persistStateEventStarted) {
return;
}
this.config.getEventManager().on('persistState', async () => {
for (const [key, value] of this.cache) {
await this.setValue(key, value);
}
});
this.persistStateEventStarted = true;
}
/**

@@ -233,2 +268,18 @@ * Saves or deletes a record in the key-value store.

const optionsCopy = { ...options };
// If we try to set the value of a cached state to a different reference, we need to update the cache accordingly.
const cachedValue = this.cache.get(key);
if (this.cache.has(key) && cachedValue !== value) {
if (value === null) {
// Cached state can be only object, so a propagation of `null` means removing all its properties.
Object.keys(cachedValue).forEach((k) => this.cache.delete(k));
}
else if (typeof value === 'object') {
// We need to remove the keys that are no longer present in the new value.
Object.keys(cachedValue)
.filter((k) => !(k in value))
.forEach((k) => this.cache.delete(k));
// And update the existing ones + add new ones.
Object.assign(cachedValue, value);
}
}
// In this case delete the record.

@@ -250,3 +301,3 @@ if (value === null)

await this.client.delete();
const manager = new storage_manager_1.StorageManager(KeyValueStore, this.config);
const manager = storage_manager_1.StorageManager.getManager(KeyValueStore, this.config);
manager.closeStorage(this);

@@ -262,2 +313,6 @@ }

}
/** @internal */
clearCache() {
this.cache.clear();
}
/**

@@ -317,7 +372,7 @@ * Iterates over key-value store keys, yielding each in turn to an `iteratee` function.

static async open(storeIdOrName, options = {}) {
(0, ow_1.default)(storeIdOrName, ow_1.default.optional.string);
(0, ow_1.default)(storeIdOrName, ow_1.default.optional.any(ow_1.default.string, ow_1.default.null));
(0, ow_1.default)(options, ow_1.default.object.exactShape({
config: ow_1.default.optional.object.instanceOf(configuration_1.Configuration),
}));
const manager = new storage_manager_1.StorageManager(KeyValueStore, options.config);
const manager = storage_manager_1.StorageManager.getManager(KeyValueStore, options.config);
return manager.openStorage(storeIdOrName);

@@ -346,2 +401,3 @@ }

* @param key Unique record key.
* @param defaultValue Fallback that will be returned if no value is present in the storage.
* @returns

@@ -354,6 +410,10 @@ * Returns a promise that resolves to an object, string

*/
static async getValue(key) {
static async getValue(key, defaultValue) {
const store = await this.open();
return store.getValue(key);
return store.getValue(key, defaultValue);
}
static async getAutoSavedValue(key, defaultValue = {}) {
const store = await this.open();
return store.getAutoSavedValue(key, defaultValue);
}
/**

@@ -360,0 +420,0 @@ * Stores or deletes a value in the default {@link KeyValueStore} associated with the current crawler run.

@@ -704,3 +704,3 @@ "use strict";

static async open(listName, sources, options = {}) {
(0, ow_1.default)(listName, ow_1.default.any(ow_1.default.string, ow_1.default.null));
(0, ow_1.default)(listName, ow_1.default.optional.any(ow_1.default.string, ow_1.default.null));
(0, ow_1.default)(sources, ow_1.default.array);

@@ -707,0 +707,0 @@ (0, ow_1.default)(options, ow_1.default.object.is((v) => !Array.isArray(v)));

import { LruCache } from '@apify/datastructures';
import { BatchAddRequestsResult, Dictionary, RequestQueueClient, RequestQueueInfo, StorageClient } from '@crawlee/types';
import { StorageManagerOptions } from './storage_manager';
import { Request, RequestOptions } from '../request';
import { Configuration } from '../configuration';
/**

@@ -109,2 +111,3 @@ * When requesting queue head we always fetch requestsInProgressCount * QUERY_HEAD_BUFFER number of requests.

export declare class RequestQueue {
readonly config: Configuration;
log: import("@apify/log/log").Log;

@@ -142,3 +145,3 @@ id: string;

*/
constructor(options: RequestQueueOptions);
constructor(options: RequestQueueOptions, config?: Configuration);
/**

@@ -306,3 +309,3 @@ * @ignore

*/
static open(queueIdOrName?: string | null): Promise<RequestQueue>;
static open(queueIdOrName?: string | null, options?: StorageManagerOptions): Promise<RequestQueue>;
}

@@ -309,0 +312,0 @@ export interface RequestQueueOptions {

@@ -14,2 +14,3 @@ "use strict";

const request_1 = require("../request");
const configuration_1 = require("../configuration");
const MAX_CACHED_REQUESTS = 1000000;

@@ -114,3 +115,9 @@ /**

*/
constructor(options) {
constructor(options, config = configuration_1.Configuration.getGlobalConfig()) {
Object.defineProperty(this, "config", {
enumerable: true,
configurable: true,
writable: true,
value: config
});
Object.defineProperty(this, "log", {

@@ -665,3 +672,3 @@ enumerable: true,

await this.client.delete();
const manager = new storage_manager_1.StorageManager(RequestQueue);
const manager = storage_manager_1.StorageManager.getManager(RequestQueue, this.config);
manager.closeStorage(this);

@@ -726,5 +733,8 @@ }

*/
static async open(queueIdOrName) {
static async open(queueIdOrName, options = {}) {
(0, ow_1.default)(queueIdOrName, ow_1.default.optional.string);
const manager = new storage_manager_1.StorageManager(RequestQueue);
(0, ow_1.default)(options, ow_1.default.object.exactShape({
config: ow_1.default.optional.object.instanceOf(configuration_1.Configuration),
}));
const manager = storage_manager_1.StorageManager.getManager(RequestQueue, options.config);
return manager.openStorage(queueIdOrName);

@@ -731,0 +741,0 @@ }

@@ -1,2 +0,2 @@

import { StorageClient } from '@crawlee/types';
import { Dictionary, StorageClient } from '@crawlee/types';
import { Configuration } from '../configuration';

@@ -14,3 +14,3 @@ import { Constructor } from '../typedefs';

private readonly config;
private static readonly MAX_OPENED_STORAGES;
private static readonly storageManagers;
private readonly name;

@@ -20,3 +20,7 @@ private readonly StorageConstructor;

constructor(StorageConstructor: Constructor<T>, config?: Configuration);
openStorage(idOrName?: string, client?: StorageClient): Promise<T>;
static openStorage<T extends IStorage>(storageClass: Constructor<T>, idOrName?: string, client?: StorageClient, config?: Configuration): Promise<T>;
static getManager<T extends IStorage>(storageClass: Constructor<T>, config?: Configuration): StorageManager<T>;
/** @internal */
static clearCache(): void;
openStorage(idOrName?: string | null, client?: StorageClient): Promise<T>;
closeStorage(storage: {

@@ -31,3 +35,3 @@ id: string;

protected _getStorageClientFactories(client: StorageClient, storageConstructorName: string): {
createStorageClient: ((id: string) => import("@crawlee/types").DatasetClient<import("@crawlee/types").Dictionary<any>>) | ((id: string) => import("@crawlee/types").KeyValueStoreClient) | ((id: string, options?: import("@crawlee/types").RequestQueueOptions | undefined) => import("@crawlee/types").RequestQueueClient);
createStorageClient: ((id: string) => import("@crawlee/types").DatasetClient<Dictionary<any>>) | ((id: string) => import("@crawlee/types").KeyValueStoreClient) | ((id: string, options?: import("@crawlee/types").RequestQueueOptions | undefined) => import("@crawlee/types").RequestQueueClient);
createStorageCollectionClient: (() => import("@crawlee/types").DatasetCollectionClient) | (() => import("@crawlee/types").KeyValueStoreCollectionClient) | (() => import("@crawlee/types").RequestQueueCollectionClient);

@@ -34,0 +38,0 @@ };

@@ -5,3 +5,2 @@ "use strict";

const consts_1 = require("@apify/consts");
const datastructures_1 = require("@apify/datastructures");
const configuration_1 = require("../configuration");

@@ -46,8 +45,28 @@ const DEFAULT_ID_ENV_VAR_NAMES = {

writable: true,
value: void 0
value: new Map()
});
this.StorageConstructor = StorageConstructor;
this.name = this.StorageConstructor.name;
this.cache = new datastructures_1.LruCache({ maxLength: StorageManager.MAX_OPENED_STORAGES });
}
static openStorage(storageClass, idOrName, client, config = configuration_1.Configuration.getGlobalConfig()) {
return this.getManager(storageClass, config).openStorage(idOrName, client);
}
static getManager(storageClass, config = configuration_1.Configuration.getGlobalConfig()) {
if (!this.storageManagers.has(storageClass)) {
const manager = new StorageManager(storageClass, config);
this.storageManagers.set(storageClass, manager);
}
return this.storageManagers.get(storageClass);
}
/** @internal */
static clearCache() {
this.storageManagers.forEach((manager) => {
if (manager.name === 'KeyValueStore') {
manager.cache.forEach((item) => {
item.clearCache?.();
});
}
});
this.storageManagers.clear();
}
async openStorage(idOrName, client) {

@@ -77,6 +96,6 @@ if (!idOrName) {

const idKey = storage.id;
this.cache.remove(idKey);
this.cache.delete(idKey);
if (storage.name) {
const nameKey = storage.name;
this.cache.remove(nameKey);
this.cache.delete(nameKey);
}

@@ -108,6 +127,6 @@ }

const idKey = storage.id;
this.cache.add(idKey, storage);
this.cache.set(idKey, storage);
if (storage.name) {
const nameKey = storage.name;
this.cache.add(nameKey, storage);
this.cache.set(nameKey, storage);
}

@@ -117,8 +136,8 @@ }

exports.StorageManager = StorageManager;
Object.defineProperty(StorageManager, "MAX_OPENED_STORAGES", {
Object.defineProperty(StorageManager, "storageManagers", {
enumerable: true,
configurable: true,
writable: true,
value: 1000
value: new Map()
});
//# sourceMappingURL=storage_manager.js.map

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc