@crawlee/utils
Advanced tools
Comparing version 3.0.0-beta.60 to 3.0.0-beta.61
@@ -40,13 +40,4 @@ "use strict"; | ||
return ''; | ||
// TODO: Add support for "html" being a Cheerio element, otherwise the only way | ||
// to use it is e.g. htmlToText($('p').html())) which is inefficient | ||
// Also, it seems this doesn't work well in CheerioScraper, e.g. htmlToText($) | ||
// produces really text with a lot of HTML elements in it. Let's just deprecate this sort of usage, | ||
// and make the parameter "htmlOrCheerioElement" | ||
const $ = typeof htmlOrCheerioElement === 'function' ? htmlOrCheerioElement : cheerio_1.default.load(htmlOrCheerioElement, { decodeEntities: true }); | ||
let text = ''; | ||
// TODO: the type for elems is very annoying to work with. | ||
// The correct type is Node[] from cheerio but it needs a lot more casting in each branch, or alternatively, | ||
// use the is* methods from domhandler (isText, isTag, isComment, etc.) | ||
// @ts-expect-error | ||
const process = (elems) => { | ||
@@ -65,3 +56,3 @@ const len = elems ? elems.length : 0; | ||
if (compr.startsWith(' ') && /(^|\s)$/.test(text)) | ||
compr = compr.substr(1); | ||
compr = compr.substring(1); | ||
text += compr; | ||
@@ -68,0 +59,0 @@ } |
@@ -44,13 +44,8 @@ "use strict"; | ||
mainProcessBytes = process.memoryUsage().rss; | ||
// TODO this is quite ugly, let's introduce some local variables | ||
// https://stackoverflow.com/a/55914335/129415 | ||
childProcessesBytes = +(0, node_child_process_1.execSync)('cat /proc/meminfo') | ||
.toString() | ||
.split(/[\n: ]/) | ||
.filter((val) => val.trim())[19] | ||
* 1000 // meminfo reports in kb, not bytes | ||
// the total used memory is reported by meminfo | ||
// subtract memory used by the main node process | ||
// in order to infer memory used by any child processes | ||
- mainProcessBytes; | ||
const memInfo = (0, node_child_process_1.execSync)('cat /proc/meminfo').toString(); | ||
const values = memInfo.split(/[\n: ]/).filter((val) => val.trim()); | ||
// /proc/meminfo reports in kb, not bytes, the total used memory is reported by meminfo | ||
// subtract memory used by the main node process in order to infer memory used by any child processes | ||
childProcessesBytes = +values[19] * 1000 - mainProcessBytes; | ||
} | ||
@@ -57,0 +52,0 @@ else { |
{ | ||
"name": "@crawlee/utils", | ||
"version": "3.0.0-beta.60", | ||
"version": "3.0.0-beta.61", | ||
"description": "A set of shared utilities that can be used by crawlers", | ||
@@ -52,3 +52,3 @@ "engines": { | ||
"@apify/ps-tree": "^1.1.4", | ||
"@crawlee/types": "^3.0.0-beta.60", | ||
"@crawlee/types": "^3.0.0-beta.61", | ||
"cheerio": "1.0.0-rc.11", | ||
@@ -55,0 +55,0 @@ "content-type": "^1.0.4", |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
225712
1845