New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

website-scraper

Package Overview
Dependencies
Maintainers
1
Versions
60
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

website-scraper - npm Package Compare versions

Comparing version 5.1.0 to 5.2.0

1

lib/config/defaults.js

@@ -52,2 +52,3 @@ import fs from 'fs';

responseType: 'buffer',
//cookieJar: true,
decompress: true,

@@ -54,0 +55,0 @@ headers: {

3

lib/plugins/save-resource-to-fs-plugin.js

@@ -23,4 +23,3 @@ import path from 'path';

const text = resource.getText();
const encoding = typeof text === 'string' ? 'utf-8' : 'binary';
await fs.outputFile(filename, text, { encoding });
await fs.outputFile(filename, text, { encoding: resource.getEncoding() });
loadedResources.push(resource);

@@ -27,0 +26,0 @@ });

import got from 'got';
import logger from './logger.js';
import types from './config/resource-types.js';
import { extend, isPlainObject, getTypeByMime } from './utils/index.js';
import { extend } from './utils/index.js';
const TEXT_RESOURCE_TYPES = [types.html, types.css];
function getMimeType (contentType) {

@@ -12,26 +9,69 @@ return contentType ? contentType.split(';')[0] : null;

function defaultResponseHandler ({response, type}) {
if (TEXT_RESOURCE_TYPES.includes(type)) {
return response.body.toString();
function defaultResponseHandler ({response}) {
return Promise.resolve(response);
}
/**
 * Choose the encoding for a response from its Content-Type header.
 *
 * @param {Object} headers - response headers; only the `content-type` key is read
 * @returns {string} `'utf8'` when the header mentions UTF-8, otherwise `'binary'`
 */
function extractEncodingFromHeader (headers) {
	const contentTypeHeader = headers['content-type'];
	// Charset names are case-insensitive and "utf8" is an accepted label for UTF-8,
	// so match both spellings regardless of case instead of the literal 'utf-8'.
	const isUtf8 = typeof contentTypeHeader === 'string' && /utf-?8/i.test(contentTypeHeader);
	return isUtf8 ? 'utf8' : 'binary';
}
/**
 * Determine which encoding applies to a response-handler result.
 *
 * When the value is an object carrying a `headers` object, the encoding is
 * derived from those headers; otherwise an explicit truthy `encoding`
 * property is used. Everything else falls back to `'binary'`.
 *
 * @param {*} response - response object, handler result, or any other value
 * @returns {string} the resolved encoding
 */
function getEncoding (response) {
	if (response && typeof response === 'object') {
		if (response.headers && typeof response.headers === 'object') {
			return extractEncodingFromHeader(response.headers);
		}
		if (response.encoding) {
			return response.encoding;
		}
	}
	// Fallback for non-objects and for objects without headers or encoding.
	// A leftover `return response.body;` used to sit here: it returned the
	// body instead of an encoding and threw on null/undefined input.
	return 'binary';
}
/**
 * Always throws: reports an unusable response-handler result.
 *
 * @param {*} result - the value the response handler produced
 * @throws {Error} `result` itself when it is an Error instance, otherwise a
 *   new Error naming the received type (arrays are reported as `array`)
 */
function throwTypeError (result) {
	// An Error handed back by the handler is surfaced unchanged.
	if (result instanceof Error) {
		throw result;
	}
	const receivedType = Array.isArray(result) ? 'array' : typeof result;
	throw new Error(`Wrong response handler result. Expected string or object, but received ${receivedType}`);
}
/**
 * Unwrap the payload of a response-handler result.
 *
 * @param {*} result - handler result; may be an object exposing `body` or a bare value
 * @returns {*} `result.body` when `result` is an object that has a `body`
 *   property (own or inherited), otherwise `result` itself
 */
function getData (result) {
	const carriesBody = result !== null && typeof result === 'object' && 'body' in result;
	return carriesBody ? result.body : result;
}
function transformResult (result) {
switch (true) {
case typeof result === 'string' || Buffer.isBuffer(result):
return {
body: result,
metadata: null
};
case isPlainObject(result):
return {
body: result.body,
metadata: result.metadata || null
};
case result === null:
return null;
default:
throw new Error('Wrong response handler result. Expected string or object, but received ' + typeof result);
const encoding = getEncoding(result);
const data = getData(result);
// Check for no data
if (data === null || data === undefined) {
return null;
}
// Then stringify it.
let body = null;
if (data instanceof Buffer) {
body = data.toString(encoding);
} else if (typeof data === 'string') {
body = data;
} else {
throwTypeError(result);
}
return {
body,
encoding,
metadata: result.metadata || data.metadata || null
};
}

@@ -51,8 +91,4 @@

logger.debug(`[request] received response for ${response.url}, statusCode ${response.statusCode}`);
const responseHandlerResult = transformResult(await afterResponse({response}));
const mimeType = getMimeType(response.headers['content-type']);
const resourceType = getTypeByMime(mimeType);
const responseHandlerResult = transformResult(await afterResponse({ response, type: resourceType }));
if (!responseHandlerResult) {

@@ -63,6 +99,6 @@ return null;

url: response.url,
type: resourceType,
mimeType,
mimeType: getMimeType(response.headers['content-type']),
body: responseHandlerResult.body,
metadata: responseHandlerResult.metadata
metadata: responseHandlerResult.metadata,
encoding: responseHandlerResult.encoding
};

@@ -72,3 +108,5 @@ }

export default {
get: getRequest
get: getRequest,
getEncoding,
transformResult
};

@@ -15,2 +15,3 @@ import types from './config/resource-types.js';

this.saved = false;
this.encoding = 'binary';
}

@@ -73,2 +74,10 @@

/**
 * Set the encoding recorded on this instance.
 * @param encoding - value stored in `this.encoding`
 */
setEncoding (encoding) {
this.encoding = encoding;
}
/**
 * Read the encoding recorded on this instance.
 * @returns the current value of `this.encoding`
 */
getEncoding () {
return this.encoding;
}
isHtml () {

@@ -75,0 +84,0 @@ return this.getType() === types.html;

@@ -16,3 +16,3 @@ import PromiseQueue from 'p-queue';

import * as utils from './utils/index.js';
const { extend, union, urlsEqual, getTypeByFilename, series } = utils;
const { extend, union, urlsEqual, getTypeByMime, getTypeByFilename, series } = utils;
import NormalizedUrlMap from './utils/normalized-url-map.js';

@@ -174,3 +174,4 @@

resource.setType(responseData.type);
resource.setEncoding(responseData.encoding);
resource.setType(getTypeByMime(responseData.mimeType));

@@ -177,0 +178,0 @@ const { filename } = await self.runActions('generateFilename', { resource, responseData });

{
"name": "website-scraper",
"version": "5.1.0",
"version": "5.2.0",
"description": "Download website to a local directory (including all css, images, js, etc.)",

@@ -5,0 +5,0 @@ "readmeFilename": "README.md",

@@ -329,4 +329,8 @@ [![Version](https://img.shields.io/npm/v/website-scraper.svg?style=flat)](https://www.npmjs.org/package/website-scraper)

Promise should be resolved with:
* `string` which contains response body
* or object with properties `body` (response body, string) and `metadata` - everything you want to save for this resource (like headers, original text, timestamps, etc.), scraper will not use this field at all, it is only for result.
* the `response` object with the `body` modified in place as necessary.
* or object with properties
* `body` (response body, string)
* `encoding` (`binary` or `utf8`) - the encoding used to save the file; `binary` is used by default.
* `metadata` (object) - everything you want to save for this resource (like headers, original text, timestamps, etc.), scraper will not use this field at all, it is only for result.
* a binary `string`. This is advised against, because the binary encoding assumed for a plain string can corrupt `utf8` responses when they are saved to the filesystem.

@@ -346,3 +350,4 @@ If multiple actions `afterResponse` added - scraper will use result from last one.

someOtherData: [ 1, 2, 3 ]
}
},
encoding: 'utf8'
}

@@ -349,0 +354,0 @@ }

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc