New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

website-scraper

Package Overview
Dependencies
Maintainers
1
Versions
60
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

website-scraper - npm Package Compare versions

Comparing version 0.1.0 to 0.1.1

4

index.js
var Scraper = require('./lib/load.js');
module.exports.scrape = function (data, callback) {
return new Scraper(data).scrape(callback);
module.exports.scrape = function (options, callback) {
return new Scraper(options).scrape(callback);
};

@@ -7,34 +7,34 @@ var config = {

selector: 'img',
attributeName: 'src'
attr: 'src'
},
{
selector: 'input',
attributeName: 'src'
attr: 'src'
},
{
selector: 'object',
attributeName: 'data'
attr: 'data'
},
{
selector: 'embed',
attributeName: 'src'
attr: 'src'
},
{
selector: 'param[name="movie"]',
attributeName: 'value'
attr: 'value'
},
{
selector: 'script',
attributeName: 'src'
attr: 'src'
},
{
selector: 'link[rel="stylesheet"]',
attributeName: 'href'
attr: 'href'
},
{
selector: 'link[rel*="icon"]',
attributeName: 'href'
attr: 'href'
},
],
staticDirectories: [
directories: [
{

@@ -41,0 +41,0 @@ directory: 'images',

@@ -9,9 +9,8 @@ var Promise = require('bluebird'),

Logger = require('./log.js'),
config = require('./defaults.js');
defaults = require('./defaults.js');
var encoding = 'binary';
/** @constructor */
var Loader = function (data) {
var options = {},
var options = _.clone(data),
encoding = 'binary',
loadedFiles = {},

@@ -21,12 +20,6 @@ staticFullPaths,

options.url = data.url;
options.path = data.path;
options.indexFile = _.has(data, 'indexFile') ? data.indexFile : config.indexFile;
options.srcToLoad = _.has(data, 'srcToLoad') ? data.srcToLoad : config.srcToLoad;
options.staticDirectories = _.has(data, 'staticDirectories') ? data.staticDirectories : config.staticDirectories;
options.log = _.has(data, 'log') ? data.log : config.log;
_.each(_.keys(defaults), function (key) {
options[key] = _.has(options, key) ? options[key] : defaults[key];
});
staticFullPaths = _.map(options.staticDirectories, function (dir) {
return path.resolve(options.path, dir.directory)
});
logger = new Logger(options.log);

@@ -100,3 +93,3 @@

function getDirectoryByExtension(ext) {
var dirObj = _.chain(options.staticDirectories)
var dirObj = _.chain(options.directories)
.filter(function (dir) {

@@ -251,5 +244,4 @@ return _.indexOf(dir.extensions, ext) >= 0

sourcesRegexps = [
/(url[\s]*\([\s'"]*)(.+?)([\s'"]*\))/gi,
/(@import[\s]*['"]?[\s]*)(.+?)([\s]*['"]?;)/gi,
/(@import[\s]*url[\s]*\([\s'"]*)(.+?)([\s'"]*\))/gi
/((?:@import[\s]*)?url[\s]*\([\s'"]*)(.+?)([\s'"]*\))/gi
],

@@ -295,5 +287,10 @@ urlPromises = [];

})
.then(function (html) { // Load css sources in index page
.then(function (html) {
fs.ensureDirSync(options.path);
setLoadedFilename(options.url, options.path);
staticFullPaths = _.map(options.directories, function (dir) {
return path.resolve(options.path, dir.directory)
});
return loadCssSources(html, options.url);

@@ -310,3 +307,3 @@ })

p = p.then(function (newHtml) {
return loadSources(newHtml, src.selector, src.attributeName)
return loadSources(newHtml, src.selector, src.attr)
});

@@ -318,7 +315,5 @@ });

p = p.then(function (html) {
return fs.outputFileAsync(indexFilePath, html, {encoding: encoding})
})
.then(function () {
return {status: 'success'}
});
fs.outputFileSync(indexFilePath, html, {encoding: encoding});
return {html: html}
});

@@ -328,15 +323,27 @@ return p;

function errorCleanup() {
return fs.removeAsync(options.path);
}
function noop() {}
return {
scrape: function (callback) {
callback = typeof callback === 'function' ? callback : noop;
if (!options.path) {
return callback(new Error('Path is not defined'));
}
if (fs.existsSync(options.path)) {
return callback(new Error('Path ' + options.path + ' exists!'), null);
return callback(new Error('Path ' + options.path + ' exists'));
}
process()
.then(function (res) {
return callback(null, res)
})
.catch(function (e) {
return callback(e, null)
})
.then(function (res, e) {
if (e) {
errorCleanup();
res = null;
}
return callback(e, res);
});
}

@@ -343,0 +350,0 @@ }

{
"name": "website-scraper",
"version": "0.1.0",
"version": "0.1.1",
"description": "full web-page's scraping including all css, images, js, etc.",

@@ -35,4 +35,5 @@ "main": "index.js",

"cheerio": "0.11.0",
"request": "^2.42.0",
"underscore": "^1.7.0"
}
}

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc