html-scrape
Advanced tools
Comparing version 0.0.1 to 0.1.0
'use strict'; | ||
var _scrape = require('./utils/scrape.js'); | ||
var _lodash = require('lodash'); | ||
module.exports = _scrape.scrape; | ||
var _lodash2 = _interopRequireDefault(_lodash); | ||
var _validate = require('./utils/validate'); | ||
var _validate2 = _interopRequireDefault(_validate); | ||
var _url = require('./utils/url'); | ||
var _url2 = _interopRequireDefault(_url); | ||
var _elements = require('./utils/elements'); | ||
var _elements2 = _interopRequireDefault(_elements); | ||
var _regex = require('./utils/regex'); | ||
var _regex2 = _interopRequireDefault(_regex); | ||
/**
 * Normalise a required module so its main export is always reachable
 * through `.default`.
 *
 * @param {*} obj - Value returned by `require`.
 * @returns {Object} `obj` itself when it is truthy and flagged with
 *   `__esModule`; otherwise a new wrapper object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
var html = null; | ||
/**
 * Validate `elems`, fetch the page at `host`, then extract one value per
 * entry of `elems` and hand the collected values to `callback`.
 *
 * Each entry is dispatched by shape:
 *   - `{ start, end }` goes to the regex utility;
 *   - `{ el }` goes to the elements utility;
 *   - anything else is reported as a configuration error.
 *
 * @param {string} host - Passed unchanged to the url utility.
 * @param {Object} elems - Map of result key -> extraction descriptor.
 * @param {Function} callback - Invoked exactly once as `(error, data)`.
 *   On success `error` is `false` and `data` maps each key of `elems` to its
 *   extracted value, or to `null` when that extractor reported an error.
 *   On failure `error` is the validation error, the fetch error, or the
 *   string 'Please check your configuration.'.
 */
var scrape = function scrape(host, elems, callback) {
  var finished = false;

  // Every exit path goes through here so the caller's callback can never
  // fire more than once, whichever path finishes first.
  var finish = function finish(error, data) {
    if (finished) {
      return;
    }
    finished = true;
    callback(error, data);
  };

  (0, _validate2.default)(elems, function (validationError) {
    if (validationError) {
      // Stop here: fetching after a failed validation would report twice.
      finish(validationError);
      return;
    }

    (0, _url2.default)(host, function (fetchError, html) {
      if (fetchError) {
        finish(fetchError);
        return;
      }

      var data = {};
      var remaining = _lodash2.default.size(elems);

      // Nothing to extract: report an empty result instead of never
      // calling back.
      if (remaining === 0) {
        finish(false, data);
        return;
      }

      // Build the per-key extractor callback. Completion is counted when an
      // extractor reports back, not when the loop reaches the entry, so the
      // result is complete whether the extractors call back synchronously
      // or later.
      // NOTE(review): an extractor that never invokes its callback will
      // keep scrape from completing - confirm both utilities always call
      // back.
      var collect = function collect(key) {
        var settled = false;
        return function (error, result) {
          if (settled) {
            return;
          }
          settled = true;
          // A failed extraction yields null rather than a stale result.
          data[key] = error ? null : result;
          remaining--;
          if (remaining === 0) {
            finish(false, data);
          }
        };
      };

      _lodash2.default.forEach(elems, function (element, key) {
        if (element.start && element.end) {
          // Send it off to the regex utility:
          (0, _regex2.default)(html, element, collect(key));
        } else if (element.el) {
          (0, _elements2.default)(html, element, collect(key));
        } else {
          finish('Please check your configuration.');
          // Returning false stops lodash's iteration early.
          return false;
        }
      });
    });
  });
};
module.exports = scrape; |
{ | ||
"name": "html-scrape", | ||
"description": "A lightweight tool that scrapes webpages with ease.", | ||
"version": "0.0.1", | ||
"version": "0.1.0", | ||
"main": "./lib", | ||
@@ -34,2 +34,4 @@ "scripts": { | ||
"dependencies": { | ||
"jquery": "^2.2.1", | ||
"jsdom": "^8.0.4", | ||
"lodash": "^4.5.1", | ||
@@ -36,0 +38,0 @@ "needler": "0.0.7", |
@@ -16,6 +16,6 @@ # HTML Scrape | ||
title: { start: '<title>', end: '</title>' }, | ||
meta: { start: '<meta content="', end: '"'} | ||
explicit: { el: '#explicit > a' } | ||
} | ||
scrape ('http://www.google.com', elements, (error, data) => { | ||
scrape ('https://npmjs.com', elements, (error, data) => { | ||
if (error) { | ||
@@ -30,6 +30,3 @@ console.log(error); | ||
{ | ||
title: 'Google', | ||
meta: 'Search the world\'s information, including webpages, images, videos and more. Google has many special features to help you find exactly what you\'re looking for.' | ||
} | ||
{ title: 'npm', explicit: 'packages people \'npm install\' a lot' } | ||
@@ -41,6 +38,8 @@ ## options | ||
> > **start** [string | required] - String before the value you are searching for. For instance - `<title>value</title>`: In this example, the header would be `<title>` | ||
> > **start** String that appears immediately before the value you are searching for. For instance, in `<title>value</title>` the start string would be `<title>`. **If using the needle method, both start and end are required.** | ||
> > **end** (find, replace) [string | required] - String after the value you are searching for. For instance - `<title>value</title>`: In this example, the header would be `</title>` | ||
> > **end** String that appears immediately after the value you are searching for. For instance, in `<title>value</title>` the end string would be `</title>`. **If using the needle method, both start and end are required.** | ||
> > **el** [string] Element id/class to get value of. Can also be declared like: `#el > a` to get value of the link. | ||
> **callback** [function] - Function that returns data after scraping is finished. | ||
@@ -47,0 +46,0 @@ |
6354
7
120
5
47
+ Addedjquery@^2.2.1
+ Addedjsdom@^8.0.4
+ Addedabab@1.0.4(transitive)
+ Addedacorn@2.7.0(transitive)
+ Addedacorn-globals@1.0.9(transitive)
+ Addedarray-equal@1.0.2(transitive)
+ Addedcssom@0.3.8(transitive)
+ Addedcssstyle@0.2.37(transitive)
+ Addeddeep-is@0.1.4(transitive)
+ Addedescodegen@1.14.3(transitive)
+ Addedesprima@4.0.1(transitive)
+ Addedestraverse@4.3.0(transitive)
+ Addedesutils@2.0.3(transitive)
+ Addedfast-levenshtein@2.0.6(transitive)
+ Addediconv-lite@0.4.24(transitive)
+ Addedjquery@2.2.4(transitive)
+ Addedjsdom@8.5.0(transitive)
+ Addedlevn@0.3.0(transitive)
+ Addednwmatcher@1.4.4(transitive)
+ Addedoptionator@0.8.3(transitive)
+ Addedparse5@1.5.1(transitive)
+ Addedprelude-ls@1.1.2(transitive)
+ Addedsax@1.4.1(transitive)
+ Addedsource-map@0.6.1(transitive)
+ Addedsymbol-tree@3.2.4(transitive)
+ Addedtr46@0.0.3(transitive)
+ Addedtype-check@0.3.2(transitive)
+ Addedwebidl-conversions@3.0.1(transitive)
+ Addedwhatwg-url@2.0.1(transitive)
+ Addedword-wrap@1.2.5(transitive)
+ Addedxml-name-validator@2.0.1(transitive)