airbnb-scrapper
Advanced tools
Comparing version 1.0.0 to 1.1.0
118
lib/index.js
@@ -16,3 +16,3 @@ 'use strict'; | ||
exports = module.exports = airbnbScrape; | ||
exports = module.exports = main; | ||
@@ -27,42 +27,30 @@ /** | ||
function airbnbScrape(program) { | ||
var a = new AirbnbScrapper(program || {}); | ||
function main(program) { | ||
program || (program = { args: [] }); | ||
if(program.args.length === 0) { | ||
program.outputHelp(); | ||
process.exit(1); | ||
return process.exit(1); | ||
} | ||
return a.scrape(program.args); | ||
return Promise | ||
.map(program.args, _.partial(exports.downloadPosting, program)) | ||
.map(function(html) { | ||
return cheerio.load(html); | ||
}) | ||
.map(exports.extractInfo) | ||
.then(_.partial(exports.outputInfo, program)); | ||
} | ||
// Main functions and methods: | ||
/** | ||
* Represents an airbnb.com scrapper. Realizes operations on an array of URLs in | ||
* order to scrape them and output parsed information. | ||
* A helper function that logs messages only when `options.verbose` is | ||
* truthy. | ||
* | ||
* @constructor | ||
* | ||
* @param {Object} options | ||
* @param {Boolean} [options.verbose] If true, will be verbose about operations | ||
* @param {Boolean} [options.csv] If true, will output information in CSV | ||
* @param {Boolean} [options.json] If true, will output information in JSON | ||
* @param {Object} [options.verbose] | ||
*/ | ||
function AirbnbScrapper(options) { | ||
this.verbose = options.verbose; | ||
this.csv = options.csv; | ||
this.json = options.json; | ||
} | ||
/** | ||
* Logs a set of messages, if the `airbnbScrapper.verbose` property is true. | ||
* | ||
* @param {Mixed} args... | ||
*/ | ||
AirbnbScrapper.prototype.log = function(/*args...*/) { | ||
if(this.verbose) { | ||
console.log.apply(console, arguments); | ||
exports.log = function log(options/*, args...*/) { | ||
if(options.verbose) { | ||
var args = Array.prototype.slice.call(arguments, 1); | ||
console.log.apply(console, args); | ||
} | ||
@@ -72,17 +60,2 @@ }; | ||
/** | ||
* Downloads an Array of URLs, extracts relevant information from their HTML | ||
* pages and outputs the result. | ||
* | ||
* @param {Array.<String>} postings An array of AirBNB posting URLs | ||
* @return {Promise} A promise to the result | ||
*/ | ||
AirbnbScrapper.prototype.scrape = function(postings) { | ||
return Promise | ||
.map(postings, this.downloadPosting.bind(this)) | ||
.map(this.extractInfo.bind(this)) | ||
.then(this.outputInfo.bind(this)); | ||
}; | ||
/** | ||
* Downloads a resource at some URL and returns a promise to it. | ||
@@ -95,4 +68,8 @@ * | ||
AirbnbScrapper.prototype.downloadPosting = function(url) { | ||
this.log('Downloading information for posting "' + url + '"...'); | ||
exports.downloadPosting = function downloadPosting(options, url) { | ||
exports.log( | ||
options, | ||
'Downloading information for posting "' + url + '"...' | ||
); | ||
return request | ||
@@ -109,3 +86,3 @@ .get(url) | ||
* | ||
* @param {String} html Some AirBNB posting's HTML | ||
* @param {Object} $ Some AirBNB posting's HTML loaded into cheerio | ||
* @return {Object} An object representation of the relevant information for | ||
@@ -115,9 +92,8 @@ * this HTML string | ||
AirbnbScrapper.prototype.extractInfo = function(html) { | ||
var $ = cheerio.load(html); | ||
exports.extractInfo = function extractInfo($) { | ||
return _.extend({ | ||
title: trim($('#listing_name').text()), | ||
price_per_night: this.getPrice($), | ||
host_profile_url: this.getHostUrl($), | ||
}, this.getDetails($)); | ||
price_per_night: exports.getPrice($), | ||
host_profile_url: exports.getHostUrl($), | ||
}, exports.getDetails($)); | ||
}; | ||
@@ -129,8 +105,11 @@ | ||
* @param {Array.<Object>} parsed_postings | ||
* @return {Mixed} A promise to the output's operation or undefined | ||
*/ | ||
AirbnbScrapper.prototype.outputInfo = function(parsed_postings) { | ||
if(this.csv) { | ||
return this.outputCsv(parsed_postings); | ||
} else if(this.json) { | ||
exports.outputInfo = function outputInfo(options, parsed_postings) { | ||
if(options.csv) { | ||
exports.log(options, 'Generating CSV...'); | ||
return exports.outputCsv(parsed_postings); | ||
} else if(options.json) { | ||
exports.log(options, 'Generating JSON...'); | ||
return console.log(JSON.stringify(parsed_postings, null, 2)); | ||
@@ -142,5 +121,10 @@ } | ||
// Helper functions and methods: | ||
AirbnbScrapper.prototype.outputCsv = function(parsed_postings) { | ||
this.log('Generating csv...'); | ||
/** | ||
* Outputs a set of postings as CSV | ||
* | ||
* @param {Array.<Object>} parsed_postings | ||
* @return {Promise} A promise to the output operation | ||
*/ | ||
exports.outputCsv = function outputCsv(parsed_postings) { | ||
var csvP = json2csvAsync({ | ||
@@ -156,3 +140,3 @@ data: parsed_postings, | ||
AirbnbScrapper.prototype.getPrice = function($) { | ||
exports.getPrice = function getPrice($) { | ||
var str = $('#dayly_price_string').text(); | ||
@@ -167,3 +151,3 @@ if(!str) { | ||
AirbnbScrapper.prototype.getHostUrl = function($) { | ||
exports.getHostUrl = function getHostUrl($) { | ||
var img = $('#host-profile a > img')[0]; | ||
@@ -173,3 +157,3 @@ return HOST + img.parent.attribs.href; | ||
AirbnbScrapper.prototype.getDetails = function($) { | ||
exports.getDetails = function getDetails($) { | ||
return $('#details-column .row > .col-9 > .row > .col-6 > div') | ||
@@ -192,7 +176,7 @@ .map(function(i, el) { | ||
function trim(str) { | ||
var trim = exports.trim = function trim(str) { | ||
return str.replace(/(^(\s|\n|\t)+)|((\s|\n|\t)+$)/g, ''); | ||
} | ||
}; | ||
function getFields(parsed_postings) { | ||
var getFields = exports.getFields = function getFields(parsed_postings) { | ||
var head = _.first(parsed_postings); | ||
@@ -208,2 +192,2 @@ var tail = _.rest(parsed_postings); | ||
}, head)); | ||
} | ||
}; |
{ | ||
"name": "airbnb-scrapper", | ||
"version": "1.0.0", | ||
"version": "1.1.0", | ||
"description": "A command-line tool for scrapping AirBNB postings.", | ||
@@ -5,0 +5,0 @@ "main": "lib/index.js", |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
8014
152