amazon-buddy
Advanced tools
Comparing version 1.2.2 to 1.3.0
@@ -16,3 +16,3 @@ #!/usr/bin/env node | ||
await AmazonScraper(argv)._searchProduct() | ||
await AmazonScraper(argv)._startScraper() | ||
} catch(error){ | ||
@@ -25,5 +25,6 @@ console.log(error); | ||
.usage('Usage: $0 <command> [options]') | ||
.example(`$0 search -k 'Xbox one'`) | ||
.example(`$0 products -k 'Xbox one'`) | ||
.example(`$0 reviews -a B01GW3H3U8`) | ||
.command( | ||
"search", | ||
"products", | ||
"scrape for a products from the provided key word", | ||
@@ -35,2 +36,10 @@ {}, | ||
) | ||
.command( | ||
"reviews", | ||
"scrape reviews from a product, by providing ASIN", | ||
{}, | ||
(argv) => { | ||
startScraper(argv); | ||
} | ||
) | ||
.options({ | ||
@@ -47,17 +56,27 @@ 'help': { | ||
}, | ||
'asin': { | ||
alias: 'a', | ||
default: '', | ||
type: 'string', | ||
describe: "To scrape reviews you need to provide product ASIN(amazon product id)" | ||
}, | ||
'number':{ | ||
alias: 'n', | ||
default: 20, | ||
default: 10, | ||
type: 'integer', | ||
describe: 'Number of products to scrape. Maximum 100' | ||
describe: 'Number of products to scrape. Maximum 100 products or 300 reviews' | ||
}, | ||
'save':{ | ||
alias: 's', | ||
default: false, | ||
default: true, | ||
type: 'boolean', | ||
describe: 'Save to a CSV file?' | ||
}, | ||
'sort':{ | ||
default: false, | ||
type: 'boolean', | ||
describe: 'If searching for a products then list will be sorted by a higher score(number of reviews*rating). If searching for a reviews then they will be sorted by rating.' | ||
}, | ||
}) | ||
.demandCommand() | ||
.demandOption(['keyword']) | ||
.argv |
25
index.js
@@ -8,3 +8,3 @@ "use strict"; | ||
try{ | ||
resolve(await AmazonScraper(options)._searchProduct()); | ||
resolve(await AmazonScraper(options)._startScraper()); | ||
} catch(error){ | ||
@@ -16,2 +16,23 @@ reject(error); | ||
module.exports = scraper; | ||
/**
 * Scrape products from an Amazon search result.
 *
 * The function is attached to `module.exports` rather than the bare `exports`
 * alias: this file assigns `module.exports = scraper` just before this point,
 * after which `exports` refers to an object that `require()` never returns,
 * so anything hung on it would be unreachable for consumers.
 *
 * @param {object} options - Scraper options. They are copied and forwarded to
 *   `scraper` with `scrapeType` forced to 'products'; the caller's object is
 *   left untouched.
 * @returns {Promise<*>} Settles with whatever `scraper` resolves or rejects with.
 */
module.exports.products = async (options) => scraper({ ...options, scrapeType: 'products' });
/**
 * Scrape reviews of a single Amazon product.
 *
 * The function is attached to `module.exports` rather than the bare `exports`
 * alias: this file assigns `module.exports = scraper` earlier, after which
 * `exports` refers to an object that `require()` never returns, so anything
 * hung on it would be unreachable for consumers.
 *
 * @param {object} options - Scraper options. They are copied and forwarded to
 *   `scraper` with `scrapeType` forced to 'reviews'; the caller's object is
 *   left untouched.
 * @returns {Promise<*>} Settles with whatever `scraper` resolves or rejects with.
 */
module.exports.reviews = async (options) => scraper({ ...options, scrapeType: 'reviews' });
206
lib/index.js
@@ -12,7 +12,8 @@ 'use strict' | ||
const json2csvParser = new Json2csvParser({ fields: ['title', 'price', 'rating', 'reviews', 'score', 'url', 'sponsored', 'discounted', 'before_discount', 'asin' ] }); | ||
const productsParser = new Json2csvParser({ fields: ['title', 'price', 'rating', 'reviews', 'score', 'url', 'sponsored', 'discounted', 'before_discount', 'asin' ] }); | ||
const reviewsParser = new Json2csvParser({ fields: ['id', 'review_data', 'name', 'rating', 'title', 'review' ] }); | ||
class AmazonScraper{ | ||
constructor({ keyword, number, sponsored, proxy, cli, save }){ | ||
constructor({ keyword, number, sponsored, proxy, cli, save, scrapeType, asin, sort}){ | ||
this._mainHost = `https://www.amazon.com/`; | ||
@@ -23,3 +24,3 @@ this._cookieJar = jar(); | ||
this._keyword = keyword; | ||
this._number = parseInt(number) || 20; | ||
this._number = parseInt(number) || 10; | ||
this._continue = true; | ||
@@ -31,2 +32,5 @@ this._searchPage = 1; | ||
this._cli = cli || false; | ||
this._scrapeType = scrapeType; | ||
this._asin = '' || asin; | ||
this._sort = false || sort; | ||
} | ||
@@ -78,13 +82,23 @@ | ||
_searchProduct(){ | ||
_startScraper(){ | ||
return new Promise( async (resolve, reject) => { | ||
if (!this._keyword){ | ||
return reject('Keyword is missing'); | ||
if (this._scrapeType === 'products'){ | ||
if (!this._keyword){ | ||
return reject('Keyword is missing'); | ||
} | ||
if (this._number>100){ | ||
return reject('Wow.... slow down cowboy. Maximum you can get is 100 products'); | ||
} | ||
if (typeof(this._sponsored)!=='boolean'){ | ||
return reject('Sponsored can only be {true} or {false}'); | ||
} | ||
} | ||
if (this._number>100){ | ||
return reject('Wow.... slow down cowboy. Maximum you can get is 100 products'); | ||
if (this._scrapeType === 'reviews'){ | ||
if (!this._asin){ | ||
return reject('ASIN is missing'); | ||
} | ||
if (this._number>200){ | ||
return reject('Wow.... slow down cowboy. Maximum you can get is 200 reviews'); | ||
} | ||
} | ||
if (typeof(this._sponsored)!=='boolean'){ | ||
return reject('Sponsored can only be {true} or {false}'); | ||
} | ||
if(this._cli){ | ||
@@ -96,16 +110,37 @@ spinner.start() | ||
while(this._continue){ | ||
if (Object.keys(this._scrapedProducts).length>=this._number){ | ||
if (this._endProductList.length>=this._number){ | ||
break; | ||
} | ||
let body = await this._initSearch(); | ||
this._grabProduct(body); | ||
try{ | ||
let body = await this._initSearch(); | ||
if (this._scrapeType === 'products'){ | ||
this._grabProduct(body); | ||
} | ||
if (this._scrapeType === 'reviews'){ | ||
this._grabReviews(body); | ||
} | ||
}catch(error){ | ||
break; | ||
} | ||
} | ||
for(let key in this._scrapedProducts){ | ||
this._endProductList.push(this._scrapedProducts[key]) | ||
if (this._sort){ | ||
if (this._scrapeType === 'products'){ | ||
this._endProductList.sort((a,b)=>{ | ||
return b.score-a.score; | ||
}) | ||
} | ||
if (this._scrapeType === 'reviews'){ | ||
this._endProductList.sort((a,b)=>{ | ||
return b.rating-a.rating; | ||
}) | ||
} | ||
} | ||
this._endProductList.sort((a,b)=>{ | ||
return b.score-a.score; | ||
}) | ||
if (this._save){ | ||
fs.writeFileSync(`${Date.now()}.csv`, json2csvParser.parse(this._endProductList)); | ||
if (this._scrapeType === 'products'){ | ||
fs.writeFileSync(`${Date.now()}_products.csv`, productsParser.parse(this._endProductList)); | ||
} | ||
if (this._scrapeType === 'reviews'){ | ||
fs.writeFileSync(`${Date.now()}_${this._asin}_reviews.csv`, reviewsParser.parse(this._endProductList)); | ||
} | ||
} | ||
@@ -121,13 +156,28 @@ if (this._cli){ | ||
return new Promise( async (resolve, reject) => { | ||
let request = { | ||
'method': 'GET', | ||
'uri': 's', | ||
'qs':{ | ||
'k': this._keyword, | ||
...(this._searchPage>1 ? {'page': this._searchPage, 'ref': `sr_pg_${this._searchPage}` }: {}) | ||
}, | ||
'headers':{ | ||
'referer':'https://www.amazon.com/', | ||
let request; | ||
if (this._scrapeType === 'products'){ | ||
request = { | ||
'method': 'GET', | ||
'uri': 's', | ||
'qs':{ | ||
'k': this._keyword, | ||
...(this._searchPage>1 ? {'page': this._searchPage, 'ref': `sr_pg_${this._searchPage}` }: {}) | ||
}, | ||
'headers':{ | ||
'referer':'https://www.amazon.com/', | ||
} | ||
} | ||
} | ||
if (this._scrapeType === 'reviews'){ | ||
request = { | ||
'method': 'GET', | ||
'uri': `product-reviews/${this._asin}/`, | ||
'qs':{ | ||
...(this._searchPage>1 ? {'pageNumber': this._searchPage }: {}) | ||
}, | ||
'headers':{ | ||
'referer':'https://www.amazon.com/', | ||
} | ||
} | ||
} | ||
try{ | ||
@@ -143,7 +193,75 @@ let response = await this._request(request); | ||
_grabReviews(body){ | ||
let $ = cheerio.load(body.replace(/\s\s+/g, '').replace(/\n/g, '')); | ||
let reviewsList = $('.a-section.a-spacing-none.review-views.celwidget')[0].children; | ||
let scrapingResult = {}; | ||
for(let i=0; i<reviewsList.length; i++){ | ||
let totalInResult = Object.keys(scrapingResult).length+this._endProductList.length; | ||
if (totalInResult >=this._number){ | ||
break; | ||
} | ||
if (!reviewsList[i].attribs['id']){ | ||
continue; | ||
} | ||
scrapingResult[reviewsList[i].attribs['id']] = { id: reviewsList[i].attribs['id'] } | ||
} | ||
for (let key in scrapingResult){ | ||
let search = $(`#${key} [data-hook="review-date"]`); | ||
try{ | ||
scrapingResult[key].review_data = search[0].children[0].data | ||
}catch(error){ | ||
continue; | ||
} | ||
} | ||
for (let key in scrapingResult){ | ||
let search = $(`#${key} .a-profile-name`); | ||
try{ | ||
scrapingResult[key].name = search[0].children[0].data | ||
}catch(error){ | ||
continue; | ||
} | ||
} | ||
for (let key in scrapingResult){ | ||
let search = $(`#${key} [data-hook="review-star-rating"]`); | ||
try{ | ||
scrapingResult[key].rating = parseFloat(search[0].children[0].children[0].data.split(' ')[0]) | ||
}catch(error){ | ||
continue; | ||
} | ||
} | ||
for (let key in scrapingResult){ | ||
let search = $(`#${key} [data-hook="review-title"]`); | ||
try{ | ||
scrapingResult[key].title = $(search[0]).text().toString() | ||
}catch(error){ | ||
continue; | ||
} | ||
} | ||
for (let key in scrapingResult){ | ||
let search = $(`#${key} [data-hook="review-body"]`); | ||
try{ | ||
scrapingResult[key].review = $(search[0]).text() | ||
}catch(error){ | ||
continue; | ||
} | ||
} | ||
for(let key in scrapingResult){ | ||
this._endProductList.push(scrapingResult[key]) | ||
} | ||
return; | ||
} | ||
_grabProduct(body){ | ||
let $ = cheerio.load(body.replace(/\s\s+/g, '').replace(/\n/g, '')); | ||
let productList = $('div[data-index]'); | ||
let scrapingResult = {}; | ||
for(let i=0; i<productList.length; i++){ | ||
if (Object.keys(this._scrapedProducts).length >=this._number){ | ||
let totalInResult = Object.keys(scrapingResult).length+this._endProductList.length; | ||
if (totalInResult >=this._number){ | ||
break; | ||
@@ -154,11 +272,12 @@ } | ||
} | ||
this._scrapedProducts[productList[i].attribs['data-asin']] = { asin: productList[i].attribs['data-asin'], discounted: false, sponsored: false } | ||
scrapingResult[productList[i].attribs['data-asin']] = { asin: productList[i].attribs['data-asin'], discounted: false, sponsored: false, reviews:0, rating:0, score:0 } | ||
} | ||
for (let key in this._scrapedProducts){ | ||
for (let key in scrapingResult){ | ||
let search = $(`div[data-asin=${key}] .a-offscreen`); | ||
try{ | ||
this._scrapedProducts[key].price = search[0].children[0].data; | ||
scrapingResult[key].price = search[0].children[0].data; | ||
if (search.length>1){ | ||
this._scrapedProducts[key].before_discount = search[1].children[0].data; | ||
this._scrapedProducts[key].discounted = true; | ||
scrapingResult[key].before_discount = search[1].children[0].data; | ||
scrapingResult[key].discounted = true; | ||
} | ||
@@ -170,8 +289,8 @@ }catch(err){ | ||
for (let key in this._scrapedProducts){ | ||
for (let key in scrapingResult){ | ||
let search = $(`div[data-asin=${key}] .a-icon-star-small`); | ||
try{ | ||
this._scrapedProducts[key].rating = parseFloat(search[0].children[0].children[0].data) | ||
this._scrapedProducts[key].reviews = parseInt(search[0].parent.parent.parent.next.attribs['aria-label'].replace(/\,/g, '')); | ||
this._scrapedProducts[key].score = parseFloat(this._scrapedProducts[key].rating*this._scrapedProducts[key].reviews).toFixed(2); | ||
scrapingResult[key].rating = parseFloat(search[0].children[0].children[0].data) | ||
scrapingResult[key].reviews = parseInt(search[0].parent.parent.parent.next.attribs['aria-label'].replace(/\,/g, '')); | ||
scrapingResult[key].score = parseFloat(scrapingResult[key].rating*scrapingResult[key].reviews).toFixed(2); | ||
}catch(err){ | ||
@@ -181,7 +300,7 @@ continue; | ||
} | ||
for (let key in this._scrapedProducts){ | ||
for (let key in scrapingResult){ | ||
let search = $(`div[data-asin=${key}] [data-image-source-density="1"]`); | ||
try{ | ||
this._scrapedProducts[key].title = search[0].attribs.alt | ||
this._scrapedProducts[key].url = `https://www.amazon.com${search[0].parent.parent.attribs.href}`; | ||
scrapingResult[key].title = search[0].attribs.alt | ||
scrapingResult[key].url = `https://www.amazon.com${search[0].parent.parent.attribs.href}`; | ||
}catch(err){ | ||
@@ -191,2 +310,5 @@ continue; | ||
} | ||
for(let key in scrapingResult){ | ||
this._endProductList.push(scrapingResult[key]) | ||
} | ||
return; | ||
@@ -193,0 +315,0 @@ } |
{ | ||
"name": "amazon-buddy", | ||
"version": "1.2.2", | ||
"version": "1.3.0", | ||
"description": "Amazon Scraper. Scrape useful product information from the amazon search results", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
# Amazon Product Scraper | ||
![NPM](https://img.shields.io/npm/l/amazon-buddy.svg?style=for-the-badge) ![npm](https://img.shields.io/npm/v/amazon-buddy.svg?style=for-the-badge) | ||
@@ -8,7 +8,11 @@ Useful tool to scrape product information from amazon | ||
## Features | ||
* Scrape products from amazon search result: asin, rating, number of reviews, price, title, url, sponsored or not, discounted or not | ||
* **Scrape products** from amazon search result: asin, rating, number of reviews, price, title, url, sponsored or not, discounted or not | ||
* **Scrape reviews** of an amazon product (by ASIN): title, review, rating, reviewer name and date when it was posted | ||
* Result can be saved to a CSV file | ||
* You can scrape up to 100 produtcs | ||
* You can scrape up to **100 products** and **200 reviews** | ||
**Product List** | ||
![alt text](https://i.imgur.com/FfNDX2J.png) | ||
**Review List** | ||
![alt text](https://i.imgur.com/HuBW3rl.png) | ||
@@ -43,26 +47,40 @@ **Note:** | ||
Commands: | ||
amazon-buddy search [options] | ||
amazon-buddy products scrape for a products from the provided key word | ||
amazon-buddy reviews scrape reviews from a product | ||
Options: | ||
--help, -h help [boolean] | ||
--version Show version number [boolean] | ||
--keyword, -k Amazon search keyword ex. 'Xbox one' | ||
[string] [required] [default: ""] | ||
--products, -p Number of products to scrape. Maximum 100 [default: 20] | ||
--save, -s Save to a CSV file? [boolean] [default: false] | ||
--sponsored, -s Scrape sponsored products [boolean] [default: false] | ||
--help, -h help [boolean] | ||
--version Show version number [boolean] | ||
--keyword, -k Amazon search keyword ex. 'Xbox one' [string] [default: ""] | ||
--asin, -a To scrape reviews you need to provide product ASIN(amazon | ||
product id) [string] [default: ""] | ||
--number, -n Number of products to scrape. Maximum 100 products or 300 reviews [default: 10] | ||
--save, -s Save to a CSV file? [boolean] [default: true] | ||
--sort If searching for a products then list will be sorted by a higher | ||
score(reviews*rating). If searching for a reviews then they will | ||
be sorted by rating. [boolean] [default: false] | ||
Examples: | ||
amazon-buddy search -k 'Xbox one' | ||
amazon-buddy products -k 'Xbox one' | ||
amazon-buddy reviews -a B01GW3H3U8 | ||
``` | ||
**Example** | ||
**Example 1** | ||
Scrape 40 products for the "vacume cleaner" keyword and save everything to a CSV file | ||
```sh | ||
$ amazon-buddy search -k 'vacume cleaner' -s -p 40 | ||
$ amazon-buddy products -k 'vacume cleaner' -n 40 | ||
``` | ||
**The file will be saved in a folder from which you run the script: | ||
1552945544582_products.csv** | ||
**Example 2** | ||
Scrape 100 reviews from a product by using ASIN. | ||
***NOTE: ASIN is a unique amazon product ID. It can be found in the product URL, or, if you have scraped a product list with our tool, in the `asin` column of the CSV file*** | ||
```sh | ||
$ amazon-buddy reviews -a B01GW3H3U8 -n 100 | ||
``` | ||
**The file will be saved in a folder from which you run the script: | ||
1552945544582.csv** | ||
1552945544582_B01GW3H3U8_reviews.csv** | ||
@@ -75,4 +93,4 @@ **Module** | ||
try{ | ||
let result = await amazonScraper({keyword: 'Xbox One', number: 50, save: true }); | ||
console.log(result) | ||
let products = await amazonScraper.products({keyword: 'Xbox One', number: 50, save: true }); | ||
let reviews = await amazonScraper.reviews({asin: 'B01GW3H3U8', number: 50, save: true }); | ||
}catch(error){ | ||
@@ -83,3 +101,3 @@ console.log(error); | ||
``` | ||
**JSON/CSV output:** | ||
**JSON/CSV output(products):** | ||
``` | ||
@@ -96,2 +114,13 @@ [{ | ||
``` | ||
**JSON/CSV output(reviews):** | ||
``` | ||
[{ | ||
id: 'R335O5YFEWQUNE', | ||
review_data: '6-Apr-17', | ||
name: 'Bob', | ||
title: 'Happy Gamer', | ||
rating: 5, | ||
review: 'blah blah blah' | ||
}...] | ||
``` | ||
@@ -104,3 +133,3 @@ **Options** | ||
//Number of products to scrape. Default 20 | ||
//Number of products to scrape. Default 10 | ||
number: 20, | ||
@@ -113,2 +142,5 @@ | ||
proxy: "", | ||
//Sorting. If searching for a products then list will be sorted by a higher score(number of reviews*rating). If searching for a reviews then they will be sorted by rating. | ||
sort: true | ||
}; | ||
@@ -115,0 +147,0 @@ ``` |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
19423
396
147