Socket
Socket
Sign inDemoInstall

amazon-buddy

Package Overview
Dependencies
Maintainers
1
Versions
87
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

amazon-buddy - npm Package Compare versions

Comparing version 1.2.2 to 1.3.0

33

bin/cli.js

@@ -16,3 +16,3 @@ #!/usr/bin/env node

await AmazonScraper(argv)._searchProduct()
await AmazonScraper(argv)._startScraper()
} catch(error){

@@ -25,5 +25,6 @@ console.log(error);

.usage('Usage: $0 <command> [options]')
.example(`$0 search -k 'Xbox one'`)
.example(`$0 products -k 'Xbox one'`)
.example(`$0 reviews -a B01GW3H3U8`)
.command(
"search",
"products",
"scrape for a products from the provided key word",

@@ -35,2 +36,10 @@ {},

)
.command(
"reviews",
"scrape reviews from a product, by providing ASIN",
{},
(argv) => {
startScraper(argv);
}
)
.options({

@@ -47,17 +56,27 @@ 'help': {

},
'asin': {
alias: 'a',
default: '',
type: 'string',
describe: "To scrape reviews you need to provide product ASIN(amazon product id)"
},
'number':{
alias: 'n',
default: 20,
default: 10,
type: 'integer',
describe: 'Number of products to scrape. Maximum 100'
describe: 'Number of products to scrape. Maximum 100 products or 300 reviews'
},
'save':{
alias: 's',
default: false,
default: true,
type: 'boolean',
describe: 'Save to a CSV file?'
},
'sort':{
default: false,
type: 'boolean',
describe: 'If searching for a products then list will be sorted by a higher score(number of reviews*rating). If searching for a reviews then they will be sorted by rating.'
},
})
.demandCommand()
.demandOption(['keyword'])
.argv

@@ -8,3 +8,3 @@ "use strict";

try{
resolve(await AmazonScraper(options)._searchProduct());
resolve(await AmazonScraper(options)._startScraper());
} catch(error){

@@ -16,2 +16,23 @@ reject(error);

module.exports = scraper;
exports.products = ( options ) => {
return new Promise( async (resolve, reject) => {
options.scrapeType = 'products';
try{
return resolve(await scraper(options));
}catch(error){
return reject(error);
}
})
}
exports.reviews = ( options ) => {
return new Promise( async (resolve, reject) => {
options.scrapeType = 'reviews';
try{
return resolve(await scraper(options));
}catch(error){
return reject(error);
}
})
}

@@ -12,7 +12,8 @@ 'use strict'

const json2csvParser = new Json2csvParser({ fields: ['title', 'price', 'rating', 'reviews', 'score', 'url', 'sponsored', 'discounted', 'before_discount', 'asin' ] });
const productsParser = new Json2csvParser({ fields: ['title', 'price', 'rating', 'reviews', 'score', 'url', 'sponsored', 'discounted', 'before_discount', 'asin' ] });
const reviewsParser = new Json2csvParser({ fields: ['id', 'review_data', 'name', 'rating', 'title', 'review' ] });
class AmazonScraper{
constructor({ keyword, number, sponsored, proxy, cli, save }){
constructor({ keyword, number, sponsored, proxy, cli, save, scrapeType, asin, sort}){
this._mainHost = `https://www.amazon.com/`;

@@ -23,3 +24,3 @@ this._cookieJar = jar();

this._keyword = keyword;
this._number = parseInt(number) || 20;
this._number = parseInt(number) || 10;
this._continue = true;

@@ -31,2 +32,5 @@ this._searchPage = 1;

this._cli = cli || false;
this._scrapeType = scrapeType;
this._asin = '' || asin;
this._sort = false || sort;
}

@@ -78,13 +82,23 @@

_searchProduct(){
_startScraper(){
return new Promise( async (resolve, reject) => {
if (!this._keyword){
return reject('Keyword is missing');
if (this._scrapeType === 'products'){
if (!this._keyword){
return reject('Keyword is missing');
}
if (this._number>100){
return reject('Wow.... slow down cowboy. Maximum you can get is 100 products');
}
if (typeof(this._sponsored)!=='boolean'){
return reject('Sponsored can only be {true} or {false}');
}
}
if (this._number>100){
return reject('Wow.... slow down cowboy. Maximum you can get is 100 products');
if (this._scrapeType === 'reviews'){
if (!this._asin){
return reject('ASIN is missing');
}
if (this._number>200){
return reject('Wow.... slow down cowboy. Maximum you can get is 200 reviews');
}
}
if (typeof(this._sponsored)!=='boolean'){
return reject('Sponsored can only be {true} or {false}');
}
if(this._cli){

@@ -96,16 +110,37 @@ spinner.start()

while(this._continue){
if (Object.keys(this._scrapedProducts).length>=this._number){
if (this._endProductList.length>=this._number){
break;
}
let body = await this._initSearch();
this._grabProduct(body);
try{
let body = await this._initSearch();
if (this._scrapeType === 'products'){
this._grabProduct(body);
}
if (this._scrapeType === 'reviews'){
this._grabReviews(body);
}
}catch(error){
break;
}
}
for(let key in this._scrapedProducts){
this._endProductList.push(this._scrapedProducts[key])
if (this._sort){
if (this._scrapeType === 'products'){
this._endProductList.sort((a,b)=>{
return b.score-a.score;
})
}
if (this._scrapeType === 'reviews'){
this._endProductList.sort((a,b)=>{
return b.rating-a.rating;
})
}
}
this._endProductList.sort((a,b)=>{
return b.score-a.score;
})
if (this._save){
fs.writeFileSync(`${Date.now()}.csv`, json2csvParser.parse(this._endProductList));
if (this._scrapeType === 'products'){
fs.writeFileSync(`${Date.now()}_products.csv`, productsParser.parse(this._endProductList));
}
if (this._scrapeType === 'reviews'){
fs.writeFileSync(`${Date.now()}_${this._asin}_reviews.csv`, reviewsParser.parse(this._endProductList));
}
}

@@ -121,13 +156,28 @@ if (this._cli){

return new Promise( async (resolve, reject) => {
let request = {
'method': 'GET',
'uri': 's',
'qs':{
'k': this._keyword,
...(this._searchPage>1 ? {'page': this._searchPage, 'ref': `sr_pg_${this._searchPage}` }: {})
},
'headers':{
'referer':'https://www.amazon.com/',
let request;
if (this._scrapeType === 'products'){
request = {
'method': 'GET',
'uri': 's',
'qs':{
'k': this._keyword,
...(this._searchPage>1 ? {'page': this._searchPage, 'ref': `sr_pg_${this._searchPage}` }: {})
},
'headers':{
'referer':'https://www.amazon.com/',
}
}
}
if (this._scrapeType === 'reviews'){
request = {
'method': 'GET',
'uri': `product-reviews/${this._asin}/`,
'qs':{
...(this._searchPage>1 ? {'pageNumber': this._searchPage }: {})
},
'headers':{
'referer':'https://www.amazon.com/',
}
}
}
try{

@@ -143,7 +193,75 @@ let response = await this._request(request);

_grabReviews(body){
let $ = cheerio.load(body.replace(/\s\s+/g, '').replace(/\n/g, ''));
let reviewsList = $('.a-section.a-spacing-none.review-views.celwidget')[0].children;
let scrapingResult = {};
for(let i=0; i<reviewsList.length; i++){
let totalInResult = Object.keys(scrapingResult).length+this._endProductList.length;
if (totalInResult >=this._number){
break;
}
if (!reviewsList[i].attribs['id']){
continue;
}
scrapingResult[reviewsList[i].attribs['id']] = { id: reviewsList[i].attribs['id'] }
}
for (let key in scrapingResult){
let search = $(`#${key} [data-hook="review-date"]`);
try{
scrapingResult[key].review_data = search[0].children[0].data
}catch(error){
continue;
}
}
for (let key in scrapingResult){
let search = $(`#${key} .a-profile-name`);
try{
scrapingResult[key].name = search[0].children[0].data
}catch(error){
continue;
}
}
for (let key in scrapingResult){
let search = $(`#${key} [data-hook="review-star-rating"]`);
try{
scrapingResult[key].rating = parseFloat(search[0].children[0].children[0].data.split(' ')[0])
}catch(error){
continue;
}
}
for (let key in scrapingResult){
let search = $(`#${key} [data-hook="review-title"]`);
try{
scrapingResult[key].title = $(search[0]).text().toString()
}catch(error){
continue;
}
}
for (let key in scrapingResult){
let search = $(`#${key} [data-hook="review-body"]`);
try{
scrapingResult[key].review = $(search[0]).text()
}catch(error){
continue;
}
}
for(let key in scrapingResult){
this._endProductList.push(scrapingResult[key])
}
return;
}
_grabProduct(body){
let $ = cheerio.load(body.replace(/\s\s+/g, '').replace(/\n/g, ''));
let productList = $('div[data-index]');
let scrapingResult = {};
for(let i=0; i<productList.length; i++){
if (Object.keys(this._scrapedProducts).length >=this._number){
let totalInResult = Object.keys(scrapingResult).length+this._endProductList.length;
if (totalInResult >=this._number){
break;

@@ -154,11 +272,12 @@ }

}
this._scrapedProducts[productList[i].attribs['data-asin']] = { asin: productList[i].attribs['data-asin'], discounted: false, sponsored: false }
scrapingResult[productList[i].attribs['data-asin']] = { asin: productList[i].attribs['data-asin'], discounted: false, sponsored: false, reviews:0, rating:0, score:0 }
}
for (let key in this._scrapedProducts){
for (let key in scrapingResult){
let search = $(`div[data-asin=${key}] .a-offscreen`);
try{
this._scrapedProducts[key].price = search[0].children[0].data;
scrapingResult[key].price = search[0].children[0].data;
if (search.length>1){
this._scrapedProducts[key].before_discount = search[1].children[0].data;
this._scrapedProducts[key].discounted = true;
scrapingResult[key].before_discount = search[1].children[0].data;
scrapingResult[key].discounted = true;
}

@@ -170,8 +289,8 @@ }catch(err){

for (let key in this._scrapedProducts){
for (let key in scrapingResult){
let search = $(`div[data-asin=${key}] .a-icon-star-small`);
try{
this._scrapedProducts[key].rating = parseFloat(search[0].children[0].children[0].data)
this._scrapedProducts[key].reviews = parseInt(search[0].parent.parent.parent.next.attribs['aria-label'].replace(/\,/g, ''));
this._scrapedProducts[key].score = parseFloat(this._scrapedProducts[key].rating*this._scrapedProducts[key].reviews).toFixed(2);
scrapingResult[key].rating = parseFloat(search[0].children[0].children[0].data)
scrapingResult[key].reviews = parseInt(search[0].parent.parent.parent.next.attribs['aria-label'].replace(/\,/g, ''));
scrapingResult[key].score = parseFloat(scrapingResult[key].rating*scrapingResult[key].reviews).toFixed(2);
}catch(err){

@@ -181,7 +300,7 @@ continue;

}
for (let key in this._scrapedProducts){
for (let key in scrapingResult){
let search = $(`div[data-asin=${key}] [data-image-source-density="1"]`);
try{
this._scrapedProducts[key].title = search[0].attribs.alt
this._scrapedProducts[key].url = `https://www.amazon.com${search[0].parent.parent.attribs.href}`;
scrapingResult[key].title = search[0].attribs.alt
scrapingResult[key].url = `https://www.amazon.com${search[0].parent.parent.attribs.href}`;
}catch(err){

@@ -191,2 +310,5 @@ continue;

}
for(let key in scrapingResult){
this._endProductList.push(scrapingResult[key])
}
return;

@@ -193,0 +315,0 @@ }

{
"name": "amazon-buddy",
"version": "1.2.2",
"version": "1.3.0",
"description": "Amazon Scraper. Scrape useful product information from the amazon search results",

@@ -5,0 +5,0 @@ "main": "index.js",

# Amazon Product Scraper
![NPM](https://img.shields.io/npm/l/amazon-buddy.svg?style=for-the-badge) ![npm](https://img.shields.io/npm/v/amazon-buddy.svg?style=for-the-badge)

@@ -8,7 +8,11 @@ Useful tool to scrape product information from amazon

## Features
* Scrape products from amazon search result: asin, rating, number of reviews, price, title, url, sponsored or not, discounted or not
* **Scrape products** from amazon search result: asin, rating, number of reviews, price, title, url, sponsored or not, discounted or not
* **Scrape reviews** from amazon search result: title, review, rating, reviewer name and date when it was posted
* Result can be saved to a CSV file
* You can scrape up to 100 produtcs
* You can scrape up to **100 products** and **300 reviews**
**Product List**
![alt text](https://i.imgur.com/FfNDX2J.png)
**Review List**
![alt text](https://i.imgur.com/HuBW3rl.png)

@@ -43,26 +47,40 @@ **Note:**

Commands:
amazon-buddy search [options]
amazon-buddy products scrape for a products from the provided key word
amazon-buddy reviews scrape reviews from a product
Options:
--help, -h help [boolean]
--version Show version number [boolean]
--keyword, -k Amazon search keyword ex. 'Xbox one'
[string] [required] [default: ""]
--products, -p Number of products to scrape. Maximum 100 [default: 20]
--save, -s Save to a CSV file? [boolean] [default: false]
--sponsored, -s Scrape sponsored products [boolean] [default: false]
--help, -h help [boolean]
--version Show version number [boolean]
--keyword, -k Amazon search keyword ex. 'Xbox one' [string] [default: ""]
--asin, -a To scrape reviews you need to provide product ASIN(amazon
product id) [string] [default: ""]
--number, -n Number of products to scrape. Maximum 100 products or 300 reviews [default: 10]
--save, -s Save to a CSV file? [boolean] [default: true]
--sort If searching for a products then list will be sorted by a higher
score(reviews*rating). If searching for a reviews then they will
be sorted by rating. [boolean] [default: false]
Examples:
amazon-buddy search -k 'Xbox one'
amazon-buddy products -k 'Xbox one'
amazon-buddy reviews -a B01GW3H3U8
```
**Example**
**Example 1**
Scrape 40 products from the "vacume cleaner" keyword and save everything to a CSV file
```sh
$ amazon-buddy search -k 'vacume cleaner' -s -p 40
$ amazon-buddy products -k 'vacume cleaner' -n 40
```
**The file will be saved in a folder from which you run the script:
1552945544582_products.csv**
**Example 2**
Scrape 100 reviews from a product by using ASIN.
***NOTE: ASIN is a unique amazon product ID. It can be found in the product URL, or, if you have scraped a product list with our tool, in the CSV file.***
```sh
$ amazon-buddy reviews -a B01GW3H3U8 -n 100
```
**The file will be saved in a folder from which you run the script:
1552945544582.csv**
1552945544582_B01GW3H3U8_reviews.csv**

@@ -75,4 +93,4 @@ **Module**

try{
let result = await amazonScraper({keyword: 'Xbox One', number: 50, save: true });
console.log(result)
let products = await amazonScraper.products({keyword: 'Xbox One', number: 50, save: true });
let reviews = await amazonScraper.reviews({asin: 'B01GW3H3U8', number: 50, save: true });
}catch(error){

@@ -83,3 +101,3 @@ console.log(error);

```
**JSON/CSV output:**
**JSON/CSV output(products):**
```

@@ -96,2 +114,13 @@ [{

```
**JSON/CSV output(reviews):**
```
[{
id: 'R335O5YFEWQUNE',
review_data: '6-Apr-17',
name: 'Bob',
title: 'Happy Gamer',
rating: 5,
review: 'blah blah blah'
}...]
```

@@ -104,3 +133,3 @@ **Options**

//Number of products to scrape. Default 20
//Number of products to scrape. Default 10
number: 20,

@@ -113,2 +142,5 @@

proxy: "",
//Sorting. If searching for a products then list will be sorted by a higher score(number of reviews*rating). If searching for a reviews then they will be sorted by rating.
sort: true
};

@@ -115,0 +147,0 @@ ```

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc