amazon-buddy
Advanced tools
Comparing version 2.2.3 to 2.2.4
@@ -459,3 +459,3 @@ // @ts-nocheck | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -471,3 +471,3 @@ | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -511,14 +511,26 @@ | ||
try { | ||
const bestSalesSection = $('#detailBulletsWrapper_feature_div > ul:nth-child(5) > li > span > ul > li'); | ||
for (let item in bestSalesSection) { | ||
const rank = $(bestSalesSection[item].children[0].children[0]).text(); | ||
const category = $(bestSalesSection[item].children[0].children[1]).text(); | ||
const link = bestSalesSection[item].children[0].children[1].attribs.href; | ||
if (category && rank) { | ||
const bestSeller = this.geo.best_seller(`${rank} ${category}`); | ||
const bestSalesSection = $(`#detailBulletsWrapper_feature_div > ul:nth-child(5) > li > span`)[0]; | ||
bestSalesSection.children.forEach((item) => { | ||
if (item.type === 'text' && item.data != '') { | ||
const bestSeller = this.geo.best_seller(item.data); | ||
if (bestSeller) { | ||
bestsellers_rank.push({ ...bestSeller, link: `${this.mainHost}${link}` }); | ||
bestsellers_rank.push({ ...bestSeller, link: `${this.mainHost}${item.next.attribs.href}` }); | ||
} | ||
} | ||
} | ||
if (item.type === 'tag' && item.name === 'ul') { | ||
if (item.children) { | ||
item.children.forEach((rankItem) => { | ||
const rank = $(rankItem.children[0].children[0]).text(); | ||
const category = $(rankItem.children[0].children[1]).text(); | ||
const link = rankItem.children[0].children[1].attribs.href; | ||
if (category && rank) { | ||
const bestSeller = this.geo.best_seller(`${rank} ${category}`); | ||
if (bestSeller) { | ||
bestsellers_rank.push({ ...bestSeller, link: `${this.mainHost}${link}` }); | ||
} | ||
} | ||
}); | ||
} | ||
} | ||
}); | ||
} catch {} | ||
@@ -551,3 +563,3 @@ | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -559,3 +571,3 @@ } else { | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -585,3 +597,3 @@ } | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -608,3 +620,3 @@ } | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -650,3 +662,3 @@ | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -672,3 +684,3 @@ | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -707,3 +719,3 @@ variants.push(variant); | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -720,3 +732,3 @@ variant.title = item.attribs['title'] ? item.attribs['title'].split(this.geo.variants.split_text)[1] : ''; //item.children[0].children[0].children[0].children[0].children[0].children[0].children[1].children[0].children[0].attribs.alt; | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -904,2 +916,3 @@ } | ||
variants: [], | ||
categories: [], | ||
asin: this.asin, | ||
@@ -951,4 +964,13 @@ url: `${this.mainHost}/dp/${this.asin}`, | ||
also_bought: [], | ||
other_sellers: [], | ||
}; | ||
output.other_sellers = this.extractOtherSellers($); | ||
const authors = this.extractAuthors($); | ||
if (authors.length) { | ||
output.authors = authors; | ||
} | ||
output.categories = this.extractCategories($); | ||
output.item_available = $('span.qa-availability-message').text() ? false : true; | ||
@@ -988,3 +1010,3 @@ | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -998,3 +1020,3 @@ } | ||
} catch { | ||
//continue regardless of error | ||
// continue regardless of error | ||
} | ||
@@ -1037,13 +1059,3 @@ } | ||
const thumbnail = $('span[data-action="thumb-action"]'); | ||
for (let i = 0; i < thumbnail.length; i++) { | ||
try { | ||
let url = thumbnail[i].children[0].children[0].children[1].children[0].attribs.src; | ||
if (url.indexOf('x-locale/common') === -1) { | ||
output.images.push(`${url.split('._')[0]}._AC_SY879_.jpg`); | ||
} | ||
} catch { | ||
// continue regardless of error | ||
} | ||
} | ||
output.images = this.extractImages($, body); | ||
output.main_image = output.images.length ? output.images[0] : ''; | ||
@@ -1195,21 +1207,165 @@ output.total_images = output.images.length; | ||
async extractCategories() { | ||
const body = await this.buildRequest(); | ||
const $ = cheerio.load(body.replace(/\s\s+/g, '').replace(/\n/g, '')); | ||
const categorySelect = $('#searchDropdownBox')[0]; | ||
/** | ||
* Extract product category/subcategory | ||
* @param {*} $ | ||
*/ | ||
extractCategories($) { | ||
const categories = []; | ||
const cateogriesss = $('#wayfinding-breadcrumbs_feature_div > ul')[0]; | ||
if (cateogriesss && cateogriesss.children) { | ||
cateogriesss.children.forEach((item) => { | ||
try { | ||
if (!item.attribs.class) { | ||
const url = `${this.mainHost}${item.children[0].children[0].attribs.href}`; | ||
const category = item.children[0].children[0].children[0].data; | ||
categories.push({ | ||
category, | ||
url, | ||
}); | ||
} | ||
} catch { | ||
// continue regardless of error | ||
} | ||
}); | ||
} | ||
return categories; | ||
} | ||
if (!Array.isArray(categorySelect.children)) { | ||
throw new Error("Can't find category selector"); | ||
/** | ||
* In case of a book we can extract name of authors | ||
*/ | ||
extractAuthors($) { | ||
const authors = []; | ||
const byAuthors = $('#bylineInfo')[0]; | ||
if (byAuthors && byAuthors.children) { | ||
byAuthors.children.forEach((item, index) => { | ||
try { | ||
if (item && item.name === 'span' && item.attribs.class.indexOf('author') > -1) { | ||
let url = ''; | ||
let author = ''; | ||
let role = $(`#bylineInfo > span:nth-child(${index}) > span.contribution`).text(); | ||
if (role) { | ||
role = role.replace(/[(),\s]/g, ''); | ||
} | ||
if (item.children[0].name === 'a') { | ||
const link = $(`#bylineInfo > span:nth-child(${index}) > a`)[0].attribs.href; | ||
if (link != '#') { | ||
url = `${this.mainHost}${link}`; | ||
author = $(`#bylineInfo > span:nth-child(${index}) > a`).text(); | ||
} | ||
} else { | ||
const link = $(`#bylineInfo > span:nth-child(1) > span.a-declarative > a.a-link-normal.contributorNameID`)[0].attribs | ||
.href; | ||
url = `${this.mainHost}${link}`; | ||
author = $(`#bylineInfo > span:nth-child(1) > span.a-declarative > a.a-link-normal.contributorNameID`).text(); | ||
} | ||
if (author && url) { | ||
authors.push({ | ||
author, | ||
role, | ||
url, | ||
}); | ||
} | ||
} | ||
} catch { | ||
// continue regardless of error | ||
} | ||
}); | ||
} | ||
return authors; | ||
} | ||
let categories = {}; | ||
for (let select of categorySelect.children) { | ||
const category = select.attribs.value.split('search-alias=')[1]; | ||
categories[category] = { | ||
name: select.children[0].data, | ||
category, | ||
}; | ||
/** | ||
* Extract data from section "Other sellers on Amazon" | ||
* @param {*} $ | ||
*/ | ||
extractOtherSellers($) { | ||
const other_sellers = []; | ||
const moreBuyingOptions = $('#mbc')[0]; | ||
let position = 0; | ||
if (moreBuyingOptions && moreBuyingOptions.children) { | ||
moreBuyingOptions.children.forEach((item, index) => { | ||
try { | ||
if (item.attribs.class && item.attribs.class === 'a-box mbc-offer-row pa_mbc_on_amazon_offer') { | ||
position += 1; | ||
const price = $(`#mbc > div:nth-child(${index + 1}) > div > span.a-declarative > div > div:nth-child(1)`).text(); | ||
const seller = $(`#mbc-sold-by-${position} > span.a-size-small.mbcMerchantName`).text(); | ||
const url = `${this.mainHost}${$(`#mbc-buybutton-addtocart-${position}-announce`)[0].attribs.href}`; | ||
other_sellers.push({ | ||
position, | ||
seller, | ||
url, | ||
price: { | ||
symbol: this.geo.symbol, | ||
currency: this.geo.currency, | ||
current_price: this.geo.price_format(price), | ||
}, | ||
}); | ||
} | ||
} catch { | ||
// continue regardless of error | ||
} | ||
}); | ||
} | ||
return other_sellers; | ||
} | ||
return categories; | ||
/** | ||
* Extract images | ||
* @param {*} $ | ||
* @param {*} body | ||
*/ | ||
extractImages($, body) { | ||
let images = []; | ||
/** | ||
* Some product have all the images located in the imageGalleryData array | ||
* We will check if this array exists, if exists then we will extract it and collect the image url's | ||
* Product types: books | ||
* */ | ||
const imgRegex = /'imageGalleryData' : (.+),'centerColMargin'/.exec(body); | ||
if (imgRegex) { | ||
try { | ||
const imageGalleryData = JSON.parse(imgRegex[1]); | ||
images = imageGalleryData.map((item) => item.mainUrl); | ||
} catch { | ||
// continue regardless of error | ||
} | ||
} | ||
/** | ||
* If for example book item does have only one image | ||
* then {imageGalleryData} won't exist and we will use different way of extracting required data | ||
* Product types: books | ||
*/ | ||
if (!images.length) { | ||
const imageData = $('#imgBlkFront')[0]; | ||
if (imageData) { | ||
const data = imageData.attribs['data-a-dynamic-image']; | ||
const json = JSON.parse(data); | ||
const keys = Object.keys(json); | ||
const imageIdregex = /\/(\w+)\._/.exec(keys[0]); | ||
if (imageIdregex) { | ||
images.push(`https://images-na.ssl-images-amazon.com/images/I/${imageIdregex[1]}.jpg`); | ||
} | ||
} | ||
} | ||
/** | ||
* Extract images from other types of products | ||
* Product types: all other | ||
*/ | ||
if (!images.length) { | ||
const thumbnail = $('span[data-action="thumb-action"]'); | ||
for (let i = 0; i < thumbnail.length; i++) { | ||
try { | ||
let url = thumbnail[i].children[0].children[0].children[1].children[0].attribs.src; | ||
if (url.indexOf('x-locale/common') === -1) { | ||
images.push(`${url.split('._')[0]}._AC_SY879_.jpg`); | ||
} | ||
} catch { | ||
// continue regardless of error | ||
} | ||
} | ||
} | ||
return images; | ||
} | ||
@@ -1216,0 +1372,0 @@ } |
@@ -35,3 +35,3 @@ module.exports = { | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -66,2 +66,10 @@ } | ||
Department: { key: 'department' }, | ||
Language: { key: 'language' }, | ||
Publisher: { key: 'publisher' }, | ||
'Reading level': { key: 'reading_level' }, | ||
'Grade Level': { key: 'grade_level' }, | ||
Hardcover: { key: 'hardcover' }, | ||
Paperback: { key: 'paperback' }, | ||
'ISBN-10': { key: 'ISBN-10' }, | ||
'ISBN-13': { key: 'ISBN-13' }, | ||
}, | ||
@@ -82,3 +90,3 @@ }, | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -128,3 +136,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -175,3 +183,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -221,3 +229,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -261,3 +269,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -308,3 +316,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -355,3 +363,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -402,3 +410,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -453,3 +461,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -500,3 +508,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -543,3 +551,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -589,3 +597,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -631,3 +639,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -674,3 +682,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -720,3 +728,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -766,3 +774,3 @@ } | ||
if (match) { | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2] }; | ||
return { rank: parseInt(match[1].replace(/[^\d]/g, '')), category: match[2].trim() }; | ||
} | ||
@@ -769,0 +777,0 @@ } |
{ | ||
"name": "amazon-buddy", | ||
"version": "2.2.3", | ||
"version": "2.2.4", | ||
"description": "Amazon Scraper. You can scrape products from amazon search result and you can also scrape reviews from a specific product", | ||
@@ -5,0 +5,0 @@ "main": "./lib/index.js", |
146900
2286