bandcamp-fetch
Advanced tools
Comparing version 0.1.0-a-20210210 to 0.1.0-a-20210213
100
lib/index.js
@@ -290,8 +290,96 @@ const fetch = require('node-fetch'); | ||
async function _fetchPage(url, json = false) { | ||
return _cache.getOrSet('page', url + (json ? ':json' : ':html'), () => { | ||
return fetch(url).then( res => json ? res.json() : res.text() ); | ||
/**
 * Retrieves the filter options offered for the releases of a tag.
 *
 * The display names of the filter values are resolved first; the tag page is
 * then fetched and both are handed to the parser.
 *
 * @param {string} tagUrl URL of the tag page.
 * @returns {Promise<*>} Result of `parser.parseReleasesByTagFilterOptions`
 *   for the fetched page.
 */
async function getReleasesByTagFilterOptions(tagUrl) {
  const filterValueNames = await getReleasesByTagFilterValueNames(tagUrl);
  const html = await _fetchPage(tagUrl);
  return parser.parseReleasesByTagFilterOptions(html, { filterValueNames });
}
/**
 * Resolves the display names of the filter values used on a tag page.
 *
 * Fetches the tag's releases page, asks the parser for the path of the hub
 * script referenced by it, fetches that script and lets the parser extract
 * the names from it.
 *
 * @param {string} tagUrl URL of the tag page.
 * @returns {Promise<*>} Result of `parser.parseHubJSFilterValueNames`.
 */
async function getReleasesByTagFilterValueNames(tagUrl) {
  const html = await _fetchPage(utils.getReleasesByTagUrl(tagUrl));
  const hubScriptPath = await parser.parseHubJSPath(html);
  const js = await _fetchPage(hubScriptPath);
  return parser.parseHubJSFilterValueNames(js);
}
/**
 * Fetches the releases listed under a tag.
 *
 * The filters sent with the request start from the tag's own filter options:
 * for each filter the selected option wins, otherwise the default option is
 * used. Caller-supplied `params.filters` are then merged on top.
 *
 * @param {string} tagUrl URL of the tag page.
 * @param {Object} [params] Request parameters.
 * @param {Object} [params.filters] Filter values overriding the tag's own.
 * @param {number} [params.page] Page to request; falls back to 1.
 * @param {Object} [options] Output options.
 * @param {*} [options.imageFormat] Passed to `_parseImageFormatArg` with 9 as
 *   the second argument.
 * @returns {Promise<*>} Result of `parser.parseReleasesByTag`.
 */
async function getReleasesByTag(tagUrl, params = {}, options = {}) {
  const { baseUrl } = await _getImageConstants();
  const imageFormat = await _parseImageFormatArg(options.imageFormat, 9);
  const opts = { imageBaseUrl: baseUrl, imageFormat };

  const filterOptions = await getReleasesByTagFilterOptions(tagUrl);
  const filters = {};
  for (const filter of filterOptions) {
    const chosen = filter.options.find((opt) => opt.selected);
    const fallback = filter.options.find((opt) => opt.default);
    if (chosen) {
      // The 'tags' filter is sent as a list, every other filter as a single value.
      filters[filter.name] = filter.name === 'tags' ? [chosen.value] : chosen.value;
    } else if (fallback) {
      filters[filter.name] = fallback.value;
    }
  }
  if (params.filters) {
    Object.assign(filters, params.filters);
  }

  const postData = { filters, page: params.page || 1 };
  const json = await _fetchPage(utils.getDigDeeperUrl(), true, _getPostFetchOptions(postData));
  return parser.parseReleasesByTag(json, opts);
}
/**
 * Searches for tags matching a term.
 *
 * @param {Object} params Search parameters.
 * @param {string} params.q Sent as `search_term`.
 * @param {number} params.limit Sent as `count`.
 * @returns {Promise<*>} Result of `parser.parseSearchTagResults`.
 */
async function searchTag(params) {
  const { q, limit } = params;
  const endpoint = utils.getSearchTagUrl();
  const fetchOptions = _getPostFetchOptions({ search_term: q, count: limit });
  const json = await _fetchPage(endpoint, true, fetchOptions);
  return parser.parseSearchTagResults(json);
}
/**
 * Searches for locations matching a term.
 *
 * @param {Object} params Search parameters.
 * @param {string} params.q Sent as `q`.
 * @param {number} params.limit Sent as `n`.
 * @returns {Promise<*>} Result of `parser.parseSearchLocationResults`.
 */
async function searchLocation(params) {
  const { q, limit } = params;
  const endpoint = utils.getSearchLocationUrl();
  const fetchOptions = _getPostFetchOptions({ q, n: limit, geocoder_fallback: true });
  const json = await _fetchPage(endpoint, true, fetchOptions);
  return parser.parseSearchLocationResults(json);
}
/**
 * Fetches a URL through the 'page' cache.
 *
 * The cache key is built from the URL, the requested body format and, when
 * given, the serialized fetch options, so requests that differ only in
 * their options are cached separately.
 *
 * @param {string} url URL to fetch.
 * @param {boolean} [json=false] Read the response as JSON instead of text.
 * @param {?Object} [fetchOptions=null] Options passed to `fetch` when truthy.
 * @returns {Promise<*>} Whatever `_cache.getOrSet` returns for the key.
 */
async function _fetchPage(url, json = false, fetchOptions = null) {
  const formatSuffix = json ? ':json' : ':html';
  const optionsSuffix = fetchOptions ? ':' + JSON.stringify(fetchOptions) : '';
  const cacheKey = url + formatSuffix + optionsSuffix;

  const load = () => {
    const request = fetchOptions ? fetch(url, fetchOptions) : fetch(url);
    return request.then((res) => (json ? res.json() : res.text()));
  };

  return _cache.getOrSet('page', cacheKey, load);
}
/**
 * Builds the fetch options for a POST request carrying `postData`.
 *
 * NOTE(review): the body is JSON-serialized while the Content-Type header
 * declares form encoding; confirm the endpoints expect this before changing
 * either side.
 *
 * @param {*} postData Payload to serialize into the request body.
 * @returns {{method: string, body: string, headers: Object}} Fetch options.
 */
function _getPostFetchOptions(postData) {
  const body = JSON.stringify(postData);
  const headers = { 'Content-Type': 'application/x-www-form-urlencoded' };
  return { method: 'POST', body, headers };
}
// Cache functions | ||
@@ -325,3 +413,7 @@ const cache = { | ||
getArticleList, | ||
getArticle | ||
getArticle, | ||
getReleasesByTagFilterOptions, | ||
getReleasesByTag, | ||
searchTag, | ||
searchLocation | ||
}; |
@@ -5,2 +5,3 @@ const cheerio = require('cheerio'); | ||
const {EOL} = require('os'); | ||
const safeEval = require('safe-eval'); | ||
@@ -194,3 +195,3 @@ // https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js | ||
duration: getAdditionalPropertyValue(track.item, 'duration_secs'), | ||
streamUrl: _getStreamUrl(track.item.url) | ||
streamUrl: getAdditionalPropertyValue(track.item, 'file_mp3-128') || null | ||
}); | ||
@@ -1012,2 +1013,198 @@ }); | ||
/**
 * Extracts the URL of the "hub-*.js" script referenced by a page.
 *
 * @param {string} html Page markup to search.
 * @returns {?string} The value of the first `src` attribute that points to a
 *   hub script, or null when the page references none.
 */
function parseHubJSPath(html) {
  // `[^"]` keeps the match inside a single attribute value, so several
  // script tags on one line cannot be merged into one bogus path.
  const jsMatch = /src="([^"]+?hub-[^"]+?\.js)"/.exec(html);
  // exec() yields null when nothing matches; indexing it would throw.
  return jsMatch ? jsMatch[1] : null;
}
/**
 * Extracts the display names of filter values from the hub script.
 *
 * The script is searched for the "hubs/digdeeper/filter_value" entry. Its
 * value is evaluated and walked: each top-level block names a filter, and
 * the nested blocks below it name that filter's values along with the text
 * fragments that make up each value's display name.
 *
 * @param {string} js Source text of the hub script.
 * @returns {Object<string, Object<string, string>>} Map of filter name to a
 *   map of filter value to display name. Empty when the entry is missing or
 *   does not have the expected structure.
 */
function parseHubJSFilterValueNames(js) {
  const filterValueNames = {};
  const tObj = /"hubs\/digdeeper\/filter_value":(.+?)}\);/s.exec(js);
  // exec() returns null when the entry is absent.
  if (!tObj || !tObj[1]) {
    return filterValueNames;
  }

  // NOTE(review): this evaluates text downloaded from a remote server.
  // safe-eval has published sandbox-escape advisories; consider replacing
  // it with a data-only parser.
  const t = safeEval(tObj[1]);
  if (!t || !t[0] || !Array.isArray(t[0].blocks)) {
    return filterValueNames;
  }

  // Turns one attachment (plain string or 'translate' node) into trimmed text.
  const _getValFromBlockAttachment = attachment => {
    if (attachment !== null && typeof attachment === 'object' && attachment.type === 'translate') {
      return utils.stripLineBreaks(attachment.nodelist[0]).trim();
    }
    if (typeof attachment === 'string') {
      return utils.stripLineBreaks(attachment).trim();
    }
    return '';
  };

  t[0].blocks.forEach( filterBlock => {
    if (!filterBlock || typeof filterBlock.expression !== 'string') {
      return;
    }
    // Expressions have the form `<lhs>==<literal>`; the literal is the name.
    const filter = safeEval(filterBlock.expression.split('==')[1]);
    if (!filter || !Array.isArray(filterBlock.attachment)) {
      return;
    }
    const valueContainer = filterBlock.attachment.find( a => a && a.blocks );
    if (!valueContainer || !Array.isArray(valueContainer.blocks)) {
      return;
    }
    valueContainer.blocks
      .filter( block => block && block.expression )
      .forEach( valueBlock => {
        const value = safeEval(valueBlock.expression.split('==')[1]);
        if (value == null || !Array.isArray(valueBlock.attachment)) {
          return;
        }
        // Join the non-empty text fragments with single spaces.
        const valueName = valueBlock.attachment.reduce( (a, c) => {
          const cVal = utils.stripLineBreaks(_getValFromBlockAttachment(c)).trim();
          if (cVal === '') {
            return a;
          }
          return a !== '' ? a + ' ' + cVal : cVal;
        }, '');
        if (valueName) {
          if (!filterValueNames[filter]) {
            filterValueNames[filter] = {};
          }
          filterValueNames[filter][value] = valueName;
        }
      });
  });

  return filterValueNames;
}
/**
 * Parses the filters offered on a tag page.
 *
 * Reads the JSON blob stored in the `data-blob` attribute of `#pagedata`
 * and collects, for every filter of the second hub tab, its options along
 * with flags marking the selected and default ones.
 *
 * @param {string} html Markup of the tag page.
 * @param {Object} [opts] Parse options.
 * @param {Object} [opts.filterValueNames] Map of filter name to a map of
 *   option value to display name; takes precedence over the names in the
 *   page data.
 * @returns {Array<{name: string, options: Array<Object>}>} One entry per
 *   filter; empty when the page data lacks the expected structure.
 */
function parseReleasesByTagFilterOptions(html, opts) {
  const $ = cheerio.load(html);
  const blob = decode($('#pagedata[data-blob]').attr('data-blob'));
  const parsed = JSON.parse(blob);
  const filters = [];
  const valueNames = (opts && opts.filterValueNames) || {};

  // `typeof null` is 'object', so null has to be excluded explicitly.
  const isObject = v => v !== null && typeof v === 'object';

  if (!isObject(parsed) || !parsed.hub || !Array.isArray(parsed.hub.tabs)) {
    return filters;
  }
  const tab = parsed.hub.tabs[1]; // All releases
  if (!tab || !tab.dig_deeper || !isObject(tab.dig_deeper.filters)) {
    return filters;
  }

  // Flags the option matching `t` with `prop`, adding the option if missing.
  const _setOrAdd = (f, t, prop) => {
    if (!isObject(t)) {
      return;
    }
    const target = f.options.find( o => o.value === t.value );
    if (target) {
      target[prop] = true;
    }
    else if (t.value && t.name) {
      const tAdd = {
        value: t.value,
        name: t.name,
      };
      tAdd[prop] = true;
      f.options.push(tAdd);
    }
  };

  Object.keys(tab.dig_deeper.filters).forEach( filterName => {
    const filter = {
      name: filterName,
      options: []
    };
    const filterData = tab.dig_deeper.filters[filterName] || {};
    const namesForFilter = valueNames[filterName];

    if (Array.isArray(filterData.options)) {
      filterData.options.forEach( filterOption => {
        const valueName = namesForFilter && namesForFilter[filterOption.value]
          ? namesForFilter[filterOption.value]
          : filterOption.name || filterOption.value;
        filter.options.push({
          value: filterOption.value,
          name: valueName
        });
      });
    }

    if (Array.isArray(filterData.selected)) {
      filterData.selected.forEach( s => _setOrAdd(filter, s, 'selected') );
    }
    else {
      _setOrAdd(filter, filterData.selected, 'selected');
    }

    if (filterData.default) {
      _setOrAdd(filter, filterData.default, 'default');
    }

    filters.push(filter);
  });

  return filters;
}
/**
 * Converts a releases-by-tag response into a list of media items.
 *
 * @param {Object} json Response payload; must carry an `items` array.
 * @param {Object} opts Parse options.
 * @param {string} opts.imageBaseUrl Prefix for generated image URLs.
 * @param {{id: *}} opts.imageFormat Format whose `id` is embedded in image URLs.
 * @returns {?{items: Array<Object>, hasMore: *, filters: *}} Parsed result,
 *   or null (after logging) when the payload has no `items` array.
 */
function parseReleasesByTag(json, opts) {
  // `typeof null` is 'object', so null has to be rejected explicitly.
  if (json === null || typeof json !== 'object' || !Array.isArray(json.items)) {
    console.log('Failed to parse releases by tag');
    return null;
  }

  const typeNames = { a: 'album', t: 'track' };

  const items = json.items.map( item => {
    const mediaItem = {
      type: typeNames[item.type] || 'unknown',
      name: item.title,
      url: item.tralbum_url,
      imageUrl: '',
      genre: item.genre,
      artist: {
        name: item.artist,
        url: item.band_url
      },
      featuredTrack: ''
    };
    if (item.art_id) {
      mediaItem.imageUrl = opts.imageBaseUrl + '/img/a' + item.art_id + '_' + opts.imageFormat.id + '.jpg';
    }
    if (item.featured_track_title) {
      mediaItem.featuredTrack = {
        name: item.featured_track_title,
        streamUrl: (item.audio_url ? item.audio_url['mp3-128'] : null) || null
      };
    }
    return mediaItem;
  });

  return {
    items,
    hasMore: json.more_available,
    // Only a string needs decoding; a missing or already-decoded value is
    // passed through instead of making JSON.parse throw.
    filters: typeof json.filters === 'string' ? JSON.parse(json.filters) : json.filters
  };
}
/**
 * Converts a tag search response into a list of matches.
 *
 * @param {Object} json Response payload; must carry a `matching_tags` array.
 * @returns {?Array<{count: *, value: *, name: *}>} One entry per matching
 *   tag, or null (after logging) when the payload has no such array.
 */
function parseSearchTagResults(json) {
  // `typeof null` is 'object', so null has to be rejected explicitly.
  if (json === null || typeof json !== 'object' || !Array.isArray(json.matching_tags)) {
    console.log('Failed to parse search tag results');
    return null;
  }
  return json.matching_tags.map( match => ({
    count: match.count,
    value: match.tag_norm_name,
    name: match.tag_name
  }));
}
/**
 * Converts a location search response into a list of matches.
 *
 * @param {Object} json Response payload; must carry a `results` array.
 * @returns {?Array<{value: *, name: *, fullName: *}>} One entry per matching
 *   location, or null (after logging) when the payload has no such array.
 */
function parseSearchLocationResults(json) {
  // `typeof null` is 'object', so null has to be rejected explicitly.
  if (json === null || typeof json !== 'object' || !Array.isArray(json.results)) {
    console.log('Failed to parse search location results');
    return null;
  }
  return json.results.map( match => ({
    value: match.id,
    name: match.name,
    fullName: match.fullname
  }));
}
module.exports = { | ||
@@ -1029,3 +1226,9 @@ parseDiscoverResults, | ||
parseArticleList, | ||
parseArticle | ||
parseArticle, | ||
parseHubJSPath, | ||
parseHubJSFilterValueNames, | ||
parseReleasesByTagFilterOptions, | ||
parseReleasesByTag, | ||
parseSearchTagResults, | ||
parseSearchLocationResults | ||
}; |
@@ -143,2 +143,18 @@ const querystring = require('querystring'); | ||
/**
 * Builds the URL of a tag's "all releases" tab.
 *
 * @param {string} tagUrl URL of the tag page; the query is appended as is.
 * @returns {string} `tagUrl` followed by `?tab=all_releases`.
 */
function getReleasesByTagUrl(tagUrl) {
  const query = 'tab=all_releases';
  return `${tagUrl}?${query}`;
}
/**
 * Returns the URL of the hub `dig_deeper` API endpoint.
 *
 * @returns {string} Endpoint URL.
 */
function getDigDeeperUrl() {
  const endpoint = 'https://bandcamp.com/api/hub/2/dig_deeper';
  return endpoint;
}
/**
 * Returns the URL of the `search_tag` API endpoint.
 *
 * @returns {string} Endpoint URL.
 */
function getSearchTagUrl() {
  const endpoint = 'https://bandcamp.com/api/fansignup/1/search_tag';
  return endpoint;
}
/**
 * Returns the URL of the `geoname_search` API endpoint.
 *
 * @returns {string} Endpoint URL.
 */
function getSearchLocationUrl() {
  const endpoint = 'https://bandcamp.com/api/location/1/geoname_search';
  return endpoint;
}
module.exports = { | ||
@@ -161,3 +177,7 @@ getUrl, | ||
getShowUrl, | ||
getDailyUrl | ||
getDailyUrl, | ||
getReleasesByTagUrl, | ||
getDigDeeperUrl, | ||
getSearchTagUrl, | ||
getSearchLocationUrl | ||
}; |
{ | ||
"name": "bandcamp-fetch", | ||
"version": "0.1.0a-20210210", | ||
"version": "0.1.0a-20210213", | ||
"description": "JS library for scraping Bandcamp content", | ||
@@ -28,4 +28,5 @@ "main": "lib/index.js", | ||
"node-cache": "^5.1.2", | ||
"node-fetch": "^2.6.1" | ||
"node-fetch": "^2.6.1", | ||
"safe-eval": "^0.4.1" | ||
} | ||
} |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Manifest confusion
Supply chain risk: This package has inconsistent metadata. This could be malicious or caused by an error when publishing the package.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Manifest confusion
Supply chain risk: This package has inconsistent metadata. This could be malicious or caused by an error when publishing the package.
Found 1 instance in 1 package
357564
45
1965
5
3
+ Addedsafe-eval@^0.4.1
+ Addedsafe-eval@0.4.1(transitive)