Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign inDemoInstall
Socket

bandcamp-fetch

Package Overview
Dependencies
Maintainers
1
Versions
24
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

bandcamp-fetch - npm Package Compare versions

Comparing version 0.1.0-a-20210203 to 0.1.0-a-20210210

examples/getArticle_output.txt

31

lib/index.js

@@ -262,2 +262,30 @@ const fetch = require('node-fetch');

/**
 * Fetches the Bandcamp Daily landing page and extracts its article categories.
 *
 * @returns {Promise<*>} Resolves with the value produced by
 *   `parser.parseArticleCategories` for the fetched HTML.
 */
async function getArticleCategories() {
  const dailyUrl = utils.getDailyUrl();
  const html = await _fetchPage(dailyUrl);
  return parser.parseArticleCategories(html);
}
/**
 * Fetches a page of Bandcamp Daily articles and parses it into an article list.
 *
 * @param {Object} [params] - Listing parameters; never modified by this function.
 * @param {string} [params.categoryUrl] - Category page to list. When null or
 *   undefined, the URL built by `utils.getUrl('latest', utils.getDailyUrl())` is used.
 * @param {*} [params.page] - Forwarded to `utils.getDailyUrl`, which appends it
 *   as the `page` query parameter.
 * @param {Object} [options]
 * @param {*} [options.imageFormat] - Passed through `_parseImageFormatArg`; the
 *   result is handed to the parser as `opts.imageFormat`.
 * @returns {Promise<*>} Resolves with the value produced by `parser.parseArticleList`.
 */
async function getArticleList(params = {}, options = {}) {
  // Work on a shallow copy so the default category URL is never written back
  // into the object owned by the caller.
  const listParams = Object.assign({}, params);
  if (listParams.categoryUrl == null) {
    listParams.categoryUrl = utils.getUrl('latest', utils.getDailyUrl());
  }
  const opts = {
    imageFormat: await _parseImageFormatArg(options.imageFormat)
  };
  const html = await _fetchPage(utils.getDailyUrl(listParams));
  return parser.parseArticleList(html, opts);
}
/**
 * Fetches a Bandcamp Daily article and parses it.
 *
 * @param {string} articleUrl - URL of the article page to fetch.
 * @param {Object} [options]
 * @param {*} [options.albumImageFormat] - Passed to `_parseImageFormatArg` together
 *   with 9 (presumably the fallback format id; confirm against that helper).
 * @param {*} [options.artistImageFormat] - Passed to `_parseImageFormatArg` together
 *   with 21 (presumably the fallback format id; confirm against that helper).
 * @param {*} [options.includeRawData] - Coerced to a boolean before being handed
 *   to the parser.
 * @returns {Promise<*>} Resolves with the value produced by `parser.parseArticle`.
 */
async function getArticle(articleUrl, options = {}) {
  const { baseUrl } = await _getImageConstants();
  const albumImageFormat = await _parseImageFormatArg(options.albumImageFormat, 9);
  const artistImageFormat = await _parseImageFormatArg(options.artistImageFormat, 21);
  const includeRawData = Boolean(options.includeRawData);

  const html = await _fetchPage(articleUrl);
  return parser.parseArticle(html, {
    imageBaseUrl: baseUrl,
    albumImageFormat,
    artistImageFormat,
    includeRawData
  });
}
async function _fetchPage(url, json = false) {

@@ -295,2 +323,5 @@ return _cache.getOrSet('page', url + (json ? ':json' : ':html'), () => {

getShow,
getArticleCategories,
getArticleList,
getArticle
};

314

lib/parser.js
const cheerio = require('cheerio');
const {decode} = require('html-entities');
const utils = require('./utils.js');
const {URL} = require('url');
const {EOL} = require('os');

@@ -192,3 +192,3 @@ // https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js

url: trackUrl,
duration: track.item.duration_secs,
duration: getAdditionalPropertyValue(track.item, 'duration_secs'),
streamUrl: _getStreamUrl(track.item.url)

@@ -237,3 +237,3 @@ });

releaseDate: extra.current.release_date,
duration: basic.duration_secs,
duration: getAdditionalPropertyValue(basic, 'duration_secs'),
streamUrl: extra.trackinfo && extra.trackinfo[0] && extra.trackinfo[0].file && extra.trackinfo[0].file['mp3-128'] ? extra.trackinfo[0].file['mp3-128'] : null,

@@ -294,2 +294,12 @@ artist: {

/**
 * Looks up a named entry in an object's `additionalProperty` array.
 *
 * @param {Object} o - Object that may carry an `additionalProperty` array of
 *   `{ name, value }` entries.
 * @param {string} propName - Name of the entry to look for.
 * @returns {*} The `value` of the first entry whose `name` strictly equals
 *   `propName`; `undefined` when `additionalProperty` is not an array or no
 *   entry matches.
 */
function getAdditionalPropertyValue(o, propName) {
  const entries = o.additionalProperty;
  if (!Array.isArray(entries)) {
    return undefined;
  }
  // Only the first entry with a matching name is consulted.
  for (const entry of entries) {
    if (entry.name === propName) {
      return entry.value;
    }
  }
  return undefined;
}
function parseDiscography(html, opts) {

@@ -708,2 +718,297 @@ const $ = cheerio.load(html);

/**
 * Parses the Bandcamp Daily landing page into a tree of category sections.
 *
 * Every `<section>` that is a direct child of `#daily-view-all` becomes an object
 * `{ name, title, sections?, categories? }`:
 *   - `name` is the section's `class` attribute,
 *   - `title` is the text of the `<h2>` immediately preceding it ('' if none),
 *   - `sections` holds nested `<section>` children parsed the same way,
 *   - `categories` holds `{ url, name }` for each `<a>` found inside `<div>` children.
 * Empty `sections` / `categories` are omitted, and a section with neither is dropped.
 *
 * @param {string} html - HTML of the Bandcamp Daily page.
 * @returns {Object[]} Parsed top-level sections, in document order.
 */
function parseArticleCategories(html) {
  const $ = cheerio.load(html);
  const dailyUrl = utils.getDailyUrl();

  // Relative hrefs are resolved against the Bandcamp Daily base URL.
  const resolveUrl = href => (utils.isAbsoluteUrl(href) ? href : utils.getUrl(href, dailyUrl));

  // Turns one section selection into its plain-object form, or null when it
  // contains neither nested sections nor category links.
  const buildSection = sectionEl => {
    const headingEl = sectionEl.prev('h2');
    const subsections = [];
    const categories = [];

    sectionEl.children().each( (childIndex, childNode) => {
      const child = $(childNode);
      switch (childNode.tagName) {
        case 'section': {
          const nested = buildSection(child);
          if (nested !== null) {
            subsections.push(nested);
          }
          break;
        }
        case 'div': {
          child.find('a').each( (linkIndex, linkNode) => {
            const link = $(linkNode);
            categories.push({
              url: resolveUrl(link.attr('href')),
              name: link.text()
            });
          });
          break;
        }
        default:
          break;
      }
    });

    if (subsections.length === 0 && categories.length === 0) {
      return null;
    }

    const result = {
      name: sectionEl.attr('class'),
      title: headingEl.length ? headingEl.text() : ''
    };
    if (subsections.length > 0) {
      result.sections = subsections;
    }
    if (categories.length > 0) {
      result.categories = categories;
    }
    return result;
  };

  const results = [];
  $('#daily-view-all').children('section').each( (index, node) => {
    const parsed = buildSection($(node));
    if (parsed !== null) {
      results.push(parsed);
    }
  });
  return results;
}
/**
 * Parses a Bandcamp Daily listing page into a list of article summaries.
 *
 * @param {string} html - HTML of the listing page.
 * @param {Object} opts
 * @param {*} opts.imageFormat - Forwarded to `utils.reformatImageUrl` for each
 *   article image.
 * @returns {{articles: Object[], total: number, start: number, end: number}}
 *   `articles` holds `{ url, title, date, imageUrl, category }` entries in document
 *   order. `start`, `end` and `total` come from the "X to Y of Z" text in
 *   `#num-results` and stay 0 when that text is missing or in a different format.
 */
function parseArticleList(html, opts) {
  const $ = cheerio.load(html);
  const dailyUrl = utils.getDailyUrl();
  const results = {
    articles: [],
    total: 0,
    start: 0,
    end: 0
  };

  $('articles-list').each( (listIndex, list) => {
    $('.list-article', $(list)).each( (articleIndex, articleNode) => {
      const article = $(articleNode);

      // Title and URL. A cheerio selection is always truthy, so emptiness has
      // to be tested through its length; entries without a title link are skipped.
      const titleLink = article.find('a.title');
      if (titleLink.length === 0) {
        return;
      }
      const title = titleLink.text();
      let url = titleLink.attr('href');
      if (!utils.isAbsoluteUrl(url)) {
        url = utils.getUrl(url, dailyUrl);
      }

      const imageUrl = article.find('img').attr('src') || null;

      // Category
      const infoText = article.find('.article-info-text');
      const infoTextCategoryLink = infoText.find('a.franchise');
      const infoTextMiddot = infoText.find('.middot');
      const category = {
        url: infoTextCategoryLink.attr('href') || null,
        name: infoTextCategoryLink.text() || ''
      };
      if (!utils.isAbsoluteUrl(category.url)) {
        category.url = utils.getUrl(category.url, dailyUrl);
      }

      // Date: what remains of the info text once the category link and the
      // separator have been removed from the DOM.
      infoTextCategoryLink.remove();
      infoTextMiddot.remove();
      const date = utils.stripLineBreaks(infoText.text()).trim();

      results.articles.push({
        url,
        title,
        date,
        imageUrl: utils.reformatImageUrl(imageUrl, opts.imageFormat),
        category
      });
    });
  });

  // String.prototype.match returns null when nothing matches, so the result
  // must be null-checked before the capture groups are read.
  const resultsText = utils.stripLineBreaks($('#num-results').text()).trim();
  const rtm = resultsText.match(/(\d+)(?:\s*to\s*)(\d+)(?:\s*of\s*)(\d+)/);
  if (rtm) {
    results.total = parseInt(rtm[3], 10);
    results.start = parseInt(rtm[1], 10);
    results.end = parseInt(rtm[2], 10);
  }
  return results;
}
/**
 * Parses a Bandcamp Daily article page.
 *
 * Data comes from three places in the page: the `application/ld+json` script
 * (headline, description, author, ...), the `data-player-infos` attribute of
 * `#p-daily-article` (embedded albums / tracks), and the article body markup
 * (genre and category links, intro paragraphs, one section per `bamplayer-art`).
 *
 * @param {string} html - HTML of the article page.
 * @param {Object} opts
 * @param {string} opts.imageBaseUrl - Prefix for generated image URLs.
 * @param {{id: *}} opts.albumImageFormat - `id` is embedded in album art URLs.
 * @param {{id: *}} opts.artistImageFormat - `id` is embedded in artist image URLs.
 * @param {boolean} [opts.includeRawData] - When truthy, the result gets a `raw`
 *   property holding the untransformed source data.
 * @returns {Object} The parsed article. `sections` is an array: an optional intro
 *   section `{ html, text }` followed by one
 *   `{ heading, html, text, mediaItemRef }` per embedded player.
 * @throws {SyntaxError} If either embedded JSON payload cannot be parsed.
 */
function parseArticle(html, opts) {
  const $ = cheerio.load(html);

  const basic = JSON.parse($('script[type="application/ld+json"]').html());
  const players = JSON.parse(decode($('#p-daily-article').attr('data-player-infos')));

  // Resolves site-relative links against the Bandcamp Daily base URL.
  const toDailyUrl = url => (utils.isAbsoluteUrl(url) ? url : utils.getUrl(url, utils.getDailyUrl()));

  // Joins paragraph elements (given in reading order) into html / text strings.
  const joinParagraphs = paragraphNodes => {
    const joined = { html: '', text: '' };
    paragraphNodes.forEach( node => {
      const p = $(node);
      joined.html += (joined.html !== '' ? EOL : '') + p.html();
      joined.text += (joined.text !== '' ? EOL + EOL : '') + p.text();
    });
    return joined;
  };

  const article = {
    title: basic.headline,
    description: basic.description,
    url: basic['@id'],
    imageUrl: basic.image,
    date: basic.datePublished,
    category: {
      name: basic.articleSection,
      url: null
    },
    genre: null,
    author: {
      name: basic.author.name,
      url: basic.author['@id']
    },
    mediaItems: [],
    sections: []
  };

  // Genre
  const genreLink = $('.genre a');
  if (genreLink.length > 0) {
    article.genre = {
      name: genreLink.text(),
      url: genreLink.attr('href')
    };
    const genreReadMoreLink = $('.moreingenre a');
    if (genreReadMoreLink.length > 0) {
      article.genre.readMoreUrl = toDailyUrl(genreReadMoreLink.attr('href'));
    }
  }

  // Category URL
  const categoryLink = $('article-type a');
  if (categoryLink.length > 0) {
    article.category.url = toDailyUrl(categoryLink.attr('href'));
  }

  // Media items (albums and tracks featured in the article)
  if (Array.isArray(players)) {
    players.forEach( player => {
      const mediaItem = {
        type: 'unknown',
        name: player.title,
        url: player.tralbum_url,
        imageUrl: '',
        featuredTrackPosition: player.featured_track_number,
        artist: {
          name: player.band_name,
          url: player.band_url,
          imageUrl: '',
          location: player.band_location
        },
        tracks: [],
        mediaItemRef: player.player_id
      };
      if (player.parent_tralbum_type === 'a') {
        mediaItem.type = 'album';
      }
      else if (player.parent_tralbum_type === 't') {
        mediaItem.type = 'track';
      }
      if (player.art_id) {
        mediaItem.imageUrl = opts.imageBaseUrl + '/img/a' + player.art_id + '_' + opts.albumImageFormat.id + '.jpg';
      }
      if (player.band_image_id) {
        mediaItem.artist.imageUrl = opts.imageBaseUrl + '/img/' + player.band_image_id + '_' + opts.artistImageFormat.id + '.jpg';
      }
      if (Array.isArray(player.tracklist)) {
        player.tracklist.forEach( trackInfo => {
          mediaItem.tracks.push({
            position: trackInfo.track_number,
            name: trackInfo.track_title,
            duration: trackInfo.audio_track_duration,
            // A track may have no audio_url object; report null rather than
            // throwing while reading 'mp3-128' from it.
            streamUrl: (trackInfo.audio_url && trackInfo.audio_url['mp3-128']) || null
          });
        });
      }
      article.mediaItems.push(mediaItem);
    });
  }

  // Builds the section belonging to one embedded player element.
  const _getSectionByPlayer = player => {
    const section = {
      heading: null,
      html: '',
      text: '',
      mediaItemRef: null
    };

    // Heading: prevUntil() yields siblings closest-first, so first() is the
    // nearest heading between this player and the previous one.
    const heading = player.prevUntil('bamplayer-art', 'h3, h2').first();
    if (heading.length > 0) {
      section.heading = {
        html: heading.html(),
        text: utils.stripTags(utils.brToNewLine(heading.html())).trim()
      };
    }

    // Body: paragraphs following the player, up to the next player / heading / end marker.
    const body = joinParagraphs(player.nextUntil('bamplayer-art, h3, h5, article-end', 'p').toArray());
    section.html = body.html;
    section.text = body.text;

    // mediaItemRef: the attribute may be absent and the pattern may not match;
    // both cases yield null instead of a TypeError.
    const dataBind = player.attr('data-bind') || '';
    const playerIdMatch = dataBind.match(/playerMap\["(.+?)"]/);
    section.mediaItemRef = playerIdMatch ? playerIdMatch[1] : null;

    return section;
  };

  // Builds the introductory section (paragraphs before the first player), or
  // null when the article has no such paragraphs.
  const _getIntroSection = articleBody => {
    const firstPlayer = articleBody.find('bamplayer-art').first();
    // prevAll() returns siblings starting with the closest one, i.e. in reverse
    // document order; reverse them so the intro reads top to bottom.
    const paragraphs = firstPlayer.length > 0
      ? firstPlayer.prevAll('p').toArray().reverse()
      : articleBody.find('p').toArray();
    return paragraphs.length > 0 ? joinParagraphs(paragraphs) : null;
  };

  // Sections
  const articleBody = $('#p-daily-article article');
  const introSection = _getIntroSection(articleBody);
  if (introSection) {
    article.sections.push(introSection);
  }
  articleBody.find('bamplayer-art').each( (i, player) => {
    article.sections.push(_getSectionByPlayer($(player)));
  });

  if (opts.includeRawData) {
    article.raw = {
      basic,
      mediaItems: players,
      body: articleBody.html()
    };
  }
  return article;
}
module.exports = {

@@ -723,2 +1028,5 @@ parseDiscoverResults,

parseShow,
parseArticleCategories,
parseArticleList,
parseArticle
};

@@ -135,2 +135,10 @@ const querystring = require('querystring');

/**
 * Builds a Bandcamp Daily URL.
 *
 * @param {Object} [params]
 * @param {string} [params.categoryUrl] - Base URL to use; defaults to
 *   'https://daily.bandcamp.com' when falsy.
 * @param {number|string} [params.page] - When truthy, appended as the `page`
 *   query parameter.
 * @returns {string} The resulting URL.
 */
function getDailyUrl(params = {}) {
  let url = params.categoryUrl || 'https://daily.bandcamp.com';
  if (params.page) {
    // Use '&' when the base URL already carries a query string, and encode the
    // value so it cannot introduce extra parameters.
    const separator = String(url).indexOf('?') === -1 ? '?' : '&';
    url += separator + 'page=' + encodeURIComponent(params.page);
  }
  return url;
}
module.exports = {

@@ -152,3 +160,4 @@ getUrl,

getShowIdFromUrl,
getShowUrl
getShowUrl,
getDailyUrl
};

2

package.json
{
"name": "bandcamp-fetch",
"version": "0.1.0a-20210203",
"version": "0.1.0a-20210210",
"description": "JS library for scraping Bandcamp content",

@@ -5,0 +5,0 @@ "main": "lib/index.js",

@@ -185,2 +185,31 @@ # bandcamp-fetch

### `getArticleCategories()`
[**Example**](examples/getArticleCategories.js) ([output](examples/getArticleCategories_output.txt))
Fetches the list of Bandcamp Daily article categories. Categories are grouped into sections.
### `getArticleList([params], [options])`
[**Example**](examples/getArticleList.js) ([output](examples/getArticleList_output.txt))
Fetches the list of Bandcamp Daily articles under the category specified by `params.categoryUrl` (or all categories if not specified).
- `params` (optional)
- categoryUrl
- `options` (optional)
- imageFormat
### `getArticle(articleUrl, [options])`
[**Example**](examples/getArticle.js) ([output](examples/getArticle_output.txt))
Fetches the contents of the Bandcamp Daily article at `articleUrl`.
- `articleUrl`
- `options` (optional)
- albumImageFormat
- artistImageFormat
- includeRawData
## Caching

@@ -187,0 +216,0 @@

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc