Add getReleasesByTag() and related functions

This commit is contained in:
patrickkfkan 2021-02-13 18:04:18 +08:00
parent 62bd0e0346
commit bebb26a144
8 changed files with 375 additions and 7 deletions

View File

@ -0,0 +1,22 @@
const bcfetch = require('../');
const util = require('util');
// Example: list releases for a Bandcamp tag page, overriding some filters.
const tagUrl = 'https://bandcamp.com/tag/dark-ambient';

// `filters` is merged over the filter defaults by getReleasesByTag();
// `page` selects which page of results to request.
const params = {
    filters: {
        tags: [ 'dark-ambient', 'electronica' ],
        sort: 'random'
    },
    page: 2
};

// Image format requested for the returned artwork URLs.
const options = {
    imageFormat: 2
};

bcfetch.getReleasesByTag(tagUrl, params, options)
    .then( results => {
        console.log(util.inspect(results, false, null, false));
    })
    .catch( error => {
        // Report network / parsing failures instead of leaving the rejection unhandled.
        console.error(error);
        process.exitCode = 1;
    });

View File

@ -0,0 +1,8 @@
const bcfetch = require('../');
const util = require('util');
// Example: print the filters (and their options) offered for a tag page.
const tagUrl = 'https://bandcamp.com/tag/dark-ambient';

bcfetch.getReleasesByTagFilterOptions(tagUrl)
    .then( results => {
        console.log(util.inspect(results, false, null, false));
    })
    .catch( error => {
        // Report network / parsing failures instead of leaving the rejection unhandled.
        console.error(error);
        process.exitCode = 1;
    });

View File

@ -0,0 +1,11 @@
const bcfetch = require('../');
const util = require('util');
// Example: look up locations matching a query string.
// `q` is the search text and `limit` the maximum number of matches requested.
const params = {
    q: 'Fin',
    limit: 10
};

bcfetch.searchLocation(params)
    .then( results => {
        console.log(util.inspect(results, false, null, false));
    })
    .catch( error => {
        // Report network / parsing failures instead of leaving the rejection unhandled.
        console.error(error);
        process.exitCode = 1;
    });

11
examples/searchTag.js Normal file
View File

@ -0,0 +1,11 @@
const bcfetch = require('../');
const util = require('util');
// Example: look up tags matching a query string.
// `q` is the search text and `limit` the maximum number of matches requested.
const params = {
    q: 'ambient',
    limit: 10
};

bcfetch.searchTag(params)
    .then( results => {
        console.log(util.inspect(results, false, null, false));
    })
    .catch( error => {
        // Report network / parsing failures instead of leaving the rejection unhandled.
        console.error(error);
        process.exitCode = 1;
    });

View File

@ -287,12 +287,100 @@ async function getArticle(articleUrl, options = {}) {
.then( html => parser.parseArticle(html, opts) ); .then( html => parser.parseArticle(html, opts) );
} }
/**
 * Resolves the filters available on a tag page together with their options.
 *
 * The display names of filter values are looked up first, then the tag page
 * itself is fetched and handed to the parser along with those names.
 *
 * @param {string} tagUrl - URL of the tag page.
 * @returns {Promise<Array>} Whatever parser.parseReleasesByTagFilterOptions() produces.
 */
async function getReleasesByTagFilterOptions(tagUrl) {
    const filterValueNames = await getReleasesByTagFilterValueNames(tagUrl);
    const html = await _fetchPage(tagUrl);
    return parser.parseReleasesByTagFilterOptions(html, { filterValueNames });
}
/**
 * Resolves the display names of filter values for a tag page.
 *
 * Fetches the "all releases" view of the tag page, extracts the path of the
 * hub script from it, fetches that script and parses the names out of it.
 *
 * @param {string} tagUrl - URL of the tag page.
 * @returns {Promise<Object>} Whatever parser.parseHubJSFilterValueNames() produces.
 */
async function getReleasesByTagFilterValueNames(tagUrl) {
    const releasesPageUrl = utils.getReleasesByTagUrl(tagUrl);
    const releasesPageHtml = await _fetchPage(releasesPageUrl);
    const hubScriptPath = parser.parseHubJSPath(releasesPageHtml);
    const hubScript = await _fetchPage(hubScriptPath);
    return parser.parseHubJSFilterValueNames(hubScript);
}
/**
 * Fetches releases listed under a tag.
 *
 * Default filter values are derived from the tag page (the selected option of
 * each filter, otherwise its default option), overridden by `params.filters`,
 * and posted to the dig-deeper endpoint.
 *
 * @param {string} tagUrl - URL of the tag page.
 * @param {Object} [params] - `filters` (overrides for the derived defaults) and `page` (falls back to 1).
 * @param {Object} [options] - `imageFormat` is passed to _parseImageFormatArg() with 9 as second argument
 *                             (presumably the fallback format - confirm against that helper).
 * @returns {Promise<Object|null>} Whatever parser.parseReleasesByTag() produces.
 */
async function getReleasesByTag(tagUrl, params = {}, options = {}) {
    const { baseUrl: imageBaseUrl } = await _getImageConstants();
    const imageFormat = await _parseImageFormatArg(options.imageFormat, 9);
    const opts = { imageBaseUrl, imageFormat };

    const filterOptions = await getReleasesByTagFilterOptions(tagUrl);

    const filters = {};
    for (const filter of filterOptions) {
        const selected = filter.options.find( candidate => candidate.selected );
        const fallback = filter.options.find( candidate => candidate.default );
        if (selected) {
            // The 'tags' filter is sent as a list; every other filter as a single value.
            filters[filter.name] = filter.name === 'tags' ? [selected.value] : selected.value;
        }
        else if (fallback) {
            filters[filter.name] = fallback.value;
        }
    }
    if (params.filters) {
        Object.assign(filters, params.filters);
    }

    const postData = {
        filters,
        page: params.page || 1
    };
    const json = await _fetchPage(utils.getDigDeeperUrl(), true, _getPostFetchOptions(postData));
    return parser.parseReleasesByTag(json, opts);
}
/**
 * Searches for tags matching a query.
 *
 * @param {Object} params - `q` is sent as `search_term`, `limit` as `count`.
 * @returns {Promise<Array|null>} Whatever parser.parseSearchTagResults() produces.
 */
async function searchTag(params) {
    const { q, limit } = params;
    const postData = { search_term: q, count: limit };
    const endpoint = utils.getSearchTagUrl();
    const json = await _fetchPage(endpoint, true, _getPostFetchOptions(postData));
    return parser.parseSearchTagResults(json);
}
/**
 * Searches for locations matching a query.
 *
 * @param {Object} params - `q` is sent as `q`, `limit` as `n`; `geocoder_fallback` is always sent as true.
 * @returns {Promise<Array|null>} Whatever parser.parseSearchLocationResults() produces.
 */
async function searchLocation(params) {
    const { q, limit } = params;
    const postData = { q, n: limit, geocoder_fallback: true };
    const endpoint = utils.getSearchLocationUrl();
    const json = await _fetchPage(endpoint, true, _getPostFetchOptions(postData));
    return parser.parseSearchLocationResults(json);
}
/**
 * Fetches a URL through the 'page' cache.
 *
 * The cache key is built from the URL, the requested body format and, when
 * given, the serialized fetch options, so requests that differ only in their
 * options are cached separately.
 *
 * @param {string} url - Address to fetch.
 * @param {boolean} [json=false] - Read the response body as JSON instead of text.
 * @param {Object|null} [fetchOptions=null] - Options forwarded to fetch() when truthy.
 * @returns {Promise<*>} The cached or freshly fetched body.
 */
async function _fetchPage(url, json = false, fetchOptions = null) {
    const formatSuffix = json ? ':json' : ':html';
    const optionsSuffix = fetchOptions ? ':' + JSON.stringify(fetchOptions) : '';
    const cacheKey = url + formatSuffix + optionsSuffix;
    return _cache.getOrSet('page', cacheKey, () => {
        const pendingResponse = fetchOptions ? fetch(url, fetchOptions) : fetch(url);
        return pendingResponse.then( res => (json ? res.json() : res.text()) );
    });
}
/**
 * Builds the fetch() options for a POST request carrying `postData`.
 *
 * The body is the JSON serialization of `postData` while the Content-Type
 * header says form-urlencoded.
 * NOTE(review): presumably this is what the target endpoints accept - confirm before changing.
 *
 * @param {*} postData - Payload to serialize.
 * @returns {{method: string, body: string, headers: Object}}
 */
function _getPostFetchOptions(postData) {
    const headers = { 'Content-Type': 'application/x-www-form-urlencoded' };
    const body = JSON.stringify(postData);
    return { method: 'POST', body, headers };
}
// Cache functions // Cache functions
const cache = { const cache = {
setTTL: _cache.setTTL.bind(_cache), setTTL: _cache.setTTL.bind(_cache),
@ -321,5 +409,9 @@ module.exports = {
getShow, getShow,
getArticleCategories, getArticleCategories,
getArticleList, getArticleList,
getArticle getArticle,
getReleasesByTagFilterOptions,
getReleasesByTag,
searchTag,
searchLocation
}; };

View File

@ -2,6 +2,7 @@ const cheerio = require('cheerio');
const {decode} = require('html-entities'); const {decode} = require('html-entities');
const utils = require('./utils.js'); const utils = require('./utils.js');
const {EOL} = require('os'); const {EOL} = require('os');
const safeEval = require('safe-eval');
// https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js // https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js
function assignProps(objFrom, objTo, propNames) { function assignProps(objFrom, objTo, propNames) {
@ -1007,6 +1008,202 @@ function parseArticle(html, opts) {
return article; return article;
} }
/**
 * Finds the `src` of the script whose file name contains "hub-" and ends in ".js".
 *
 * @param {string} html - Markup of a tag page.
 * @returns {string|null} The script path, or null when no such script tag is present.
 */
function parseHubJSPath(html) {
    // [^"] keeps the capture inside a single src attribute, and the dot before
    // "js" is escaped so only a literal ".js" suffix is accepted.
    const jsMatch = /src="([^"]+?hub-[^"]+?\.js)"/.exec(html);
    // exec() yields null when nothing matches; report that as null rather than throwing.
    return jsMatch ? jsMatch[1] : null;
}
/**
 * Extracts display names for filter values from the text of the hub script.
 *
 * Locates the `"hubs/digdeeper/filter_value"` entry, evaluates it, and walks
 * its blocks: each top-level block names a filter (right-hand side of its
 * `==` expression) and contains nested blocks naming a value the same way,
 * whose attachments are joined into the display name.
 *
 * @param {string} js - Source text of the hub script.
 * @returns {Object} Map of filter -> (value -> display name). Empty when the
 *                   entry is missing or does not have the expected shape.
 */
function parseHubJSFilterValueNames(js) {
    const filterValueNames = {};
    const tObj = /"hubs\/digdeeper\/filter_value":(.+?)}\);/s.exec(js);
    if (!tObj || !tObj[1]) {
        return filterValueNames;
    }
    // SECURITY NOTE(review): this evaluates text taken from a remotely fetched
    // script. The sandbox is only as strong as the safe-eval package; consider
    // replacing the evaluation with a real parser.
    const t = safeEval(tObj[1]);
    if (!t || !t[0] || !Array.isArray(t[0].blocks)) {
        return filterValueNames;
    }

    // Text of one attachment entry: either a plain string or a 'translate' node.
    const _getValFromBlockAttachment = attachment => {
        if (attachment !== null && typeof attachment === 'object' && attachment.type === 'translate') {
            return utils.stripLineBreaks(attachment.nodelist[0]).trim();
        }
        else if (typeof attachment === 'string') {
            return utils.stripLineBreaks(attachment).trim();
        }
        else {
            return '';
        }
    };

    // Evaluates the right-hand side of an "a==b" expression; undefined when there is none.
    const _evalComparedValue = expression => {
        if (typeof expression !== 'string') {
            return undefined;
        }
        const rightHandSide = expression.split('==')[1];
        return rightHandSide !== undefined ? safeEval(rightHandSide) : undefined;
    };

    t[0].blocks.forEach( filterBlock => {
        if (!filterBlock) {
            return;
        }
        const filter = _evalComparedValue(filterBlock.expression);
        if (!filter || !Array.isArray(filterBlock.attachment)) {
            return;
        }
        const valueContainer = filterBlock.attachment.find( a => a && a.blocks );
        if (!valueContainer || !Array.isArray(valueContainer.blocks)) {
            return;
        }
        valueContainer.blocks
            .filter( block => block && block.expression )
            .forEach( valueBlock => {
                const value = _evalComparedValue(valueBlock.expression);
                if (value == null || !Array.isArray(valueBlock.attachment)) {
                    return;
                }
                // Join the non-empty attachment texts with single spaces.
                const valueName = valueBlock.attachment
                    .map( c => utils.stripLineBreaks(_getValFromBlockAttachment(c)).trim() )
                    .filter( cVal => cVal !== '' )
                    .join(' ');
                if (valueName) {
                    if (!filterValueNames[filter]) {
                        filterValueNames[filter] = {};
                    }
                    filterValueNames[filter][value] = valueName;
                }
            });
    });
    return filterValueNames;
}
/**
 * Parses the filters offered on a tag page.
 *
 * Reads the JSON held in the `data-blob` attribute of `#pagedata`, takes the
 * dig-deeper filters of the second hub tab and returns, per filter, its
 * options with `selected` / `default` flags applied.
 *
 * @param {string} html - Markup of the tag page.
 * @param {Object} [opts] - `filterValueNames`: map of filter -> (value -> display name);
 *                          a name found there takes precedence over the option's own name.
 * @returns {Array<{name: string, options: Array}>} One entry per filter; empty when
 *          the blob does not have the expected shape.
 * @throws {SyntaxError} When the blob is not valid JSON.
 */
function parseReleasesByTagFilterOptions(html, opts = {}) {
    const $ = cheerio.load(html);
    const blob = decode($('#pagedata[data-blob]').attr('data-blob'));
    const parsed = JSON.parse(blob);
    const filterValueNames = (opts && opts.filterValueNames) || {};
    const filters = [];

    // typeof null is 'object', so null has to be excluded explicitly.
    const isObject = candidate => candidate !== null && typeof candidate === 'object';

    if (!isObject(parsed) || !parsed.hub || !Array.isArray(parsed.hub.tabs)) {
        return filters;
    }
    const tab = parsed.hub.tabs[1]; // All releases
    if (!tab || !tab.dig_deeper || !isObject(tab.dig_deeper.filters)) {
        return filters;
    }

    // Flags the option matching `source.value`, or appends `source` as a new
    // flagged option when it is not listed and has both a value and a name.
    const _setOrAdd = (filter, source, prop) => {
        if (!isObject(source)) {
            return;
        }
        const target = filter.options.find( option => option.value === source.value );
        if (target) {
            target[prop] = true;
        }
        else if (source.value && source.name) {
            const added = {
                value: source.value,
                name: source.name
            };
            added[prop] = true;
            filter.options.push(added);
        }
    };

    Object.keys(tab.dig_deeper.filters).forEach( filterName => {
        const filter = {
            name: filterName,
            options: []
        };
        const filterData = tab.dig_deeper.filters[filterName] || {};
        const names = filterValueNames[filterName];
        if (Array.isArray(filterData.options)) {
            filterData.options.forEach( filterOption => {
                const valueName = names && names[filterOption.value]
                    ? names[filterOption.value]
                    : filterOption.name || filterOption.value;
                filter.options.push({
                    value: filterOption.value,
                    name: valueName
                });
            });
        }
        if (Array.isArray(filterData.selected)) {
            filterData.selected.forEach( s => _setOrAdd(filter, s, 'selected') );
        }
        else if (isObject(filterData.selected)) {
            _setOrAdd(filter, filterData.selected, 'selected');
        }
        if (filterData.default) {
            _setOrAdd(filter, filterData.default, 'default');
        }
        filters.push(filter);
    });
    return filters;
}
/**
 * Converts a dig-deeper response into a list of releases.
 *
 * @param {Object} json - Response body; must carry an `items` array.
 * @param {Object} opts - `imageBaseUrl` and `imageFormat.id` are used to build artwork URLs.
 * @returns {{items: Array, hasMore: *, filters: *}|null} The parsed result, or null
 *          (after logging) when `json` is not an object with an `items` array.
 *          `filters` is the parsed `json.filters` string, the value itself when it
 *          is not a string, or null when it is absent.
 * @throws {SyntaxError} When `json.filters` is a string that is not valid JSON.
 */
function parseReleasesByTag(json, opts) {
    // typeof null is 'object', so null has to be rejected explicitly.
    if (json === null || typeof json !== 'object' || !Array.isArray(json.items)) {
        console.log('Failed to parse releases by tag');
        return null;
    }

    const items = json.items.map( item => {
        const mediaItem = {
            type: 'unknown',
            name: item.title,
            url: item.tralbum_url,
            imageUrl: '',
            genre: item.genre,
            artist: {
                name: item.artist,
                url: item.band_url
            },
            featuredTrack: ''
        };
        if (item.type === 'a') {
            mediaItem.type = 'album';
        }
        else if (item.type === 't') {
            mediaItem.type = 'track';
        }
        if (item.art_id) {
            mediaItem.imageUrl = `${opts.imageBaseUrl}/img/a${item.art_id}_${opts.imageFormat.id}.jpg`;
        }
        if (item.featured_track_title) {
            mediaItem.featuredTrack = {
                name: item.featured_track_title,
                streamUrl: (item.audio_url ? item.audio_url['mp3-128'] : null) || null
            };
        }
        return mediaItem;
    });

    // Only a string needs parsing; JSON.parse(undefined) would throw.
    let filters = null;
    if (typeof json.filters === 'string') {
        filters = JSON.parse(json.filters);
    }
    else if (json.filters !== undefined) {
        filters = json.filters;
    }

    return {
        items,
        hasMore: json.more_available,
        filters
    };
}
/**
 * Converts a tag-search response into a list of tags.
 *
 * @param {Object} json - Response body; must carry a `matching_tags` array.
 * @returns {Array<{count: *, value: *, name: *}>|null} One entry per match, or null
 *          (after logging) when `json` is not an object with a `matching_tags` array.
 */
function parseSearchTagResults(json) {
    // typeof null is 'object', so null has to be rejected explicitly.
    if (json === null || typeof json !== 'object' || !Array.isArray(json.matching_tags)) {
        console.log('Failed to parse search tag results');
        return null;
    }
    return json.matching_tags.map( match => ({
        count: match.count,
        value: match.tag_norm_name,
        name: match.tag_name
    }));
}
/**
 * Converts a location-search response into a list of locations.
 *
 * @param {Object} json - Response body; must carry a `results` array.
 * @returns {Array<{value: *, name: *, fullName: *}>|null} One entry per match, or null
 *          (after logging) when `json` is not an object with a `results` array.
 */
function parseSearchLocationResults(json) {
    // typeof null is 'object', so null has to be rejected explicitly.
    if (json === null || typeof json !== 'object' || !Array.isArray(json.results)) {
        console.log('Failed to parse search location results');
        return null;
    }
    return json.results.map( match => ({
        value: match.id,
        name: match.name,
        fullName: match.fullname
    }));
}
module.exports = { module.exports = {
parseDiscoverResults, parseDiscoverResults,
parseDiscoverOptions, parseDiscoverOptions,
@ -1023,5 +1220,11 @@ module.exports = {
parseShow, parseShow,
parseArticleCategories, parseArticleCategories,
parseArticleList, parseArticleList,
parseArticle parseArticle,
parseHubJSPath,
parseHubJSFilterValueNames,
parseReleasesByTagFilterOptions,
parseReleasesByTag,
parseSearchTagResults,
parseSearchLocationResults
}; };

View File

@ -140,6 +140,22 @@ function getDailyUrl(params = {}) {
return url; return url;
} }
/**
 * Builds the URL of the "all releases" tab of a tag page by appending the
 * `tab=all_releases` query to the given tag URL.
 *
 * @param {string} tagUrl - URL of the tag page.
 * @returns {string}
 */
function getReleasesByTagUrl(tagUrl) {
    const allReleasesQuery = 'tab=all_releases';
    return `${tagUrl}?${allReleasesQuery}`;
}
/**
 * Returns the URL of the dig-deeper API endpoint.
 *
 * @returns {string}
 */
function getDigDeeperUrl() {
    const digDeeperEndpoint = 'https://bandcamp.com/api/hub/2/dig_deeper';
    return digDeeperEndpoint;
}
/**
 * Returns the URL of the tag-search API endpoint.
 *
 * @returns {string}
 */
function getSearchTagUrl() {
    const searchTagEndpoint = 'https://bandcamp.com/api/fansignup/1/search_tag';
    return searchTagEndpoint;
}
/**
 * Returns the URL of the location-search API endpoint.
 *
 * @returns {string}
 */
function getSearchLocationUrl() {
    const searchLocationEndpoint = 'https://bandcamp.com/api/location/1/geoname_search';
    return searchLocationEndpoint;
}
module.exports = { module.exports = {
getUrl, getUrl,
getSiteUrl, getSiteUrl,
@ -157,5 +173,9 @@ module.exports = {
getAllShowsUrl, getAllShowsUrl,
getShowIdFromUrl, getShowIdFromUrl,
getShowUrl, getShowUrl,
getDailyUrl getDailyUrl,
getReleasesByTagUrl,
getDigDeeperUrl,
getSearchTagUrl,
getSearchLocationUrl
}; };

View File

@ -25,6 +25,7 @@
"cheerio": "^1.0.0-rc.5", "cheerio": "^1.0.0-rc.5",
"html-entities": "^2.0.2", "html-entities": "^2.0.2",
"node-cache": "^5.1.2", "node-cache": "^5.1.2",
"node-fetch": "^2.6.1" "node-fetch": "^2.6.1",
"safe-eval": "^0.4.1"
} }
} }