From bebb26a144d8cf67e1a005c370bb1e62f0311e3d Mon Sep 17 00:00:00 2001 From: patrickkfkan Date: Sat, 13 Feb 2021 18:04:18 +0800 Subject: [PATCH] Add getReleasesByTag() and related functions --- examples/getReleasesByTag.js | 22 +++ examples/getReleasesByTagFilterOptions.js | 8 + examples/searchLocation.js | 11 ++ examples/searchTag.js | 11 ++ lib/index.js | 100 ++++++++++- lib/parser.js | 205 +++++++++++++++++++++- lib/utils.js | 22 ++- package.json | 3 +- 8 files changed, 375 insertions(+), 7 deletions(-) create mode 100644 examples/getReleasesByTag.js create mode 100644 examples/getReleasesByTagFilterOptions.js create mode 100644 examples/searchLocation.js create mode 100644 examples/searchTag.js diff --git a/examples/getReleasesByTag.js b/examples/getReleasesByTag.js new file mode 100644 index 0000000..b4eb9af --- /dev/null +++ b/examples/getReleasesByTag.js @@ -0,0 +1,22 @@ +const bcfetch = require('../'); +const util = require('util'); + +const tagUrl = 'https://bandcamp.com/tag/dark-ambient'; + +const params = { + filters: { + tags: [ 'dark-ambient', 'electronica' ], + sort: 'random' + }, + page: 2 +}; + +const options = { + imageFormat: 2 +} + +bcfetch.getReleasesByTag(tagUrl, params, options).then( results => { + console.log(util.inspect(results, false, null, false)); +}); + + diff --git a/examples/getReleasesByTagFilterOptions.js b/examples/getReleasesByTagFilterOptions.js new file mode 100644 index 0000000..7dc8bec --- /dev/null +++ b/examples/getReleasesByTagFilterOptions.js @@ -0,0 +1,8 @@ +const bcfetch = require('../'); +const util = require('util'); + +const tagUrl = 'https://bandcamp.com/tag/dark-ambient'; + +bcfetch.getReleasesByTagFilterOptions(tagUrl).then( results => { + console.log(util.inspect(results, false, null, false)); +}); diff --git a/examples/searchLocation.js b/examples/searchLocation.js new file mode 100644 index 0000000..9f452e7 --- /dev/null +++ b/examples/searchLocation.js @@ -0,0 +1,11 @@ +const bcfetch = require('../'); +const util = require('util'); + +const params = { + q: 'Fin', + limit: 10 +} + +bcfetch.searchLocation(params).then( results => { + console.log(util.inspect(results, false, null, false)); +}); diff --git a/examples/searchTag.js b/examples/searchTag.js new file mode 100644 index 0000000..9f3d099 --- /dev/null +++ b/examples/searchTag.js @@ -0,0 +1,11 @@ +const bcfetch = require('../'); +const util = require('util'); + +const params = { + q: 'ambient', + limit: 10 +} + +bcfetch.searchTag(params).then( results => { + console.log(util.inspect(results, false, null, false)); +}); diff --git a/lib/index.js b/lib/index.js index 556e82a..3d3ea18 100644 --- a/lib/index.js +++ b/lib/index.js @@ -287,12 +287,100 @@ async function getArticle(articleUrl, options = {}) { .then( html => parser.parseArticle(html, opts) ); } -async function _fetchPage(url, json = false) { - return _cache.getOrSet('page', url + (json ? ':json' : ':html'), () => { - return fetch(url).then( res => json ? res.json() : res.text() ); +async function getReleasesByTagFilterOptions(tagUrl) { + return getReleasesByTagFilterValueNames(tagUrl) + .then( filterValueNames => { + const opts = { + filterValueNames + }; + return _fetchPage(tagUrl) + .then( html => parser.parseReleasesByTagFilterOptions(html, opts)); + }); +} + +async function getReleasesByTagFilterValueNames(tagUrl) { + return _fetchPage(utils.getReleasesByTagUrl(tagUrl)) + .then( html => parser.parseHubJSPath(html) ) + .then( path => { + return _fetchPage(path).then( js => { + return parser.parseHubJSFilterValueNames(js); + }); + }); +} + +async function getReleasesByTag(tagUrl, params = {}, options = {}) { + const imageConstants = await _getImageConstants(); + const opts = { + imageBaseUrl: imageConstants.baseUrl, + imageFormat: await _parseImageFormatArg(options.imageFormat, 9) + }; + + return getReleasesByTagFilterOptions(tagUrl) + .then( filterOptions => { + const defaultFilters = {}; + filterOptions.forEach( filter => { + let selectedOption = filter.options.find( o => o.selected ); + let defaultOption = filter.options.find( o => o.default ); + if (selectedOption) { + if (filter.name === 'tags') { + defaultFilters[filter.name] = [selectedOption.value]; + } + else { + defaultFilters[filter.name] = selectedOption.value; + } + } + else if (defaultOption) { + defaultFilters[filter.name] = defaultOption.value; + } + }); + + const paramFilters = params.filters ? Object.assign(defaultFilters, params.filters) : defaultFilters; + + return { + filters: paramFilters, + page: params.page || 1 + }; + }) + .then( postData => { + return _fetchPage(utils.getDigDeeperUrl(), true, _getPostFetchOptions(postData)) + .then( json => parser.parseReleasesByTag(json, opts)); + }); +} + +async function searchTag(params) { + const postData = { + search_term: params.q, + count: params.limit + }; + return _fetchPage(utils.getSearchTagUrl(), true, _getPostFetchOptions(postData)) + .then( json => parser.parseSearchTagResults(json)); +} + +async function searchLocation(params) { + const postData = { + q: params.q, + n: params.limit, + geocoder_fallback: true + }; + return _fetchPage(utils.getSearchLocationUrl(), true, _getPostFetchOptions(postData)) + .then( json => parser.parseSearchLocationResults(json)); +} + +async function _fetchPage(url, json = false, fetchOptions = null) { + return _cache.getOrSet('page', url + (json ? ':json' : ':html') + (fetchOptions ? ':' + JSON.stringify(fetchOptions) : ''), () => { + const doFetch = fetchOptions ? fetch(url, fetchOptions) : fetch(url); + return doFetch.then( res => json ? res.json() : res.text() ); }); } +function _getPostFetchOptions(postData) { + return { + method: 'POST', + body: JSON.stringify(postData), + headers: { 'Content-Type': 'application/x-www-form-urlencoded' } + }; +} + // Cache functions const cache = { setTTL: _cache.setTTL.bind(_cache), @@ -321,5 +409,9 @@ module.exports = { getShow, getArticleCategories, getArticleList, - getArticle + getArticle, + getReleasesByTagFilterOptions, + getReleasesByTag, + searchTag, + searchLocation }; \ No newline at end of file diff --git a/lib/parser.js b/lib/parser.js index 155a34c..174b26b 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -2,6 +2,7 @@ const cheerio = require('cheerio'); const {decode} = require('html-entities'); const utils = require('./utils.js'); const {EOL} = require('os'); +const safeEval = require('safe-eval'); // https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js function assignProps(objFrom, objTo, propNames) { @@ -1007,6 +1008,202 @@ function parseArticle(html, opts) { return article; } +function parseHubJSPath(html) { + const jsMatch = /src="((?:.+?)hub-(?:.+?).js)"/g.exec(html); + return jsMatch[1] || null; +} + +function parseHubJSFilterValueNames(js) { + const filterValueNames = {}; + const tObj = /"hubs\/digdeeper\/filter_value":(.+?)}\);/gs.exec(js); + if (tObj[1]) { + const t = safeEval(tObj[1]); + if (t && t[0] && Array.isArray(t[0].blocks)) { + const _getValFromBlockAttachment = attachment => { + if (typeof attachment === 'object' && attachment.type === 'translate') { + return utils.stripLineBreaks(attachment.nodelist[0]).trim(); + } + else if (typeof attachment === 'string') { + return utils.stripLineBreaks(attachment).trim(); + } + else { + return ''; + } + }; + t[0].blocks.forEach( filterBlock => { + const filter = safeEval(filterBlock.expression.split('==')[1]); + if (filter) { + filterBlock + .attachment.find( a => a.blocks ) + .blocks.filter( block => block.expression ) + .forEach( valueBlock => { + const value = safeEval(valueBlock.expression.split('==')[1]); + if (value != null && valueBlock.attachment) { + let valueName = valueBlock.attachment.reduce( (a, c) => { + cVal = utils.stripLineBreaks(_getValFromBlockAttachment(c)).trim(); + if (cVal !== '') { + return a !== '' ? a + ' ' + cVal : cVal; + } + else { + return a; + } + }, ''); + //console.log('value name: ' + valueName); + if (valueName) { + if (!filterValueNames[filter]) { + filterValueNames[filter] = {}; + } + filterValueNames[filter][value] = valueName; + } + } + }); + } + }); + } + } + return filterValueNames; +} + +function parseReleasesByTagFilterOptions(html, opts) { + const $ = cheerio.load(html); + const blob = decode($('#pagedata[data-blob]').attr('data-blob')); + const parsed = JSON.parse(blob); + const filters = []; + if (typeof parsed === 'object' && parsed.hub && Array.isArray(parsed.hub.tabs)) { + const tab = parsed.hub.tabs[1]; // All releases + + const _setOrAdd = (f, t, prop) => { + const target = f.options.find( f => f.value === t.value ); + if (target) { + target[prop] = true; + } + else if (t.value && t.name) { + const tAdd = { + value: t.value, + name: t.name, + }; + tAdd[prop] = true; + f.options.push(tAdd); + } + } + + if (tab && tab.dig_deeper && typeof tab.dig_deeper.filters === 'object') { + const filterKeys = Object.keys(tab.dig_deeper.filters); + filterKeys.forEach( filterName => { + const filter = { + name: filterName, + options: [] + } + const filterData = tab.dig_deeper.filters[filterName]; + if (Array.isArray(filterData.options)) { + filterData.options.forEach( filterOption => { + const valueName = opts.filterValueNames[filterName] && opts.filterValueNames[filterName][filterOption.value] ? opts.filterValueNames[filterName][filterOption.value] : filterOption.name || filterOption.value; + filter.options.push({ + value: filterOption.value, + name: valueName + }) + }); + } + if (typeof filterData.selected === 'object' && !Array.isArray(filterData.selected)) { + _setOrAdd(filter, filterData.selected, 'selected'); + } + else if (Array.isArray(filterData.selected)) { + filterData.selected.forEach( s => { + _setOrAdd(filter, s, 'selected'); + }) + } + if (filterData.default) { + _setOrAdd(filter, filterData.default, 'default'); + } + + filters.push(filter); + }); + } + } + return filters; +} + +function parseReleasesByTag(json, opts) { + if (typeof json === 'object' && Array.isArray(json.items)) { + const results = { + items: [] + }; + json.items.forEach(function (item) { + const mediaItem = { + type: 'unknown', + name: item.title, + url: item.tralbum_url, + imageUrl: '', + genre: item.genre, + artist: { + name: item.artist, + url: item.band_url + }, + featuredTrack: '' + }; + if (item.type === 'a') { + mediaItem.type = 'album'; + } + else if (item.type === 't') { + mediaItem.type = 'track'; + } + if (item.art_id) { + mediaItem.imageUrl = opts.imageBaseUrl + '/img/a' + item.art_id + '_' + opts.imageFormat.id + '.jpg'; + } + if (item.featured_track_title) { + mediaItem.featuredTrack = { + name: item.featured_track_title, + streamUrl: (item.audio_url ? item.audio_url['mp3-128'] : null) || null + }; + } + results.items.push(mediaItem); + }); + results.hasMore = json.more_available; + results.filters = JSON.parse(json.filters); + return results; + } + else { + console.log('Failed to parse releases by tag'); + return null; + } +} + +function parseSearchTagResults(json) { + if (typeof json === 'object' && Array.isArray(json.matching_tags)) { + const results = []; + json.matching_tags.forEach( match => { + results.push({ + count: match.count, + value: match.tag_norm_name, + name: match.tag_name + }); + }); + return results; + } + else { + console.log('Failed to parse search tag results'); + return null; + } +} + +function parseSearchLocationResults(json) { + if (typeof json === 'object' && Array.isArray(json.results)) { + const results = []; + json.results.forEach( match => { + results.push({ + value: match.id, + name: match.name, + fullName: match.fullname + }); + }); + return results; + } + else { + console.log('Failed to parse search location results'); + return null; + } +} + module.exports = { parseDiscoverResults, parseDiscoverOptions, @@ -1023,5 +1220,11 @@ module.exports = { parseShow, parseArticleCategories, parseArticleList, - parseArticle + parseArticle, + parseHubJSPath, + parseHubJSFilterValueNames, + parseReleasesByTagFilterOptions, + parseReleasesByTag, + parseSearchTagResults, + parseSearchLocationResults }; \ No newline at end of file diff --git a/lib/utils.js b/lib/utils.js index 15b2ac8..f6c38d3 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -140,6 +140,22 @@ function getDailyUrl(params = {}) { return url; } +function getReleasesByTagUrl(tagUrl) { + return `${tagUrl}?tab=all_releases`; +} + +function getDigDeeperUrl() { + return 'https://bandcamp.com/api/hub/2/dig_deeper'; +} + +function getSearchTagUrl() { + return 'https://bandcamp.com/api/fansignup/1/search_tag'; +} + +function getSearchLocationUrl() { + return 'https://bandcamp.com/api/location/1/geoname_search'; +} + module.exports = { getUrl, getSiteUrl, @@ -157,5 +173,9 @@ module.exports = { getAllShowsUrl, getShowIdFromUrl, getShowUrl, - getDailyUrl + getDailyUrl, + getReleasesByTagUrl, + getDigDeeperUrl, + getSearchTagUrl, + getSearchLocationUrl }; \ No newline at end of file diff --git a/package.json b/package.json index 2c3051f..5e96a83 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "cheerio": "^1.0.0-rc.5", "html-entities": "^2.0.2", "node-cache": "^5.1.2", - "node-fetch": "^2.6.1" + "node-fetch": "^2.6.1", + "safe-eval": "^0.4.1" } }