More robust fetching of artist / label info

This commit is contained in:
patrickkfkan 2021-01-21 21:31:40 +08:00
parent aeed0098c0
commit e3260ece1c
2 changed files with 18 additions and 11 deletions

View File

@@ -122,7 +122,9 @@ async function getArtistOrLabelInfo(artistOrLabelUrl, options = {}) {
artistOrLabelUrl, artistOrLabelUrl,
imageFormat: await _parseImageFormatArg(options.imageFormat) imageFormat: await _parseImageFormatArg(options.imageFormat)
}; };
return fetch(artistOrLabelUrl) // Some pages don't actually show the 'bio' column.
// The /music page does seem to always show it though, so parse from that.
return fetch(utils.getUrl('music', artistOrLabelUrl))
.then( res => res.text() ) .then( res => res.text() )
.then( html => parser.parseArtistOrLabelInfo(html, opts) ); .then( html => parser.parseArtistOrLabelInfo(html, opts) );
} }

View File

@@ -328,20 +328,25 @@ function parseArtistOrLabelInfo(html, opts) {
const $ = cheerio.load(html); const $ = cheerio.load(html);
let bioText = $('#bio-text'); let bioText = $('#bio-text');
let bioTextMore = bioText.find('.peekaboo-text');
let description; let description;
if (bioTextMore.length) { if (bioText.length) {
bioTextMore.find('.lightweightBreak').remove(); let bioTextMore = bioText.find('.peekaboo-text');
bioText.find('.peekaboo-text, .peekaboo-link').remove(); if (bioTextMore.length) {
description = (bioText.html().trim() + ' ' + bioTextMore.html()).trim(); bioTextMore.find('.lightweightBreak').remove();
bioText.find('.peekaboo-text, .peekaboo-link').remove();
description = (bioText.html().trim() + ' ' + bioTextMore.html()).trim();
}
else {
description = bioText.html().trim();
}
description = utils.stripLineBreaks(description);
description = utils.brToNewLine(description);
description = utils.stripTags(description);
description = decode(description);
} }
else { else {
description = bioText.html().trim(); description = '';
} }
description = utils.stripLineBreaks(description);
description = utils.brToNewLine(description);
description = utils.stripTags(description);
description = decode(description);
let isLabel = $('a[href="/artists"]').length; let isLabel = $('a[href="/artists"]').length;
let label = null; let label = null;