Initial fork

This commit is contained in:
encode42 2024-02-03 03:14:08 -05:00
parent 7a7241785c
commit b65f69e69f
No known key found for this signature in database
GPG Key ID: 7E6D1008AC19B80B
9 changed files with 136 additions and 114 deletions

View File

@ -2,6 +2,17 @@
# bandcamp-fetch
This fork removes Node dependencies to work with Cloudflare Pages. It might work in other environments that provide `fetch` as well.
- `node-cache` has been replaced with a simple in-memory record store.
- `URL` has been replaced with a barebones implementation.
- `EOL` has been replaced with `\n`, so generated text always uses LF line endings. Development outside of Linux might not be supported!
- `node-fetch` has been removed, falling back to the environment's native `fetch` functions.
This fork will likely only be maintained as far as my own needs require!
---
Library for scraping Bandcamp content.
Coverage:

99
package-lock.json generated
View File

@ -13,9 +13,7 @@
"cheerio": "^1.0.0-rc.5",
"cookie": "^0.5.0",
"eval5": "^1.4.7",
"html-entities": "^2.4.0",
"node-cache": "^5.1.2",
"node-fetch": "^2.6.9"
"html-entities": "^2.4.0"
},
"devDependencies": {
"@types/cookie": "^0.5.3",
@ -32,7 +30,7 @@
"typescript": "^4.9.5"
},
"engines": {
"node": ">=14"
"node": ">=18"
}
},
"node_modules/@babel/runtime": {
@ -730,13 +728,6 @@
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/clone": {
"version": "2.1.2",
"integrity": "sha512-3Pe/CF1Nn94hyhIYpjtiLhdCoEoz0DqQ+988E9gmeEdQZlojxnOb74wctFyuwWQHzqyf9X7C7MG8juUpqBJT8w==",
"engines": {
"node": ">=0.8"
}
},
"node_modules/color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@ -1776,35 +1767,6 @@
"integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==",
"dev": true
},
"node_modules/node-cache": {
"version": "5.1.2",
"integrity": "sha512-t1QzWwnk4sjLWaQAS8CHgOJ+RAfmHpxFWmc36IWTiWHQfs0w5JDMBS1b1ZxQteo0vVVuWJvIUKHDkkeK7vIGCg==",
"dependencies": {
"clone": "2.x"
},
"engines": {
"node": ">= 8.0.0"
}
},
"node_modules/node-fetch": {
"version": "2.6.11",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.11.tgz",
"integrity": "sha512-4I6pdBY1EthSqDmJkiNk3JIT8cswwR9nfeW/cPdUagJYEQG7R95WRH74wpz7ma8Gh/9dI9FP+OU+0E4FvtA55w==",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
@ -2194,11 +2156,6 @@
"node": ">=8.0"
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
},
"node_modules/ts-node": {
"version": "10.9.1",
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz",
@ -2391,20 +2348,6 @@
"integrity": "sha512-AFbieoL7a5LMqcnOF04ji+rpXadgOXnZsxQr//r83kLPr7biP7am3g9zbaZIaBGwBRWeSvoMD4mgPdX3e4NWBg==",
"dev": true
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/which": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
@ -2970,10 +2913,6 @@
"domutils": "^3.0.1"
}
},
"clone": {
"version": "2.1.2",
"integrity": "sha512-3Pe/CF1Nn94hyhIYpjtiLhdCoEoz0DqQ+988E9gmeEdQZlojxnOb74wctFyuwWQHzqyf9X7C7MG8juUpqBJT8w=="
},
"color-convert": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@ -3753,21 +3692,6 @@
"integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==",
"dev": true
},
"node-cache": {
"version": "5.1.2",
"integrity": "sha512-t1QzWwnk4sjLWaQAS8CHgOJ+RAfmHpxFWmc36IWTiWHQfs0w5JDMBS1b1ZxQteo0vVVuWJvIUKHDkkeK7vIGCg==",
"requires": {
"clone": "2.x"
}
},
"node-fetch": {
"version": "2.6.11",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.11.tgz",
"integrity": "sha512-4I6pdBY1EthSqDmJkiNk3JIT8cswwR9nfeW/cPdUagJYEQG7R95WRH74wpz7ma8Gh/9dI9FP+OU+0E4FvtA55w==",
"requires": {
"whatwg-url": "^5.0.0"
}
},
"nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
@ -4029,11 +3953,6 @@
"is-number": "^7.0.0"
}
},
"tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
},
"ts-node": {
"version": "10.9.1",
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz",
@ -4164,20 +4083,6 @@
"integrity": "sha512-AFbieoL7a5LMqcnOF04ji+rpXadgOXnZsxQr//r83kLPr7biP7am3g9zbaZIaBGwBRWeSvoMD4mgPdX3e4NWBg==",
"dev": true
},
"webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
},
"whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"requires": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"which": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",

View File

@ -1,7 +1,7 @@
{
"name": "bandcamp-fetch",
"name": "@encode42/bandcamp-fetch",
"version": "1.2.1",
"description": "Scrape Bandcamp content",
"description": "Scrape Bandcamp content (supports Cloudflare Pages)",
"scripts": {
"build": "npm run prepare",
"build:esm": "npx tsc -p tsconfig-esm.json",
@ -30,14 +30,14 @@
"author": "Patrick Kan <patrickkfkan@gmail.com> (https://github.com/patrickkfkan)",
"repository": {
"type": "git",
"url": "https://github.com/patrickkfkan/bandcamp-fetch.git"
"url": "https://github.com/encode42/bandcamp-fetch.git"
},
"license": "MIT",
"directories": {
"dist": "./dist"
},
"engines": {
"node": ">=14"
"node": ">=18"
},
"devDependencies": {
"@types/cookie": "^0.5.3",
@ -58,9 +58,7 @@
"cheerio": "^1.0.0-rc.5",
"cookie": "^0.5.0",
"eval5": "^1.4.7",
"html-entities": "^2.4.0",
"node-cache": "^5.1.2",
"node-fetch": "^2.6.9"
"html-entities": "^2.4.0"
},
"keywords": [
"bandcamp",

View File

@ -5,7 +5,6 @@ import { URLS } from '../utils/Constants.js';
import { ParseError, brToNewLine, isAbsoluteUrl, normalizeUrl, stripTags } from '../utils/Parse.js';
import { ImageFormat } from '../types/Image.js';
import Album from '../types/Album.js';
import { EOL } from 'os';
import Track from '../types/Track.js';
interface ArticleParseOptions {
@ -159,8 +158,8 @@ export default class ArticleParser {
const paragraphs = player.nextUntil('bamplayer-art, h3, h5, article-end', 'p');
paragraphs.each((i: number, p: any) => {
p = $(p);
section.html += (section.html !== '' ? EOL : '') + p.html();
section.text += (section.text !== '' ? EOL + EOL : '') + p.text();
section.html += (section.html !== '' ? '\\n' : '') + p.html();
section.text += (section.text !== '' ? '\\n' + '\\n' : '') + p.text();
});
// Get mediaItemRef
@ -183,8 +182,8 @@ export default class ArticleParser {
};
paragraphs.each((i: number, p: any) => {
p = $(p);
section.html += (section.html !== '' ? EOL : '') + p.html();
section.text += (section.text !== '' ? EOL + EOL : '') + p.text();
section.html += (section.html !== '' ? '\\n' : '') + p.html();
section.text += (section.text !== '' ? '\\n' + '\\n' : '') + p.text();
});
return section;
}

View File

@ -1,4 +1,4 @@
import { URL } from 'url';
import { URL } from '../utils/URL.js';
import { ImageFormat } from '../types/Image.js';
import { URLS } from '../utils/Constants.js';
import SearchResultsParser from './SearchResultsParser.js';

View File

@ -1,3 +1,4 @@
import { URL } from '../utils/URL.js';
import BaseAPI, { BaseAPIParams } from '../common/BaseAPI.js';
import { URLS } from '../utils/Constants.js';
import { FetchMethod } from '../utils/Fetcher.js';

View File

@ -1,10 +1,84 @@
import NodeCache from 'node-cache';
/**
 * Categories of data held by the cache. The `Cache` class in this file keys
 * its TTL and max-entries settings by these values.
 */
export enum CacheDataType {
Page = 'Page',
Constants = 'Constants'
}
/** In-memory store: cache key -> stored value plus its expiry bookkeeping. */
interface NodeCacheRecord {
  [key: string]: {
    /** Lifetime in seconds. `0` (or any non-positive value) means "never expires". */
    ttl: number,
    /** Moment the entry was created, or last overwritten through `set()`. */
    added: Date,
    // Values are untyped on purpose so `get()` keeps returning `any` for existing callers.
    value: any
  }
}

interface NodeCacheOptions {
  /** Lifetime in seconds applied when no explicit TTL is given. Defaults to `0` (no expiry). */
  defaultTTL?: number,
  /** Seconds between sweeps that drop expired entries. Defaults to `600`; `0` disables the sweep. */
  checkperiod?: number
}

/**
 * Dependency-free replacement for the small part of `node-cache` that this
 * module uses: a keyed in-memory store with per-entry TTLs (in seconds).
 *
 * Expired entries are dropped lazily by `get()` and, unless `checkperiod` is
 * `0`, by a periodic background sweep.
 */
class NodeCache {
  private defaultTTL: number;
  // Prototype-less object, so keys such as "constructor" or "toString" are
  // not mistaken for existing entries.
  private recordStore: NodeCacheRecord = Object.create(null);

  /**
   * @param options - Optional defaults; the passed object is not modified.
   */
  constructor(options: NodeCacheOptions = {}) {
    const checkperiod = options.checkperiod ?? 600;
    this.defaultTTL = options.defaultTTL ?? 0;

    if (checkperiod > 0) {
      const timer: unknown = setInterval(() => this.purgeExpired(), checkperiod * 1000);
      // In Node the timer is an object; unref it so an idle cache does not keep
      // the process alive. Runtimes that return a numeric id have no `unref`.
      (timer as { unref?: () => void } | null)?.unref?.();
    }
  }

  /** Tells whether `record` has outlived its TTL at time `now` (epoch milliseconds). */
  private isExpired(record: NodeCacheRecord[string], now: number = Date.now()): boolean {
    // `ttl` is in seconds while timestamps are in milliseconds.
    return record.ttl > 0 && now - record.added.getTime() > record.ttl * 1000;
  }

  /** Removes every expired entry in a single pass. */
  private purgeExpired(): void {
    const now = Date.now();
    for (const [ key, record ] of Object.entries(this.recordStore)) {
      if (this.isExpired(record, now)) {
        this.del(key);
      }
    }
  }

  /**
   * Creates an entry with an `undefined` value if `key` is not present yet.
   * An existing entry is left untouched.
   *
   * @param key - Cache key.
   * @param ttl - Lifetime in seconds; falls back to the default TTL.
   */
  public createKey(key: string, ttl?: number) {
    if (this.recordStore[key]) {
      return;
    }

    this.recordStore[key] = {
      ttl: ttl ?? this.defaultTTL,
      added: new Date(Date.now()),
      value: undefined
    };
  }

  /** Returns the keys currently held in the store. */
  public keys() {
    return Object.keys(this.recordStore);
  }

  /**
   * Returns the value stored under `key`, or `undefined` when the key is
   * missing or its entry has expired (an expired entry is removed on access).
   */
  public get(key: string) {
    const record = this.recordStore[key];
    if (!record) {
      return undefined;
    }

    if (this.isExpired(record)) {
      this.del(key);
      return undefined;
    }

    return record.value;
  }

  /**
   * Stores `value` under `key`. Overwriting an existing key restarts its
   * lifetime, so a refreshed value does not inherit the old entry's age.
   *
   * @param key - Cache key.
   * @param value - Value to store.
   * @param ttl - Lifetime in seconds; falls back to the default TTL.
   */
  public set(key: string, value: any, ttl?: number) {
    this.recordStore[key] = {
      ttl: ttl ?? this.defaultTTL,
      added: new Date(Date.now()),
      value
    };
  }

  /** Removes `key` from the store; a missing key is ignored. */
  public del(key: string) {
    delete this.recordStore[key];
  }

  /** Removes every entry. */
  public flushAll() {
    this.recordStore = Object.create(null);
  }

  /**
   * Changes the TTL (in seconds) of `key`, creating an empty entry first if
   * the key does not exist. The entry's age is not reset.
   */
  public ttl(key: string, ttl: number) {
    this.createKey(key);

    const record = this.recordStore[key];
    if (record) {
      record.ttl = ttl;
    }
  }
}
export default class Cache {
#ttl: Record<CacheDataType, number>;
#maxEntries: Record<CacheDataType, number>;

View File

@ -1,5 +1,4 @@
import { URL } from 'url';
import fetch, { Request, RequestInit } from 'node-fetch';
import { URL } from './URL.js';
import Cache, { CacheDataType } from './Cache.js';
export enum FetchMethod {

35
src/lib/utils/URL.ts Normal file
View File

@ -0,0 +1,35 @@
/**
 * Barebones stand-in for the platform `URL` class.
 *
 * The string passed to the constructor is stored verbatim in `href`; it is
 * not parsed or validated, and `searchParams` always starts out empty.
 * Parameters added through `searchParams` only show up in `toString()`.
 */
export class URL {
  public href: string;
  public searchParams: URLSearchParams;

  /**
   * @param url - Absolute URL string, kept as-is.
   */
  constructor(url: string) {
    this.href = url;
    this.searchParams = new URLSearchParams();
  }

  /**
   * Returns `href` with the serialized `searchParams` merged in.
   *
   * The parameters are joined with `?`, or with `&` when `href` already
   * carries a query string, and are inserted before any `#fragment`.
   */
  public toString() {
    const serialized = this.searchParams.toString();
    // Accept the serialization with or without a leading '?'.
    const query = serialized.startsWith('?') ? serialized.slice(1) : serialized;
    if (query === '') {
      return this.href;
    }

    const hashIndex = this.href.indexOf('#');
    const base = hashIndex === -1 ? this.href : this.href.slice(0, hashIndex);
    const fragment = hashIndex === -1 ? '' : this.href.slice(hashIndex);

    let separator = '?';
    if (base.includes('?')) {
      // Avoid doubling the delimiter when href ends with a bare '?' or '&'.
      separator = base.endsWith('?') || base.endsWith('&') ? '' : '&';
    }

    return `${base}${separator}${query}${fragment}`;
  }
}
/**
 * Barebones stand-in for the platform `URLSearchParams` class: a write-only
 * collection of query parameters that can be serialized to a query string.
 */
export class URLSearchParams {
  // A Map keeps insertion order for every key and treats names such as
  // "__proto__" as ordinary keys.
  private values = new Map<string, string>();

  /**
   * Sets `key` to `value`, replacing any value previously stored under `key`.
   */
  public set(key: string, value: string) {
    this.values.set(key, value);
  }

  /**
   * Serializes the parameters as `?k1=v1&k2=v2`, percent-encoding both keys
   * and values. Returns an empty string when no parameter has been set.
   *
   * Note: unlike the platform class, the result includes the leading `?`.
   */
  public toString() {
    const pairs = Array.from(
      this.values,
      ([ key, value ]) => `${encodeURIComponent(key)}=${encodeURIComponent(value)}`
    );

    return pairs.length > 0 ? `?${pairs.join('&')}` : '';
  }
}