use MangaDex to better find links between AniList and MangaUpdates

This commit is contained in:
wea_ondara
2023-11-25 16:58:48 +01:00
parent 53f059b7de
commit d8bb97805c
25 changed files with 506 additions and 72 deletions

23
backend/src/cache/CacheUtil.ts vendored Normal file
View File

@@ -0,0 +1,23 @@
import * as fs from 'fs';
/**
 * One cached value plus the time it was stored.
 * `data` is the cached payload as a string; `lastUpdateMs` is a millisecond timestamp
 * (the cache classes fill it from `Date.now()` and compare it against a maximum age).
 */
export type CacheEntry = { data: string, lastUpdateMs: number }
/**
 * Runtime check that a parsed JSON value has the shape of a {@link CacheEntry}:
 * a string `data` and a finite numeric `lastUpdateMs`.
 */
function isCacheEntry(value: unknown): value is CacheEntry {
    if (typeof value !== 'object' || value === null) {
        return false;
    }
    const candidate = value as { data?: unknown, lastUpdateMs?: unknown };
    return typeof candidate.data === 'string'
        && typeof candidate.lastUpdateMs === 'number'
        && Number.isFinite(candidate.lastUpdateMs);
}

/**
 * Loads a persisted cache file into a map of cache entries.
 *
 * The file must contain a JSON object at the top level. Values that are not valid
 * cache entries are dropped so that a hand-edited or partially corrupted file cannot
 * inject entries whose `data` or `lastUpdateMs` is missing.
 *
 * @param file path of the JSON cache file
 * @returns the valid entries keyed by their JSON property name; an empty map when the
 *          file cannot be read, cannot be parsed, or is not a JSON object (the failure is logged)
 */
export function loadJson(file: string): Map<string, CacheEntry> {
    try {
        const data = fs.readFileSync(file, {encoding: 'utf8', flag: 'r'});
        const json: unknown = JSON.parse(data);
        // Arrays and primitives would otherwise be turned into bogus entries by Object.entries.
        if (typeof json !== 'object' || json === null || Array.isArray(json)) {
            throw new Error('expected a JSON object at the top level');
        }
        const result = new Map<string, CacheEntry>();
        for (const [key, value] of Object.entries(json)) {
            if (isCacheEntry(value)) {
                result.set(key, value);
            }
        }
        return result;
    } catch (err) {
        console.error('Failed to load cache from ' + file + ':' + (err as Error).message);
        return new Map();
    }
}
/**
 * Writes a cache map to disk as one JSON object, using the map keys as property names.
 * The target file is overwritten. Failures are logged to the console and not rethrown.
 *
 * @param file path of the JSON file to write
 * @param map  cache entries to persist
 */
export function saveJson(file: string, map: Map<string, CacheEntry>): void {
    try {
        const asPlainObject = Object.fromEntries(map);
        const serialized = JSON.stringify(asPlainObject);
        fs.writeFileSync(file, serialized, {encoding: 'utf8', flag: 'w'});
    } catch (failure) {
        console.error(failure);
    }
}

39
backend/src/cache/MangaDexCache.ts vendored Normal file
View File

@@ -0,0 +1,39 @@
import * as fs from 'fs';
import {CacheEntry, loadJson, saveJson} from './CacheUtil';
/**
 * File-backed cache for MangaDex title-search responses.
 *
 * Entries live in memory and are loaded from `cache/mangadex/searchByTitle.json` on
 * construction; every `put` rewrites that file.
 */
export class MangaDexCache {
    private readonly FILE_CACHE_DIR = 'cache/mangadex';

    private readonly FILE_SEARCH_BY_TITLE = this.FILE_CACHE_DIR + '/searchByTitle.json';

    /** Search results older than this (7 days, in milliseconds) count as out of date. */
    private readonly MAX_CACHE_AGE_SEARCH_BY_TITLE = 7 * 24 * 60 * 60 * 1000;

    private _searchByTitle = new Map<string, CacheEntry>();

    constructor() {
        this.load();
    }

    /** Ensures the cache directory exists and reads the persisted entries. */
    private load(): void {
        try {
            fs.mkdirSync(this.FILE_CACHE_DIR, {recursive: true});
        } catch (err) {
            // With recursive: true an already existing directory is not an error, so anything
            // caught here (e.g. missing permissions) means later saves will fail too.
            console.error('Failed to create cache directory ' + this.FILE_CACHE_DIR + ':', err);
        }

        this._searchByTitle = loadJson(this.FILE_SEARCH_BY_TITLE);
    }

    /**
     * Looks up a cached search response.
     *
     * @param title search title used as the cache key (compared verbatim)
     * @returns the cached response, or `undefined` when there is no entry or it is out of date
     */
    getSearchByTitle(title: string): string | undefined {
        const entry = this._searchByTitle.get(title);
        return !entry || this.isOutOfDate(entry, Date.now()) ? undefined : entry.data;
    }

    /**
     * Stores a search response under the given title, stamps it with the current time
     * and persists the whole cache file.
     */
    putSearchByTitle(title: string, value: string): void {
        this._searchByTitle.set(title, {data: value, lastUpdateMs: Date.now()});
        saveJson(this.FILE_SEARCH_BY_TITLE, this._searchByTitle);
    }

    /** @returns the titles of all entries that are older than the maximum cache age */
    getOutOfDateSearch(): string[] {
        const now = Date.now();
        const outdatedTitles: string[] = [];
        for (const [title, entry] of this._searchByTitle) {
            if (this.isOutOfDate(entry, now)) {
                outdatedTitles.push(title);
            }
        }
        return outdatedTitles;
    }

    /** Single definition of "out of date", shared by lookup and renewal. */
    private isOutOfDate(entry: CacheEntry, now: number): boolean {
        return entry.lastUpdateMs + this.MAX_CACHE_AGE_SEARCH_BY_TITLE < now;
    }
}

View File

@@ -1,20 +1,22 @@
import * as fs from 'fs';
type CacheEntry = { data: string, lastUpdateMs: number }
import {CacheEntry, loadJson, saveJson} from './CacheUtil';
export class MangaUpdatesCache {
private readonly FILE_CACHE_DIR = 'cache';
private readonly FILE_CACHE_DIR = 'cache/mangaupdates';
private readonly FILE_SEARCH_BY_TITLE = this.FILE_CACHE_DIR + '/searchByTitle.json';
private readonly FILE_SERIES_BY_ID = this.FILE_CACHE_DIR + '/seriesById.json';
private readonly FILE_SERIES_GROUPS_BY_ID = this.FILE_CACHE_DIR + '/seriesGroupsById.json';
private readonly FILE_SERIES_IDS_BY_WEBSITE_ID = this.FILE_CACHE_DIR + '/seriesIdsByWebsiteId.json';
private readonly MAX_CACHE_AGE_SEARCH_BY_TITLE = 7 * 24 * 60 * 60 * 1000;
private readonly MAX_CACHE_AGE_SERIES_BY_ID = 30 * 24 * 60 * 60 * 1000;
private readonly MAX_CACHE_AGE_SERIES_GROUPS_BY_ID = 1 * 24 * 60 * 60 * 1000;
private readonly MAX_CACHE_AGE_SERIES_IDS_BY_WEBSITE_ID = 30 * 24 * 60 * 60 * 1000;
private _searchByTitle = new Map<string, CacheEntry>();
private _seriesById = new Map<string, CacheEntry>();
private _seriesGroupsById = new Map<string, CacheEntry>();
private _seriesIdsByWebsiteId = new Map<string, CacheEntry>();
constructor() {
this.load();
@@ -22,32 +24,13 @@ export class MangaUpdatesCache {
private load(): void {
try {
fs.mkdirSync(this.FILE_CACHE_DIR);
fs.mkdirSync(this.FILE_CACHE_DIR, {recursive: true});
} catch (_) {
}
this._searchByTitle = this.loadJson(this.FILE_SEARCH_BY_TITLE);
this._seriesById = this.loadJson(this.FILE_SERIES_BY_ID);
this._seriesGroupsById = this.loadJson(this.FILE_SERIES_GROUPS_BY_ID);
}
private loadJson(file: string): Map<string, CacheEntry> {
try {
const data = fs.readFileSync(file, {encoding: 'utf8', flag: 'r'});
const json = JSON.parse(data);
return new Map<string, CacheEntry>(Object.entries(json));
} catch (err) {
console.error('Failed to load cache from ' + file + ':' + (err as Error).message);
return new Map();
}
}
private saveJson(file: string, map: Map<string, CacheEntry>): void {
try {
const data = JSON.stringify(Object.fromEntries(map.entries()));
fs.writeFileSync(file, data, {encoding: 'utf8', flag: 'w'});
} catch (err) {
console.error(err);
}
this._searchByTitle = loadJson(this.FILE_SEARCH_BY_TITLE);
this._seriesById = loadJson(this.FILE_SERIES_BY_ID);
this._seriesGroupsById = loadJson(this.FILE_SERIES_GROUPS_BY_ID);
this._seriesIdsByWebsiteId = loadJson(this.FILE_SERIES_IDS_BY_WEBSITE_ID);
}
getSearchByTitle(title: string): string | undefined {
@@ -65,19 +48,29 @@ export class MangaUpdatesCache {
return !entry || entry.lastUpdateMs + this.MAX_CACHE_AGE_SERIES_GROUPS_BY_ID < Date.now() ? undefined : entry.data;
}
/**
 * Looks up the cached series-id payload for a website id.
 *
 * @returns the cached data, or `undefined` when there is no entry or the entry is older
 *          than `MAX_CACHE_AGE_SERIES_IDS_BY_WEBSITE_ID`
 */
getSeriesIdByWebsiteId(id: string): string | undefined {
    const cached = this._seriesIdsByWebsiteId.get(id);
    if (!cached) {
        return undefined;
    }
    const expiresAtMs = cached.lastUpdateMs + this.MAX_CACHE_AGE_SERIES_IDS_BY_WEBSITE_ID;
    return expiresAtMs < Date.now() ? undefined : cached.data;
}
putSearchByTitle(title: string, value: string): void {
this._searchByTitle.set(title, {data: value, lastUpdateMs: Date.now()});
this.saveJson(this.FILE_SEARCH_BY_TITLE, this._searchByTitle);
saveJson(this.FILE_SEARCH_BY_TITLE, this._searchByTitle);
}
putSeriesById(id: string, value: string): void {
this._seriesById.set(id, {data: value, lastUpdateMs: Date.now()});
this.saveJson(this.FILE_SERIES_BY_ID, this._seriesById);
saveJson(this.FILE_SERIES_BY_ID, this._seriesById);
}
putSeriesGroupsById(id: string, value: string): void {
this._seriesGroupsById.set(id, {data: value, lastUpdateMs: Date.now()});
this.saveJson(this.FILE_SERIES_GROUPS_BY_ID, this._seriesGroupsById);
saveJson(this.FILE_SERIES_GROUPS_BY_ID, this._seriesGroupsById);
}
/**
 * Stores the series-id payload for a website id, stamped with the current time,
 * and persists the website-id cache file.
 */
putSeriesIdByWebsiteId(id: string, value: string): void {
    const stampedEntry = {data: value, lastUpdateMs: Date.now()};
    this._seriesIdsByWebsiteId.set(id, stampedEntry);
    saveJson(this.FILE_SERIES_IDS_BY_WEBSITE_ID, this._seriesIdsByWebsiteId);
}
getOutOfDateSearch(): string[] {

View File

@@ -0,0 +1,47 @@
import {NextFunction, Request, Response} from 'express';
import {MangaDexCache} from '../cache/MangaDexCache';
/**
 * Express controller that answers MangaDex title searches, serving from the cache when
 * possible and otherwise forwarding the request to the MangaDex API.
 */
export class MangaDexController {
    private readonly cache: MangaDexCache;

    constructor(cache: MangaDexCache) {
        this.cache = cache;
    }

    /**
     * Searches MangaDex by the `title` query parameter.
     *
     * Responds with 400 when `title` is missing, blank or not a single string; with the
     * cached JSON when a fresh cache entry exists; otherwise with the MangaDex API response
     * (successful responses are cached). Non-200 API responses are passed through with their
     * status and body. Unexpected errors are handed to `next`.
     */
    async manga(req: Request, res: Response, next: NextFunction): Promise<void> {
        try {
            // Query values can also be arrays or nested objects (e.g. ?title=a&title=b),
            // so check the runtime type instead of casting.
            const title = req.query.title;
            if (typeof title !== 'string' || !title.trim().length) {
                res.status(400).send('Title required!');
                next();
                return;
            }

            const fromCache = this.cache.getSearchByTitle(title);
            if (fromCache) {
                res.status(200).setHeader('Content-Type', 'application/json').send(fromCache);
                next();
                return;
            }

            //throttle (delays each uncached request by one second)
            await new Promise((r) => setTimeout(r, 1000));

            //fetch from MangaDex
            const fromApi = await fetch('https://api.mangadex.org/manga?title=' + encodeURIComponent(title));
            if (fromApi.status !== 200) {
                // fromApi.body is a stream object, not the payload; read it before forwarding.
                res.status(fromApi.status).send(await fromApi.text());
                next();
                return;
            }

            const fromApiJson = await fromApi.text();
            this.cache.putSearchByTitle(title, fromApiJson);
            res.status(200).setHeader('Content-Type', 'application/json').send(fromApiJson);
            next();
        } catch (e) {
            next(e);
        }
    }
}

View File

@@ -103,4 +103,50 @@ export class MangaUpdatesController {
res.status(200).setHeader('Content-Type', 'application/json').send(fromApiJson);
next();
}
/**
 * Resolves a MangaUpdates website id (route parameter `websiteId`) to the series id
 * found in the series page's RSS link.
 *
 * Purely numeric ids are looked up via `series.html?id=<id>`, all others via `series/<id>`.
 * Responds with 400 for a missing or non-alphanumeric id, with the upstream status and body
 * when the page request does not return 200, with 404 when the page contains no RSS link,
 * and otherwise with `{website_id, series_id}` as JSON (the result is cached).
 * Unexpected errors are handed to `next`.
 */
async getSeriesIdFromWebsiteId(req: Request, res: Response, next: NextFunction): Promise<void> {
    try {
        const id = req.params.websiteId;
        if (!id || !id.trim().length || !/^[0-9a-zA-Z]+$/.test(id)) {
            res.status(400).send('Website id required!');
            next();
            return;
        }

        const fromCache = this.cache.getSeriesIdByWebsiteId(id);
        if (fromCache) {
            // The cached value is a JSON document, so label it like the other JSON endpoints do.
            res.status(200).setHeader('Content-Type', 'application/json').send(fromCache);
            next();
            return;
        }

        //throttle (delays each uncached request by one second)
        await new Promise((r) => setTimeout(r, 1000));

        //fetch from manga updates
        const fromApi = await fetch(/^[0-9]+$/.test(id)
            ? 'https://www.mangaupdates.com/series.html?id=' + id
            : 'https://www.mangaupdates.com/series/' + id);
        if (fromApi.status !== 200) {
            // fromApi.body is a stream object, not the payload; read it before forwarding.
            res.status(fromApi.status).send(await fromApi.text());
            next();
            return;
        }

        const fromApiHtml = await fromApi.text();
        // Dots are escaped so that only the literal API host matches.
        const match = fromApiHtml.match(/https:\/\/api\.mangaupdates\.com\/v1\/series\/([0-9]+)\/rss/);
        const seriesId = match?.[1];
        if (!seriesId) {
            res.status(404).send('Series id not found in website!');
            next();
            return;
        }

        const json = JSON.stringify({website_id: id, series_id: Number.parseInt(seriesId, 10)});
        this.cache.putSeriesIdByWebsiteId(id, json);
        res.status(200).setHeader('Content-Type', 'application/json').send(json);
        next();
    } catch (e) {
        next(e);
    }
}
}

View File

@@ -5,12 +5,15 @@ import mangaUpdatesRouter from './router/MangaUpdatesRouter.js';
import Scheduler from './schedule/Scheduler.js';
import {MangaUpdatesCache} from './cache/MangaUpdatesCache.js';
import * as fs from 'fs';
import {MangaDexCache} from './cache/MangaDexCache';
import mangaDexRouter from './router/MangaDexRouter';
const config = JSON.parse(fs.readFileSync('config.json').toString())
const app = express();
const mangaDexCache = new MangaDexCache();
const mangaUpdatesCache = new MangaUpdatesCache();
const scheduler = new Scheduler(mangaUpdatesCache);
const scheduler = new Scheduler(mangaDexCache, mangaUpdatesCache);
scheduler.registerJobs();
@@ -20,6 +23,7 @@ app.use(express.json());
//router
app.use('/anilist', aniListRouter());
app.use('/mangadex', mangaDexRouter(mangaDexCache));
app.use('/mangaupdates', mangaUpdatesRouter(mangaUpdatesCache));
app.use(express.static('_client')) //for production

View File

@@ -0,0 +1,10 @@
import {Router} from 'express';
import {MangaDexController} from '../controller/MangaDexController';
import {MangaDexCache} from '../cache/MangaDexCache';
/**
 * Creates the router for the MangaDex endpoints.
 *
 * @param cache cache instance handed to the controller that serves the routes
 * @returns a router with `GET /manga` wired to {@link MangaDexController.manga}
 */
export default function mangaDexRouter(cache: MangaDexCache): Router {
    const controller = new MangaDexController(cache);
    // Bound so the handler keeps its controller instance when express invokes it.
    const searchByTitle = controller.manga.bind(controller);

    const router = Router();
    router.get('/manga', searchByTitle);
    return router;
}

View File

@@ -8,5 +8,6 @@ export default function mangaUpdatesRouter(cache: MangaUpdatesCache): Router {
router.post('/v1/series/search', controller.search.bind(controller));
router.get('/v1/series/:id', controller.getById.bind(controller));
router.get('/v1/series/:id/groups', controller.getGroupById.bind(controller));
router.get('/series_id_from_website_id/:websiteId', controller.getSeriesIdFromWebsiteId.bind(controller));
return router;
}

View File

@@ -0,0 +1,28 @@
import IJob from './IJob';
import MangaDexCacheRenewService from '../service/MangaDexCacheRenewService';
import {MangaDexCache} from '../cache/MangaDexCache';
/**
 * Job that renews out-of-date MangaDex cache entries through
 * {@link MangaDexCacheRenewService}. A run that starts while a previous run is
 * still in progress returns immediately.
 */
export default class MangaDexCacheRenewJob implements IJob<void> {
    private readonly service: MangaDexCacheRenewService;

    /** True while a renewal is running; used to skip overlapping executions. */
    private lock = false;

    constructor(cache: MangaDexCache) {
        this.service = new MangaDexCacheRenewService(cache);
    }

    /** Schedule expression for this job. */
    get schedule(): Date | string {
        return '0 0 * * * *'; //every hour
    }

    /** Runs one renewal pass unless another one is still active. */
    async execute(): Promise<void> {
        if (!this.lock) {
            this.lock = true;
            try {
                await this.service.renew();
            } finally {
                // Release the flag even when the renewal fails.
                this.lock = false;
            }
        }
    }
}

View File

@@ -2,13 +2,16 @@ import {MangaUpdatesCache} from '../cache/MangaUpdatesCache.js';
import IJob from './IJob.js';
import MangaUpdateCacheRenewJob from './MangaUpdateCacheRenewJob.js';
import {gracefulShutdown, scheduleJob} from 'node-schedule';
import {MangaDexCache} from '../cache/MangaDexCache';
import MangaDexCacheRenewJob from './MangaDexCacheRenewJob';
export default class Scheduler {
private readonly jobs: IJob<any>[] = [];
constructor(cache: MangaUpdatesCache) {
constructor(mangaDexCache: MangaDexCache, mangaUpdatesCache: MangaUpdatesCache) {
this.jobs.push(
new MangaUpdateCacheRenewJob(cache),
new MangaDexCacheRenewJob(mangaDexCache),
new MangaUpdateCacheRenewJob(mangaUpdatesCache),
);
}

View File

@@ -0,0 +1,37 @@
import {MangaDexCache} from '../cache/MangaDexCache';
/**
 * Re-fetches MangaDex search results whose cache entries are out of date.
 */
export default class MangaDexCacheRenewService {
    /** Pause in milliseconds before each request to the MangaDex API. */
    private static readonly delay = 3000;

    private readonly cache: MangaDexCache;

    constructor(cache: MangaDexCache) {
        this.cache = cache;
    }

    /** Runs a full renewal pass and logs its start and end. */
    async renew(): Promise<void> {
        console.log('Renewing MangaDex cache ...');
        await this.renewMedia();
        console.log('Renewing MangaDex cache done');
    }

    /**
     * Refreshes every out-of-date title search, one request at a time.
     * A title whose request fails or does not return 200 keeps its old entry;
     * the failure is logged and the loop continues with the next title.
     */
    async renewMedia(): Promise<void> {
        const titles = this.cache.getOutOfDateSearch();
        console.log(titles.length + ' out-of-date media');

        for (const title of titles) {
            await new Promise((r) => setTimeout(r, MangaDexCacheRenewService.delay));

            try {
                const fromApi = await fetch('https://api.mangadex.org/manga?title=' + encodeURIComponent(title));
                if (fromApi.status !== 200) {
                    console.error('MangaDex search for "' + title + '" returned status ' + fromApi.status + ', skipping');
                    // Discard the unread body so the underlying connection can be released.
                    await fromApi.body?.cancel();
                    continue;
                }

                const fromApiJson = await fromApi.text();
                this.cache.putSearchByTitle(title, fromApiJson);
            } catch (e) {
                console.error(e);
            }
        }
    }
}

View File

@@ -10,11 +10,11 @@ export default class MangaUpdateCacheRenewService {
}
async renew(): Promise<void> {
console.log('Renewing cache ...');
console.log('Renewing MangaUpdates cache ...');
await this.renewRelations();
await this.renewSeries();
await this.renewUpdates();
console.log('Renewing cache done');
console.log('Renewing MangaUpdates cache done');
}
async renewRelations(): Promise<void> {