From 7c97934bb9e119e9eb799eaba873cadd04463a5d Mon Sep 17 00:00:00 2001 From: Rephl3x Date: Tue, 27 Jan 2026 11:46:50 +1300 Subject: [PATCH] Improve cache stats performance (build 271261145) --- .build_number | 2 +- backend/app/db.py | 164 ++++++++++++++++++++++++++++++++ backend/app/routers/admin.py | 34 +++---- backend/app/routers/requests.py | 158 ++++++++++++++++++++++++------ 4 files changed, 307 insertions(+), 51 deletions(-) diff --git a/.build_number b/.build_number index 592acce..21a1689 100644 --- a/.build_number +++ b/.build_number @@ -1 +1 @@ -271261125 \ No newline at end of file +271261145 diff --git a/backend/app/db.py b/backend/app/db.py index 0a25e20..2ebc21c 100644 --- a/backend/app/db.py +++ b/backend/app/db.py @@ -179,6 +179,21 @@ def init_db() -> None: ) """ ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS artwork_cache_status ( + request_id INTEGER PRIMARY KEY, + tmdb_id INTEGER, + media_type TEXT, + poster_path TEXT, + backdrop_path TEXT, + has_tmdb INTEGER NOT NULL DEFAULT 0, + poster_cached INTEGER NOT NULL DEFAULT 0, + backdrop_cached INTEGER NOT NULL DEFAULT 0, + updated_at TEXT NOT NULL + ) + """ + ) conn.execute( """ CREATE INDEX IF NOT EXISTS idx_requests_cache_created_at @@ -191,6 +206,12 @@ def init_db() -> None: ON requests_cache (requested_by_norm) """ ) + conn.execute( + """ + CREATE INDEX IF NOT EXISTS idx_artwork_cache_status_updated_at + ON artwork_cache_status (updated_at) + """ + ) conn.execute( """ CREATE TABLE IF NOT EXISTS user_activity ( @@ -942,6 +963,116 @@ def get_request_cache_count() -> int: return int(row[0] or 0) +def upsert_artwork_cache_status( + request_id: int, + tmdb_id: Optional[int], + media_type: Optional[str], + poster_path: Optional[str], + backdrop_path: Optional[str], + has_tmdb: bool, + poster_cached: bool, + backdrop_cached: bool, +) -> None: + updated_at = datetime.now(timezone.utc).isoformat() + with _connect() as conn: + conn.execute( + """ + INSERT INTO artwork_cache_status ( + request_id, + 
tmdb_id, + media_type, + poster_path, + backdrop_path, + has_tmdb, + poster_cached, + backdrop_cached, + updated_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(request_id) DO UPDATE SET + tmdb_id = excluded.tmdb_id, + media_type = excluded.media_type, + poster_path = excluded.poster_path, + backdrop_path = excluded.backdrop_path, + has_tmdb = excluded.has_tmdb, + poster_cached = excluded.poster_cached, + backdrop_cached = excluded.backdrop_cached, + updated_at = excluded.updated_at + """, + ( + request_id, + tmdb_id, + media_type, + poster_path, + backdrop_path, + 1 if has_tmdb else 0, + 1 if poster_cached else 0, + 1 if backdrop_cached else 0, + updated_at, + ), + ) + + +def get_artwork_cache_status_count() -> int: + with _connect() as conn: + row = conn.execute("SELECT COUNT(*) FROM artwork_cache_status").fetchone() + return int(row[0] or 0) + + +def get_artwork_cache_missing_count() -> int: + with _connect() as conn: + row = conn.execute( + """ + SELECT COUNT(*) + FROM artwork_cache_status + WHERE ( + (poster_path IS NULL AND has_tmdb = 1) + OR (poster_path IS NOT NULL AND poster_cached = 0) + OR (backdrop_path IS NULL AND has_tmdb = 1) + OR (backdrop_path IS NOT NULL AND backdrop_cached = 0) + ) + """ + ).fetchone() + return int(row[0] or 0) + + +def update_artwork_cache_stats( + cache_bytes: Optional[int] = None, + cache_files: Optional[int] = None, + missing_count: Optional[int] = None, + total_requests: Optional[int] = None, +) -> None: + updated_at = datetime.now(timezone.utc).isoformat() + if cache_bytes is not None: + set_setting("artwork_cache_bytes", str(int(cache_bytes))) + if cache_files is not None: + set_setting("artwork_cache_files", str(int(cache_files))) + if missing_count is not None: + set_setting("artwork_cache_missing", str(int(missing_count))) + if total_requests is not None: + set_setting("artwork_cache_total_requests", str(int(total_requests))) + set_setting("artwork_cache_updated_at", updated_at) + + +def 
get_artwork_cache_stats() -> Dict[str, Any]: + def _get_int(key: str) -> int: + value = get_setting(key) + if value is None: + return 0 + try: + return int(value) + except (TypeError, ValueError): + return 0 + + return { + "cache_bytes": _get_int("artwork_cache_bytes"), + "cache_files": _get_int("artwork_cache_files"), + "missing_artwork": _get_int("artwork_cache_missing"), + "total_requests": _get_int("artwork_cache_total_requests"), + "updated_at": get_setting("artwork_cache_updated_at"), + } + + def update_request_cache_title( request_id: int, title: str, year: Optional[int] = None ) -> None: @@ -1030,6 +1161,39 @@ def get_request_cache_payloads(limit: int = 200, offset: int = 0) -> list[Dict[s return results +def get_request_cache_payloads_missing(limit: int = 200, offset: int = 0) -> list[Dict[str, Any]]: + limit = max(1, min(limit, 1000)) + offset = max(0, offset) + with _connect() as conn: + rows = conn.execute( + """ + SELECT rc.request_id, rc.payload_json + FROM requests_cache rc + JOIN artwork_cache_status acs + ON rc.request_id = acs.request_id + WHERE ( + (acs.poster_path IS NULL AND acs.has_tmdb = 1) + OR (acs.poster_path IS NOT NULL AND acs.poster_cached = 0) + OR (acs.backdrop_path IS NULL AND acs.has_tmdb = 1) + OR (acs.backdrop_path IS NOT NULL AND acs.backdrop_cached = 0) + ) + ORDER BY rc.request_id ASC + LIMIT ? OFFSET ? 
+ """, + (limit, offset), + ).fetchall() + results: list[Dict[str, Any]] = [] + for row in rows: + payload = None + if row[1]: + try: + payload = json.loads(row[1]) + except json.JSONDecodeError: + payload = None + results.append({"request_id": row[0], "payload": payload}) + return results + + def get_cached_requests_since(since_iso: str) -> list[Dict[str, Any]]: with _connect() as conn: rows = conn.execute( diff --git a/backend/app/routers/admin.py b/backend/app/routers/admin.py index 65f8943..2e2b7eb 100644 --- a/backend/app/routers/admin.py +++ b/backend/app/routers/admin.py @@ -10,7 +10,7 @@ from ..db import ( get_all_users, get_request_cache_overview, get_request_cache_missing_titles, - get_request_cache_count, + get_artwork_cache_stats, get_settings_overrides, get_user_by_username, set_setting, @@ -39,22 +39,6 @@ from ..routers.branding import save_branding_image router = APIRouter(prefix="/admin", tags=["admin"], dependencies=[Depends(require_admin)]) logger = logging.getLogger(__name__) -def _get_artwork_cache_stats() -> Dict[str, int]: - cache_root = os.path.join(os.getcwd(), "data", "artwork") - total_bytes = 0 - total_files = 0 - if not os.path.isdir(cache_root): - return {"cache_bytes": 0, "cache_files": 0} - for root, _, files in os.walk(cache_root): - for name in files: - path = os.path.join(root, name) - try: - total_bytes += os.path.getsize(path) - total_files += 1 - except OSError: - continue - return {"cache_bytes": total_bytes, "cache_files": total_files} - SENSITIVE_KEYS = { "jellyseerr_api_key", "jellyfin_api_key", @@ -312,12 +296,20 @@ async def requests_artwork_status() -> Dict[str, Any]: @router.get("/requests/artwork/summary") async def requests_artwork_summary() -> Dict[str, Any]: runtime = get_runtime_settings() + cache_mode = (runtime.artwork_cache_mode or "remote").lower() + stats = get_artwork_cache_stats() + if cache_mode != "cache": + stats["cache_bytes"] = 0 + stats["cache_files"] = 0 + stats["missing_artwork"] = 0 summary = { - 
"total_requests": get_request_cache_count(), - "missing_artwork": requests_router.get_artwork_cache_missing_count(), - "cache_mode": (runtime.artwork_cache_mode or "remote").lower(), + "cache_mode": cache_mode, + "cache_bytes": stats.get("cache_bytes", 0), + "cache_files": stats.get("cache_files", 0), + "missing_artwork": stats.get("missing_artwork", 0), + "total_requests": stats.get("total_requests", 0), + "updated_at": stats.get("updated_at"), } - summary.update(_get_artwork_cache_stats()) return {"status": "ok", "summary": summary} diff --git a/backend/app/routers/requests.py b/backend/app/routers/requests.py index deaa329..01a988f 100644 --- a/backend/app/routers/requests.py +++ b/backend/app/routers/requests.py @@ -3,6 +3,7 @@ import asyncio import httpx import json import logging +import os import time from urllib.parse import quote from datetime import datetime, timezone, timedelta @@ -30,11 +31,16 @@ from ..db import ( get_request_cache_last_updated, get_request_cache_count, get_request_cache_payloads, + get_request_cache_payloads_missing, repair_request_cache_titles, prune_duplicate_requests_cache, upsert_request_cache, + upsert_artwork_cache_status, + get_artwork_cache_missing_count, + get_artwork_cache_status_count, get_setting, set_setting, + update_artwork_cache_stats, cleanup_history, ) from ..models import Snapshot, TriageResult, RequestType @@ -266,22 +272,69 @@ def _artwork_missing_for_payload(payload: Dict[str, Any]) -> bool: return False -def get_artwork_cache_missing_count() -> int: - limit = 400 - offset = 0 - missing = 0 - while True: - batch = get_request_cache_payloads(limit=limit, offset=offset) - if not batch: - break - for row in batch: - payload = row.get("payload") - if not isinstance(payload, dict): +def _compute_cached_flags( + poster_path: Optional[str], + backdrop_path: Optional[str], + cache_mode: str, + poster_cached: Optional[bool] = None, + backdrop_cached: Optional[bool] = None, +) -> tuple[bool, bool]: + if cache_mode != 
"cache": + return True, True + poster = poster_cached + backdrop = backdrop_cached + if poster is None: + poster = bool(poster_path) and is_tmdb_cached(poster_path, "w185") and is_tmdb_cached( + poster_path, "w342" + ) + if backdrop is None: + backdrop = bool(backdrop_path) and is_tmdb_cached(backdrop_path, "w780") + return bool(poster), bool(backdrop) + + +def _upsert_artwork_status( + payload: Dict[str, Any], + cache_mode: str, + poster_cached: Optional[bool] = None, + backdrop_cached: Optional[bool] = None, +) -> None: + parsed = _parse_request_payload(payload) + request_id = parsed.get("request_id") + if not isinstance(request_id, int): + return + tmdb_id, media_type = _extract_tmdb_lookup(payload) + poster_path, backdrop_path = _extract_artwork_paths(payload) + has_tmdb = bool(tmdb_id and media_type) + poster_cached_flag, backdrop_cached_flag = _compute_cached_flags( + poster_path, backdrop_path, cache_mode, poster_cached, backdrop_cached + ) + upsert_artwork_cache_status( + request_id=request_id, + tmdb_id=tmdb_id, + media_type=media_type, + poster_path=poster_path, + backdrop_path=backdrop_path, + has_tmdb=has_tmdb, + poster_cached=poster_cached_flag, + backdrop_cached=backdrop_cached_flag, + ) + + +def _collect_artwork_cache_disk_stats() -> tuple[int, int]: + cache_root = os.path.join(os.getcwd(), "data", "artwork") + total_bytes = 0 + total_files = 0 + if not os.path.isdir(cache_root): + return 0, 0 + for root, _, files in os.walk(cache_root): + for name in files: + path = os.path.join(root, name) + try: + total_bytes += os.path.getsize(path) + total_files += 1 + except OSError: continue - if _artwork_missing_for_payload(payload): - missing += 1 - offset += limit - return missing + return total_bytes, total_files async def _get_request_details(client: JellyseerrClient, request_id: int) -> Optional[Dict[str, Any]]: @@ -528,6 +581,8 @@ async def _sync_all_requests(client: JellyseerrClient) -> int: updated_at=payload.get("updated_at"), 
payload_json=payload_json, ) + if isinstance(item, dict): + _upsert_artwork_status(item, cache_mode) stored += 1 _sync_state["stored"] = stored if len(items) < take: @@ -547,6 +602,11 @@ async def _sync_all_requests(client: JellyseerrClient) -> int: ) set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat()) _refresh_recent_cache_from_db() + if cache_mode == "cache": + update_artwork_cache_stats( + missing_count=get_artwork_cache_missing_count(), + total_requests=get_request_cache_count(), + ) return stored @@ -658,6 +718,8 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int: updated_at=payload.get("updated_at"), payload_json=payload_json, ) + if isinstance(item, dict): + _upsert_artwork_status(item, cache_mode) stored += 1 page_changed = True _sync_state["stored"] = stored @@ -685,11 +747,19 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int: ) set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat()) _refresh_recent_cache_from_db() + if cache_mode == "cache": + update_artwork_cache_stats( + missing_count=get_artwork_cache_missing_count(), + total_requests=get_request_cache_count(), + ) return stored async def _prefetch_artwork_cache( - client: JellyseerrClient, only_missing: bool = False, total: Optional[int] = None + client: JellyseerrClient, + only_missing: bool = False, + total: Optional[int] = None, + use_missing_query: bool = False, ) -> None: runtime = get_runtime_settings() cache_mode = (runtime.artwork_cache_mode or "remote").lower() @@ -731,7 +801,10 @@ async def _prefetch_artwork_cache( limit = 200 processed = 0 while True: - batch = get_request_cache_payloads(limit=limit, offset=offset) + if use_missing_query: + batch = get_request_cache_payloads_missing(limit=limit, offset=offset) + else: + batch = get_request_cache_payloads(limit=limit, offset=offset) if not batch: break for row in batch: @@ -740,7 +813,7 @@ async def _prefetch_artwork_cache( if not only_missing: processed += 1 continue - if 
only_missing and not _artwork_missing_for_payload(payload): + if only_missing and not use_missing_query and not _artwork_missing_for_payload(payload): continue poster_path, backdrop_path = _extract_artwork_paths(payload) tmdb_id, media_type = _extract_tmdb_lookup(payload) @@ -774,17 +847,26 @@ async def _prefetch_artwork_cache( updated_at=parsed.get("updated_at"), payload_json=json.dumps(payload, ensure_ascii=True), ) + poster_cached_flag = False + backdrop_cached_flag = False if poster_path: try: - await cache_tmdb_image(poster_path, "w185") - await cache_tmdb_image(poster_path, "w342") + poster_cached_flag = bool( + await cache_tmdb_image(poster_path, "w185") + ) and bool(await cache_tmdb_image(poster_path, "w342")) except httpx.HTTPError: - pass + poster_cached_flag = False if backdrop_path: try: - await cache_tmdb_image(backdrop_path, "w780") + backdrop_cached_flag = bool(await cache_tmdb_image(backdrop_path, "w780")) except httpx.HTTPError: - pass + backdrop_cached_flag = False + _upsert_artwork_status( + payload, + cache_mode, + poster_cached=poster_cached_flag if poster_path else None, + backdrop_cached=backdrop_cached_flag if backdrop_path else None, + ) processed += 1 if processed % 25 == 0: _artwork_prefetch_state.update( @@ -792,6 +874,15 @@ async def _prefetch_artwork_cache( ) offset += limit + total_requests = get_request_cache_count() + missing_count = get_artwork_cache_missing_count() + cache_bytes, cache_files = _collect_artwork_cache_disk_stats() + update_artwork_cache_stats( + cache_bytes=cache_bytes, + cache_files=cache_files, + missing_count=missing_count, + total_requests=total_requests, + ) _artwork_prefetch_state.update( { "status": "completed", @@ -809,17 +900,21 @@ async def start_artwork_prefetch( if _artwork_prefetch_task and not _artwork_prefetch_task.done(): return dict(_artwork_prefetch_state) client = JellyseerrClient(base_url, api_key) - total = get_request_cache_count() - if only_missing: + status_count = 
get_artwork_cache_status_count() + total_requests = get_request_cache_count() + use_missing_query = only_missing and status_count >= total_requests and total_requests > 0 + if only_missing and use_missing_query: total = get_artwork_cache_missing_count() + else: + total = total_requests _artwork_prefetch_state.update( { "status": "running", "processed": 0, "total": total, - "message": "Starting missing artwork prefetch" - if only_missing - else "Starting artwork prefetch", + "message": "Seeding artwork cache status" + if only_missing and not use_missing_query + else ("Starting missing artwork prefetch" if only_missing else "Starting artwork prefetch"), "only_missing": only_missing, "started_at": datetime.now(timezone.utc).isoformat(), "finished_at": None, @@ -838,7 +933,12 @@ async def start_artwork_prefetch( async def _runner() -> None: try: - await _prefetch_artwork_cache(client, only_missing=only_missing, total=total) + await _prefetch_artwork_cache( + client, + only_missing=only_missing, + total=total, + use_missing_query=use_missing_query, + ) except Exception: logger.exception("Artwork prefetch failed") _artwork_prefetch_state.update(