Improve cache stats performance (build 271261145)

2026-01-27 11:46:50 +13:00
parent 3f51e24181
commit 7c97934bb9
4 changed files with 307 additions and 51 deletions

View File

@@ -1 +1 @@
-271261125
+271261145

View File

@@ -179,6 +179,21 @@ def init_db() -> None:
             )
             """
         )
+        conn.execute(
+            """
+            CREATE TABLE IF NOT EXISTS artwork_cache_status (
+                request_id INTEGER PRIMARY KEY,
+                tmdb_id INTEGER,
+                media_type TEXT,
+                poster_path TEXT,
+                backdrop_path TEXT,
+                has_tmdb INTEGER NOT NULL DEFAULT 0,
+                poster_cached INTEGER NOT NULL DEFAULT 0,
+                backdrop_cached INTEGER NOT NULL DEFAULT 0,
+                updated_at TEXT NOT NULL
+            )
+            """
+        )
         conn.execute(
             """
             CREATE INDEX IF NOT EXISTS idx_requests_cache_created_at
@@ -191,6 +206,12 @@ def init_db() -> None:
             ON requests_cache (requested_by_norm)
             """
         )
+        conn.execute(
+            """
+            CREATE INDEX IF NOT EXISTS idx_artwork_cache_status_updated_at
+            ON artwork_cache_status (updated_at)
+            """
+        )
         conn.execute(
             """
             CREATE TABLE IF NOT EXISTS user_activity (
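
The new artwork_cache_status table keys one row per request, so repeated syncs overwrite a row in place rather than accumulate duplicates. A minimal standalone sketch of that ON CONFLICT behavior (plain sqlite3 against an in-memory database, separate from this commit's _connect() helper):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    """
    CREATE TABLE artwork_cache_status (
        request_id INTEGER PRIMARY KEY,
        poster_cached INTEGER NOT NULL DEFAULT 0,
        updated_at TEXT NOT NULL
    )
    """
)
sql = """
    INSERT INTO artwork_cache_status (request_id, poster_cached, updated_at)
    VALUES (?, ?, ?)
    ON CONFLICT(request_id) DO UPDATE SET
        poster_cached = excluded.poster_cached,
        updated_at = excluded.updated_at
"""
conn.execute(sql, (42, 0, "2026-01-27T00:00:00+00:00"))  # first sync inserts
conn.execute(sql, (42, 1, "2026-01-27T01:00:00+00:00"))  # re-sync updates in place
assert conn.execute("SELECT COUNT(*) FROM artwork_cache_status").fetchone()[0] == 1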
@@ -942,6 +963,116 @@ def get_request_cache_count() -> int:
         return int(row[0] or 0)
 
 
+def upsert_artwork_cache_status(
+    request_id: int,
+    tmdb_id: Optional[int],
+    media_type: Optional[str],
+    poster_path: Optional[str],
+    backdrop_path: Optional[str],
+    has_tmdb: bool,
+    poster_cached: bool,
+    backdrop_cached: bool,
+) -> None:
+    updated_at = datetime.now(timezone.utc).isoformat()
+    with _connect() as conn:
+        conn.execute(
+            """
+            INSERT INTO artwork_cache_status (
+                request_id,
+                tmdb_id,
+                media_type,
+                poster_path,
+                backdrop_path,
+                has_tmdb,
+                poster_cached,
+                backdrop_cached,
+                updated_at
+            )
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ON CONFLICT(request_id) DO UPDATE SET
+                tmdb_id = excluded.tmdb_id,
+                media_type = excluded.media_type,
+                poster_path = excluded.poster_path,
+                backdrop_path = excluded.backdrop_path,
+                has_tmdb = excluded.has_tmdb,
+                poster_cached = excluded.poster_cached,
+                backdrop_cached = excluded.backdrop_cached,
+                updated_at = excluded.updated_at
+            """,
+            (
+                request_id,
+                tmdb_id,
+                media_type,
+                poster_path,
+                backdrop_path,
+                1 if has_tmdb else 0,
+                1 if poster_cached else 0,
+                1 if backdrop_cached else 0,
+                updated_at,
+            ),
+        )
+
+
+def get_artwork_cache_status_count() -> int:
+    with _connect() as conn:
+        row = conn.execute("SELECT COUNT(*) FROM artwork_cache_status").fetchone()
+        return int(row[0] or 0)
+
+
+def get_artwork_cache_missing_count() -> int:
+    with _connect() as conn:
+        row = conn.execute(
+            """
+            SELECT COUNT(*)
+            FROM artwork_cache_status
+            WHERE (
+                (poster_path IS NULL AND has_tmdb = 1)
+                OR (poster_path IS NOT NULL AND poster_cached = 0)
+                OR (backdrop_path IS NULL AND has_tmdb = 1)
+                OR (backdrop_path IS NOT NULL AND backdrop_cached = 0)
+            )
+            """
+        ).fetchone()
+        return int(row[0] or 0)
+
+
+def update_artwork_cache_stats(
+    cache_bytes: Optional[int] = None,
+    cache_files: Optional[int] = None,
+    missing_count: Optional[int] = None,
+    total_requests: Optional[int] = None,
+) -> None:
+    updated_at = datetime.now(timezone.utc).isoformat()
+    if cache_bytes is not None:
+        set_setting("artwork_cache_bytes", str(int(cache_bytes)))
+    if cache_files is not None:
+        set_setting("artwork_cache_files", str(int(cache_files)))
+    if missing_count is not None:
+        set_setting("artwork_cache_missing", str(int(missing_count)))
+    if total_requests is not None:
+        set_setting("artwork_cache_total_requests", str(int(total_requests)))
+    set_setting("artwork_cache_updated_at", updated_at)
+
+
+def get_artwork_cache_stats() -> Dict[str, Any]:
+    def _get_int(key: str) -> int:
+        value = get_setting(key)
+        if value is None:
+            return 0
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            return 0
+
+    return {
+        "cache_bytes": _get_int("artwork_cache_bytes"),
+        "cache_files": _get_int("artwork_cache_files"),
+        "missing_artwork": _get_int("artwork_cache_missing"),
+        "total_requests": _get_int("artwork_cache_total_requests"),
+        "updated_at": get_setting("artwork_cache_updated_at"),
+    }
+
+
 def update_request_cache_title(
     request_id: int, title: str, year: Optional[int] = None
 ) -> None:
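
Taken together, these helpers let the sync path write one status row per request and the admin path read a single precomputed dict instead of walking payloads. A hypothetical call sequence (the app.db import path is an assumption, and init_db() must have run first):

from app import db  # import path assumed

db.init_db()
db.upsert_artwork_cache_status(
    request_id=42,
    tmdb_id=603,
    media_type="movie",
    poster_path="/abc.jpg",   # present but not yet cached
    backdrop_path=None,       # missing while has_tmdb = 1
    has_tmdb=True,
    poster_cached=False,
    backdrop_cached=False,
)
# The row matches the missing-artwork WHERE clause (on two conditions),
# but COUNT(*) counts rows, so it contributes exactly once:
assert db.get_artwork_cache_missing_count() == 1
db.update_artwork_cache_stats(missing_count=1, total_requests=1)
print(db.get_artwork_cache_stats())
# {'cache_bytes': 0, 'cache_files': 0, 'missing_artwork': 1,
#  'total_requests': 1, 'updated_at': '<ISO timestamp>'}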
@@ -1030,6 +1161,39 @@ def get_request_cache_payloads(limit: int = 200, offset: int = 0) -> list[Dict[s
         return results
 
 
+def get_request_cache_payloads_missing(limit: int = 200, offset: int = 0) -> list[Dict[str, Any]]:
+    limit = max(1, min(limit, 1000))
+    offset = max(0, offset)
+    with _connect() as conn:
+        rows = conn.execute(
+            """
+            SELECT rc.request_id, rc.payload_json
+            FROM requests_cache rc
+            JOIN artwork_cache_status acs
+                ON rc.request_id = acs.request_id
+            WHERE (
+                (acs.poster_path IS NULL AND acs.has_tmdb = 1)
+                OR (acs.poster_path IS NOT NULL AND acs.poster_cached = 0)
+                OR (acs.backdrop_path IS NULL AND acs.has_tmdb = 1)
+                OR (acs.backdrop_path IS NOT NULL AND acs.backdrop_cached = 0)
+            )
+            ORDER BY rc.request_id ASC
+            LIMIT ? OFFSET ?
+            """,
+            (limit, offset),
+        ).fetchall()
+    results: list[Dict[str, Any]] = []
+    for row in rows:
+        payload = None
+        if row[1]:
+            try:
+                payload = json.loads(row[1])
+            except json.JSONDecodeError:
+                payload = None
+        results.append({"request_id": row[0], "payload": payload})
+    return results
+
+
 def get_cached_requests_since(since_iso: str) -> list[Dict[str, Any]]:
     with _connect() as conn:
         rows = conn.execute(
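
The JOIN pushes the missing-artwork filter down into SQLite, so callers page through only the rows that need work instead of scanning every payload in Python. A consumption sketch mirroring how the prefetch loop below drains it (import path assumed):

from app.db import get_request_cache_payloads_missing  # path assumed

offset, limit = 0, 200
while True:
    batch = get_request_cache_payloads_missing(limit=limit, offset=offset)
    if not batch:
        break
    for row in batch:
        # payload is None when the stored JSON failed to decode
        print(row["request_id"], bool(row["payload"]))
    offset += limit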

View File

@@ -10,7 +10,7 @@ from ..db import (
     get_all_users,
     get_request_cache_overview,
     get_request_cache_missing_titles,
-    get_request_cache_count,
+    get_request_cache_stats,
     get_settings_overrides,
     get_user_by_username,
     set_setting,
@@ -39,22 +39,6 @@ from ..routers.branding import save_branding_image
 router = APIRouter(prefix="/admin", tags=["admin"], dependencies=[Depends(require_admin)])
 logger = logging.getLogger(__name__)
 
-
-def _get_artwork_cache_stats() -> Dict[str, int]:
-    cache_root = os.path.join(os.getcwd(), "data", "artwork")
-    total_bytes = 0
-    total_files = 0
-    if not os.path.isdir(cache_root):
-        return {"cache_bytes": 0, "cache_files": 0}
-    for root, _, files in os.walk(cache_root):
-        for name in files:
-            path = os.path.join(root, name)
-            try:
-                total_bytes += os.path.getsize(path)
-                total_files += 1
-            except OSError:
-                continue
-    return {"cache_bytes": total_bytes, "cache_files": total_files}
-
-
 SENSITIVE_KEYS = {
     "jellyseerr_api_key",
     "jellyfin_api_key",
@@ -312,12 +296,20 @@ async def requests_artwork_status() -> Dict[str, Any]:
@router.get("/requests/artwork/summary") @router.get("/requests/artwork/summary")
async def requests_artwork_summary() -> Dict[str, Any]: async def requests_artwork_summary() -> Dict[str, Any]:
runtime = get_runtime_settings() runtime = get_runtime_settings()
cache_mode = (runtime.artwork_cache_mode or "remote").lower()
stats = get_request_cache_stats()
if cache_mode != "cache":
stats["cache_bytes"] = 0
stats["cache_files"] = 0
stats["missing_artwork"] = 0
summary = { summary = {
"total_requests": get_request_cache_count(), "cache_mode": cache_mode,
"missing_artwork": requests_router.get_artwork_cache_missing_count(), "cache_bytes": stats.get("cache_bytes", 0),
"cache_mode": (runtime.artwork_cache_mode or "remote").lower(), "cache_files": stats.get("cache_files", 0),
"missing_artwork": stats.get("missing_artwork", 0),
"total_requests": stats.get("total_requests", 0),
"updated_at": stats.get("updated_at"),
} }
summary.update(_get_artwork_cache_stats())
return {"status": "ok", "summary": summary} return {"status": "ok", "summary": summary}

View File

@@ -3,6 +3,7 @@ import asyncio
 import httpx
 import json
 import logging
+import os
 import time
 from urllib.parse import quote
 from datetime import datetime, timezone, timedelta
@@ -30,11 +31,16 @@ from ..db import (
     get_request_cache_last_updated,
     get_request_cache_count,
     get_request_cache_payloads,
+    get_request_cache_payloads_missing,
     repair_request_cache_titles,
     prune_duplicate_requests_cache,
     upsert_request_cache,
+    upsert_artwork_cache_status,
+    get_artwork_cache_missing_count,
+    get_artwork_cache_status_count,
     get_setting,
     set_setting,
+    update_artwork_cache_stats,
     cleanup_history,
 )
 from ..models import Snapshot, TriageResult, RequestType
@@ -266,22 +272,69 @@ def _artwork_missing_for_payload(payload: Dict[str, Any]) -> bool:
         return False
 
 
-def get_artwork_cache_missing_count() -> int:
-    limit = 400
-    offset = 0
-    missing = 0
-    while True:
-        batch = get_request_cache_payloads(limit=limit, offset=offset)
-        if not batch:
-            break
-        for row in batch:
-            payload = row.get("payload")
-            if not isinstance(payload, dict):
-                continue
-            if _artwork_missing_for_payload(payload):
-                missing += 1
-        offset += limit
-    return missing
+def _compute_cached_flags(
+    poster_path: Optional[str],
+    backdrop_path: Optional[str],
+    cache_mode: str,
+    poster_cached: Optional[bool] = None,
+    backdrop_cached: Optional[bool] = None,
+) -> tuple[bool, bool]:
+    if cache_mode != "cache":
+        return True, True
+    poster = poster_cached
+    backdrop = backdrop_cached
+    if poster is None:
+        poster = bool(poster_path) and is_tmdb_cached(poster_path, "w185") and is_tmdb_cached(
+            poster_path, "w342"
+        )
+    if backdrop is None:
+        backdrop = bool(backdrop_path) and is_tmdb_cached(backdrop_path, "w780")
+    return bool(poster), bool(backdrop)
+
+
+def _upsert_artwork_status(
+    payload: Dict[str, Any],
+    cache_mode: str,
+    poster_cached: Optional[bool] = None,
+    backdrop_cached: Optional[bool] = None,
+) -> None:
+    parsed = _parse_request_payload(payload)
+    request_id = parsed.get("request_id")
+    if not isinstance(request_id, int):
+        return
+    tmdb_id, media_type = _extract_tmdb_lookup(payload)
+    poster_path, backdrop_path = _extract_artwork_paths(payload)
+    has_tmdb = bool(tmdb_id and media_type)
+    poster_cached_flag, backdrop_cached_flag = _compute_cached_flags(
+        poster_path, backdrop_path, cache_mode, poster_cached, backdrop_cached
+    )
+    upsert_artwork_cache_status(
+        request_id=request_id,
+        tmdb_id=tmdb_id,
+        media_type=media_type,
+        poster_path=poster_path,
+        backdrop_path=backdrop_path,
+        has_tmdb=has_tmdb,
+        poster_cached=poster_cached_flag,
+        backdrop_cached=backdrop_cached_flag,
+    )
+
+
+def _collect_artwork_cache_disk_stats() -> tuple[int, int]:
+    cache_root = os.path.join(os.getcwd(), "data", "artwork")
+    total_bytes = 0
+    total_files = 0
+    if not os.path.isdir(cache_root):
+        return 0, 0
+    for root, _, files in os.walk(cache_root):
+        for name in files:
+            path = os.path.join(root, name)
+            try:
+                total_bytes += os.path.getsize(path)
+                total_files += 1
+            except OSError:
+                continue
+    return total_bytes, total_files
 
 
 async def _get_request_details(client: JellyseerrClient, request_id: int) -> Optional[Dict[str, Any]]:
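
_compute_cached_flags has three outcomes: remote mode short-circuits to "cached", explicit flags from the prefetch loop are trusted as-is, and the sync path with flags left as None falls back to probing disk via is_tmdb_cached. A sketch of the decision, runnable inside this module:

# remote mode: nothing is tracked, everything reports as cached
assert _compute_cached_flags("/abc.jpg", None, "remote") == (True, True)

# prefetch path: explicit flags win, no disk probe happens
assert _compute_cached_flags(
    "/abc.jpg", "/bg.jpg", "cache", poster_cached=True, backdrop_cached=False
) == (True, False)

# sync path: flags are None, so is_tmdb_cached() is consulted per size
# (the result depends on what is already under data/artwork on disk)
poster_ok, backdrop_ok = _compute_cached_flags("/abc.jpg", "/bg.jpg", "cache")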
@@ -528,6 +581,8 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
updated_at=payload.get("updated_at"), updated_at=payload.get("updated_at"),
payload_json=payload_json, payload_json=payload_json,
) )
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
stored += 1 stored += 1
_sync_state["stored"] = stored _sync_state["stored"] = stored
if len(items) < take: if len(items) < take:
@@ -547,6 +602,11 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
     )
     set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat())
     _refresh_recent_cache_from_db()
+    if cache_mode == "cache":
+        update_artwork_cache_stats(
+            missing_count=get_artwork_cache_missing_count(),
+            total_requests=get_request_cache_count(),
+        )
     return stored
@@ -658,6 +718,8 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
updated_at=payload.get("updated_at"), updated_at=payload.get("updated_at"),
payload_json=payload_json, payload_json=payload_json,
) )
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
stored += 1 stored += 1
page_changed = True page_changed = True
_sync_state["stored"] = stored _sync_state["stored"] = stored
@@ -685,11 +747,19 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
     )
     set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat())
     _refresh_recent_cache_from_db()
+    if cache_mode == "cache":
+        update_artwork_cache_stats(
+            missing_count=get_artwork_cache_missing_count(),
+            total_requests=get_request_cache_count(),
+        )
     return stored
 
 
 async def _prefetch_artwork_cache(
-    client: JellyseerrClient, only_missing: bool = False, total: Optional[int] = None
+    client: JellyseerrClient,
+    only_missing: bool = False,
+    total: Optional[int] = None,
+    use_missing_query: bool = False,
 ) -> None:
     runtime = get_runtime_settings()
     cache_mode = (runtime.artwork_cache_mode or "remote").lower()
@@ -731,6 +801,9 @@ async def _prefetch_artwork_cache(
     limit = 200
     processed = 0
     while True:
-        batch = get_request_cache_payloads(limit=limit, offset=offset)
+        if use_missing_query:
+            batch = get_request_cache_payloads_missing(limit=limit, offset=offset)
+        else:
+            batch = get_request_cache_payloads(limit=limit, offset=offset)
         if not batch:
             break
@@ -740,7 +813,7 @@ async def _prefetch_artwork_cache(
             if not only_missing:
                 processed += 1
                 continue
-            if only_missing and not _artwork_missing_for_payload(payload):
+            if only_missing and not use_missing_query and not _artwork_missing_for_payload(payload):
                 continue
             poster_path, backdrop_path = _extract_artwork_paths(payload)
             tmdb_id, media_type = _extract_tmdb_lookup(payload)
@@ -774,17 +847,26 @@ async def _prefetch_artwork_cache(
updated_at=parsed.get("updated_at"), updated_at=parsed.get("updated_at"),
payload_json=json.dumps(payload, ensure_ascii=True), payload_json=json.dumps(payload, ensure_ascii=True),
) )
poster_cached_flag = False
backdrop_cached_flag = False
if poster_path: if poster_path:
try: try:
poster_cached_flag = bool(
await cache_tmdb_image(poster_path, "w185") await cache_tmdb_image(poster_path, "w185")
await cache_tmdb_image(poster_path, "w342") ) and bool(await cache_tmdb_image(poster_path, "w342"))
except httpx.HTTPError: except httpx.HTTPError:
pass poster_cached_flag = False
if backdrop_path: if backdrop_path:
try: try:
await cache_tmdb_image(backdrop_path, "w780") backdrop_cached_flag = bool(await cache_tmdb_image(backdrop_path, "w780"))
except httpx.HTTPError: except httpx.HTTPError:
pass backdrop_cached_flag = False
_upsert_artwork_status(
payload,
cache_mode,
poster_cached=poster_cached_flag if poster_path else None,
backdrop_cached=backdrop_cached_flag if backdrop_path else None,
)
processed += 1 processed += 1
if processed % 25 == 0: if processed % 25 == 0:
_artwork_prefetch_state.update( _artwork_prefetch_state.update(
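
Note the poster flag demands both the w185 and w342 variants: if either cache_tmdb_image call returns a falsy value or raises, the row stays marked uncached and a later missing-only pass retries it. The same logic as a standalone helper (hypothetical name, and it assumes cache_tmdb_image returns something truthy on success):

async def _poster_fully_cached(poster_path: str) -> bool:
    # hypothetical helper mirroring the inline logic above
    try:
        return bool(await cache_tmdb_image(poster_path, "w185")) and bool(
            await cache_tmdb_image(poster_path, "w342")
        )
    except httpx.HTTPError:
        return False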
@@ -792,6 +874,15 @@ async def _prefetch_artwork_cache(
                 )
         offset += limit
+    total_requests = get_request_cache_count()
+    missing_count = get_artwork_cache_missing_count()
+    cache_bytes, cache_files = _collect_artwork_cache_disk_stats()
+    update_artwork_cache_stats(
+        cache_bytes=cache_bytes,
+        cache_files=cache_files,
+        missing_count=missing_count,
+        total_requests=total_requests,
+    )
     _artwork_prefetch_state.update(
         {
             "status": "completed",
@@ -809,17 +900,21 @@ async def start_artwork_prefetch(
     if _artwork_prefetch_task and not _artwork_prefetch_task.done():
         return dict(_artwork_prefetch_state)
     client = JellyseerrClient(base_url, api_key)
-    total = get_request_cache_count()
-    if only_missing:
+    status_count = get_artwork_cache_status_count()
+    total_requests = get_request_cache_count()
+    use_missing_query = only_missing and status_count >= total_requests and total_requests > 0
+    if only_missing and use_missing_query:
         total = get_artwork_cache_missing_count()
+    else:
+        total = total_requests
     _artwork_prefetch_state.update(
         {
             "status": "running",
             "processed": 0,
             "total": total,
-            "message": "Starting missing artwork prefetch"
-            if only_missing
-            else "Starting artwork prefetch",
+            "message": "Seeding artwork cache status"
+            if only_missing and not use_missing_query
+            else ("Starting missing artwork prefetch" if only_missing else "Starting artwork prefetch"),
             "only_missing": only_missing,
             "started_at": datetime.now(timezone.utc).isoformat(),
             "finished_at": None,
@@ -838,7 +933,12 @@ async def start_artwork_prefetch(
     async def _runner() -> None:
         try:
-            await _prefetch_artwork_cache(client, only_missing=only_missing, total=total)
+            await _prefetch_artwork_cache(
+                client,
+                only_missing=only_missing,
+                total=total,
+                use_missing_query=use_missing_query,
+            )
         except Exception:
             logger.exception("Artwork prefetch failed")
             _artwork_prefetch_state.update(