Improve cache stats performance (build 271261145)

This commit is contained in:
2026-01-27 11:46:50 +13:00
parent 3f51e24181
commit 7c97934bb9
4 changed files with 307 additions and 51 deletions

View File

@@ -1 +1 @@
271261125
271261145

View File

@@ -179,6 +179,21 @@ def init_db() -> None:
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS artwork_cache_status (
request_id INTEGER PRIMARY KEY,
tmdb_id INTEGER,
media_type TEXT,
poster_path TEXT,
backdrop_path TEXT,
has_tmdb INTEGER NOT NULL DEFAULT 0,
poster_cached INTEGER NOT NULL DEFAULT 0,
backdrop_cached INTEGER NOT NULL DEFAULT 0,
updated_at TEXT NOT NULL
)
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_requests_cache_created_at
@@ -191,6 +206,12 @@ def init_db() -> None:
ON requests_cache (requested_by_norm)
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_artwork_cache_status_updated_at
ON artwork_cache_status (updated_at)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS user_activity (
@@ -942,6 +963,116 @@ def get_request_cache_count() -> int:
return int(row[0] or 0)
def upsert_artwork_cache_status(
    request_id: int,
    tmdb_id: Optional[int],
    media_type: Optional[str],
    poster_path: Optional[str],
    backdrop_path: Optional[str],
    has_tmdb: bool,
    poster_cached: bool,
    backdrop_cached: bool,
) -> None:
    """Insert or refresh the cached-artwork status row for one request.

    Rows are keyed by ``request_id``; on conflict every column is
    overwritten with the supplied values plus a fresh UTC timestamp.
    """
    stamped_at = datetime.now(timezone.utc).isoformat()
    # Booleans are stored as 0/1 integers in SQLite.
    row = (
        request_id,
        tmdb_id,
        media_type,
        poster_path,
        backdrop_path,
        int(bool(has_tmdb)),
        int(bool(poster_cached)),
        int(bool(backdrop_cached)),
        stamped_at,
    )
    with _connect() as conn:
        conn.execute(
            """
            INSERT INTO artwork_cache_status (
                request_id,
                tmdb_id,
                media_type,
                poster_path,
                backdrop_path,
                has_tmdb,
                poster_cached,
                backdrop_cached,
                updated_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(request_id) DO UPDATE SET
                tmdb_id = excluded.tmdb_id,
                media_type = excluded.media_type,
                poster_path = excluded.poster_path,
                backdrop_path = excluded.backdrop_path,
                has_tmdb = excluded.has_tmdb,
                poster_cached = excluded.poster_cached,
                backdrop_cached = excluded.backdrop_cached,
                updated_at = excluded.updated_at
            """,
            row,
        )
def get_artwork_cache_status_count() -> int:
    """Return the total number of rows in ``artwork_cache_status``."""
    with _connect() as conn:
        result = conn.execute(
            "SELECT COUNT(*) FROM artwork_cache_status"
        ).fetchone()
        # COUNT(*) never yields NULL, but guard the same way the
        # sibling count helpers do.
        return int(result[0] or 0)
def get_artwork_cache_missing_count() -> int:
    """Count status rows whose poster or backdrop artwork is incomplete.

    A row is "missing" when a TMDB-backed request lacks a poster or
    backdrop path, or when a known path has not been cached yet.
    """
    query = """
            SELECT COUNT(*)
            FROM artwork_cache_status
            WHERE (
                (poster_path IS NULL AND has_tmdb = 1)
                OR (poster_path IS NOT NULL AND poster_cached = 0)
                OR (backdrop_path IS NULL AND has_tmdb = 1)
                OR (backdrop_path IS NOT NULL AND backdrop_cached = 0)
            )
            """
    with _connect() as conn:
        record = conn.execute(query).fetchone()
        return int(record[0] or 0)
def update_artwork_cache_stats(
    cache_bytes: Optional[int] = None,
    cache_files: Optional[int] = None,
    missing_count: Optional[int] = None,
    total_requests: Optional[int] = None,
) -> None:
    """Persist artwork-cache statistics into the settings table.

    ``None`` arguments leave the corresponding setting untouched; the
    ``artwork_cache_updated_at`` timestamp is always refreshed.
    """
    stamped_at = datetime.now(timezone.utc).isoformat()
    pairs = (
        ("artwork_cache_bytes", cache_bytes),
        ("artwork_cache_files", cache_files),
        ("artwork_cache_missing", missing_count),
        ("artwork_cache_total_requests", total_requests),
    )
    for key, value in pairs:
        if value is not None:
            set_setting(key, str(int(value)))
    set_setting("artwork_cache_updated_at", stamped_at)
def get_artwork_cache_stats() -> Dict[str, Any]:
    """Read persisted artwork-cache statistics back from settings.

    Numeric settings that are absent or unparsable come back as 0;
    ``updated_at`` is returned as-is (possibly ``None``).
    """
    def _as_int(key: str) -> int:
        raw = get_setting(key)
        try:
            # int(None) raises TypeError, so a missing setting also
            # falls through to the 0 default.
            return int(raw)
        except (TypeError, ValueError):
            return 0

    return {
        "cache_bytes": _as_int("artwork_cache_bytes"),
        "cache_files": _as_int("artwork_cache_files"),
        "missing_artwork": _as_int("artwork_cache_missing"),
        "total_requests": _as_int("artwork_cache_total_requests"),
        "updated_at": get_setting("artwork_cache_updated_at"),
    }
def update_request_cache_title(
request_id: int, title: str, year: Optional[int] = None
) -> None:
@@ -1030,6 +1161,39 @@ def get_request_cache_payloads(limit: int = 200, offset: int = 0) -> list[Dict[s
return results
def get_request_cache_payloads_missing(limit: int = 200, offset: int = 0) -> list[Dict[str, Any]]:
    """Return cached request payloads whose artwork is known incomplete.

    Joins the request cache against ``artwork_cache_status`` using the
    same missing-artwork predicate as ``get_artwork_cache_missing_count``.
    ``limit`` is clamped to [1, 1000] and ``offset`` to >= 0. Payloads
    that fail JSON decoding are returned as ``None``.
    """
    limit = min(max(limit, 1), 1000)
    offset = offset if offset > 0 else 0

    def _decode(raw: Optional[str]) -> Optional[Dict[str, Any]]:
        # Tolerate empty or corrupt JSON rather than failing the page.
        if not raw:
            return None
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return None

    with _connect() as conn:
        rows = conn.execute(
            """
            SELECT rc.request_id, rc.payload_json
            FROM requests_cache rc
            JOIN artwork_cache_status acs
                ON rc.request_id = acs.request_id
            WHERE (
                (acs.poster_path IS NULL AND acs.has_tmdb = 1)
                OR (acs.poster_path IS NOT NULL AND acs.poster_cached = 0)
                OR (acs.backdrop_path IS NULL AND acs.has_tmdb = 1)
                OR (acs.backdrop_path IS NOT NULL AND acs.backdrop_cached = 0)
            )
            ORDER BY rc.request_id ASC
            LIMIT ? OFFSET ?
            """,
            (limit, offset),
        ).fetchall()
    return [{"request_id": rid, "payload": _decode(raw)} for rid, raw in rows]
def get_cached_requests_since(since_iso: str) -> list[Dict[str, Any]]:
with _connect() as conn:
rows = conn.execute(

View File

@@ -10,7 +10,7 @@ from ..db import (
get_all_users,
get_request_cache_overview,
get_request_cache_missing_titles,
get_request_cache_count,
get_request_cache_stats,
get_settings_overrides,
get_user_by_username,
set_setting,
@@ -39,22 +39,6 @@ from ..routers.branding import save_branding_image
router = APIRouter(prefix="/admin", tags=["admin"], dependencies=[Depends(require_admin)])
logger = logging.getLogger(__name__)
def _get_artwork_cache_stats() -> Dict[str, int]:
cache_root = os.path.join(os.getcwd(), "data", "artwork")
total_bytes = 0
total_files = 0
if not os.path.isdir(cache_root):
return {"cache_bytes": 0, "cache_files": 0}
for root, _, files in os.walk(cache_root):
for name in files:
path = os.path.join(root, name)
try:
total_bytes += os.path.getsize(path)
total_files += 1
except OSError:
continue
return {"cache_bytes": total_bytes, "cache_files": total_files}
SENSITIVE_KEYS = {
"jellyseerr_api_key",
"jellyfin_api_key",
@@ -312,12 +296,20 @@ async def requests_artwork_status() -> Dict[str, Any]:
@router.get("/requests/artwork/summary")
async def requests_artwork_summary() -> Dict[str, Any]:
runtime = get_runtime_settings()
cache_mode = (runtime.artwork_cache_mode or "remote").lower()
stats = get_request_cache_stats()
if cache_mode != "cache":
stats["cache_bytes"] = 0
stats["cache_files"] = 0
stats["missing_artwork"] = 0
summary = {
"total_requests": get_request_cache_count(),
"missing_artwork": requests_router.get_artwork_cache_missing_count(),
"cache_mode": (runtime.artwork_cache_mode or "remote").lower(),
"cache_mode": cache_mode,
"cache_bytes": stats.get("cache_bytes", 0),
"cache_files": stats.get("cache_files", 0),
"missing_artwork": stats.get("missing_artwork", 0),
"total_requests": stats.get("total_requests", 0),
"updated_at": stats.get("updated_at"),
}
summary.update(_get_artwork_cache_stats())
return {"status": "ok", "summary": summary}

View File

@@ -3,6 +3,7 @@ import asyncio
import httpx
import json
import logging
import os
import time
from urllib.parse import quote
from datetime import datetime, timezone, timedelta
@@ -30,11 +31,16 @@ from ..db import (
get_request_cache_last_updated,
get_request_cache_count,
get_request_cache_payloads,
get_request_cache_payloads_missing,
repair_request_cache_titles,
prune_duplicate_requests_cache,
upsert_request_cache,
upsert_artwork_cache_status,
get_artwork_cache_missing_count,
get_artwork_cache_status_count,
get_setting,
set_setting,
update_artwork_cache_stats,
cleanup_history,
)
from ..models import Snapshot, TriageResult, RequestType
@@ -266,22 +272,69 @@ def _artwork_missing_for_payload(payload: Dict[str, Any]) -> bool:
return False
def get_artwork_cache_missing_count() -> int:
limit = 400
offset = 0
missing = 0
while True:
batch = get_request_cache_payloads(limit=limit, offset=offset)
if not batch:
break
for row in batch:
payload = row.get("payload")
if not isinstance(payload, dict):
def _compute_cached_flags(
poster_path: Optional[str],
backdrop_path: Optional[str],
cache_mode: str,
poster_cached: Optional[bool] = None,
backdrop_cached: Optional[bool] = None,
) -> tuple[bool, bool]:
if cache_mode != "cache":
return True, True
poster = poster_cached
backdrop = backdrop_cached
if poster is None:
poster = bool(poster_path) and is_tmdb_cached(poster_path, "w185") and is_tmdb_cached(
poster_path, "w342"
)
if backdrop is None:
backdrop = bool(backdrop_path) and is_tmdb_cached(backdrop_path, "w780")
return bool(poster), bool(backdrop)
def _upsert_artwork_status(
    payload: Dict[str, Any],
    cache_mode: str,
    poster_cached: Optional[bool] = None,
    backdrop_cached: Optional[bool] = None,
) -> None:
    """Derive and persist the artwork cache status row for one payload.

    Silently skips payloads without an integer request id. Cached flags
    may be supplied by the caller (e.g. right after a prefetch) or are
    computed via ``_compute_cached_flags``.
    """
    parsed = _parse_request_payload(payload)
    req_id = parsed.get("request_id")
    if not isinstance(req_id, int):
        # No usable key — nothing to upsert.
        return
    tmdb_id, media_type = _extract_tmdb_lookup(payload)
    poster_path, backdrop_path = _extract_artwork_paths(payload)
    poster_flag, backdrop_flag = _compute_cached_flags(
        poster_path, backdrop_path, cache_mode, poster_cached, backdrop_cached
    )
    upsert_artwork_cache_status(
        request_id=req_id,
        tmdb_id=tmdb_id,
        media_type=media_type,
        poster_path=poster_path,
        backdrop_path=backdrop_path,
        has_tmdb=bool(tmdb_id and media_type),
        poster_cached=poster_flag,
        backdrop_cached=backdrop_flag,
    )
def _collect_artwork_cache_disk_stats() -> tuple[int, int]:
cache_root = os.path.join(os.getcwd(), "data", "artwork")
total_bytes = 0
total_files = 0
if not os.path.isdir(cache_root):
return 0, 0
for root, _, files in os.walk(cache_root):
for name in files:
path = os.path.join(root, name)
try:
total_bytes += os.path.getsize(path)
total_files += 1
except OSError:
continue
if _artwork_missing_for_payload(payload):
missing += 1
offset += limit
return missing
return total_bytes, total_files
async def _get_request_details(client: JellyseerrClient, request_id: int) -> Optional[Dict[str, Any]]:
@@ -528,6 +581,8 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
updated_at=payload.get("updated_at"),
payload_json=payload_json,
)
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
stored += 1
_sync_state["stored"] = stored
if len(items) < take:
@@ -547,6 +602,11 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
)
set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat())
_refresh_recent_cache_from_db()
if cache_mode == "cache":
update_artwork_cache_stats(
missing_count=get_artwork_cache_missing_count(),
total_requests=get_request_cache_count(),
)
return stored
@@ -658,6 +718,8 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
updated_at=payload.get("updated_at"),
payload_json=payload_json,
)
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
stored += 1
page_changed = True
_sync_state["stored"] = stored
@@ -685,11 +747,19 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
)
set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat())
_refresh_recent_cache_from_db()
if cache_mode == "cache":
update_artwork_cache_stats(
missing_count=get_artwork_cache_missing_count(),
total_requests=get_request_cache_count(),
)
return stored
async def _prefetch_artwork_cache(
client: JellyseerrClient, only_missing: bool = False, total: Optional[int] = None
client: JellyseerrClient,
only_missing: bool = False,
total: Optional[int] = None,
use_missing_query: bool = False,
) -> None:
runtime = get_runtime_settings()
cache_mode = (runtime.artwork_cache_mode or "remote").lower()
@@ -731,6 +801,9 @@ async def _prefetch_artwork_cache(
limit = 200
processed = 0
while True:
if use_missing_query:
batch = get_request_cache_payloads_missing(limit=limit, offset=offset)
else:
batch = get_request_cache_payloads(limit=limit, offset=offset)
if not batch:
break
@@ -740,7 +813,7 @@ async def _prefetch_artwork_cache(
if not only_missing:
processed += 1
continue
if only_missing and not _artwork_missing_for_payload(payload):
if only_missing and not use_missing_query and not _artwork_missing_for_payload(payload):
continue
poster_path, backdrop_path = _extract_artwork_paths(payload)
tmdb_id, media_type = _extract_tmdb_lookup(payload)
@@ -774,17 +847,26 @@ async def _prefetch_artwork_cache(
updated_at=parsed.get("updated_at"),
payload_json=json.dumps(payload, ensure_ascii=True),
)
poster_cached_flag = False
backdrop_cached_flag = False
if poster_path:
try:
poster_cached_flag = bool(
await cache_tmdb_image(poster_path, "w185")
await cache_tmdb_image(poster_path, "w342")
) and bool(await cache_tmdb_image(poster_path, "w342"))
except httpx.HTTPError:
pass
poster_cached_flag = False
if backdrop_path:
try:
await cache_tmdb_image(backdrop_path, "w780")
backdrop_cached_flag = bool(await cache_tmdb_image(backdrop_path, "w780"))
except httpx.HTTPError:
pass
backdrop_cached_flag = False
_upsert_artwork_status(
payload,
cache_mode,
poster_cached=poster_cached_flag if poster_path else None,
backdrop_cached=backdrop_cached_flag if backdrop_path else None,
)
processed += 1
if processed % 25 == 0:
_artwork_prefetch_state.update(
@@ -792,6 +874,15 @@ async def _prefetch_artwork_cache(
)
offset += limit
total_requests = get_request_cache_count()
missing_count = get_artwork_cache_missing_count()
cache_bytes, cache_files = _collect_artwork_cache_disk_stats()
update_artwork_cache_stats(
cache_bytes=cache_bytes,
cache_files=cache_files,
missing_count=missing_count,
total_requests=total_requests,
)
_artwork_prefetch_state.update(
{
"status": "completed",
@@ -809,17 +900,21 @@ async def start_artwork_prefetch(
if _artwork_prefetch_task and not _artwork_prefetch_task.done():
return dict(_artwork_prefetch_state)
client = JellyseerrClient(base_url, api_key)
total = get_request_cache_count()
if only_missing:
status_count = get_artwork_cache_status_count()
total_requests = get_request_cache_count()
use_missing_query = only_missing and status_count >= total_requests and total_requests > 0
if only_missing and use_missing_query:
total = get_artwork_cache_missing_count()
else:
total = total_requests
_artwork_prefetch_state.update(
{
"status": "running",
"processed": 0,
"total": total,
"message": "Starting missing artwork prefetch"
if only_missing
else "Starting artwork prefetch",
"message": "Seeding artwork cache status"
if only_missing and not use_missing_query
else ("Starting missing artwork prefetch" if only_missing else "Starting artwork prefetch"),
"only_missing": only_missing,
"started_at": datetime.now(timezone.utc).isoformat(),
"finished_at": None,
@@ -838,7 +933,12 @@ async def start_artwork_prefetch(
async def _runner() -> None:
try:
await _prefetch_artwork_cache(client, only_missing=only_missing, total=total)
await _prefetch_artwork_cache(
client,
only_missing=only_missing,
total=total,
use_missing_query=use_missing_query,
)
except Exception:
logger.exception("Artwork prefetch failed")
_artwork_prefetch_state.update(