Improve cache stats performance (build 271261145)

This commit is contained in:
2026-01-27 11:46:50 +13:00
parent 3f51e24181
commit 7c97934bb9
4 changed files with 307 additions and 51 deletions

View File

@@ -3,6 +3,7 @@ import asyncio
import httpx
import json
import logging
import os
import time
from urllib.parse import quote
from datetime import datetime, timezone, timedelta
@@ -30,11 +31,16 @@ from ..db import (
get_request_cache_last_updated,
get_request_cache_count,
get_request_cache_payloads,
get_request_cache_payloads_missing,
repair_request_cache_titles,
prune_duplicate_requests_cache,
upsert_request_cache,
upsert_artwork_cache_status,
get_artwork_cache_missing_count,
get_artwork_cache_status_count,
get_setting,
set_setting,
update_artwork_cache_stats,
cleanup_history,
)
from ..models import Snapshot, TriageResult, RequestType
@@ -266,22 +272,69 @@ def _artwork_missing_for_payload(payload: Dict[str, Any]) -> bool:
return False
def get_artwork_cache_missing_count() -> int:
limit = 400
offset = 0
missing = 0
while True:
batch = get_request_cache_payloads(limit=limit, offset=offset)
if not batch:
break
for row in batch:
payload = row.get("payload")
if not isinstance(payload, dict):
def _compute_cached_flags(
    poster_path: Optional[str],
    backdrop_path: Optional[str],
    cache_mode: str,
    poster_cached: Optional[bool] = None,
    backdrop_cached: Optional[bool] = None,
) -> tuple[bool, bool]:
    """Resolve the ``(poster, backdrop)`` cached flags for one request.

    Args:
        poster_path: Poster path for the request, or ``None``/empty.
        backdrop_path: Backdrop path for the request, or ``None``/empty.
        cache_mode: Artwork cache mode; only ``"cache"`` triggers real checks.
        poster_cached: Known poster state. When not ``None`` it is used as-is
            instead of probing the cache.
        backdrop_cached: Known backdrop state, handled like ``poster_cached``.

    Returns:
        A ``(poster_cached, backdrop_cached)`` pair of plain booleans. Any
        mode other than ``"cache"`` yields ``(True, True)``.
    """
    # Outside "cache" mode both images are always reported as cached.
    if cache_mode != "cache":
        return True, True

    if poster_cached is not None:
        poster_flag = bool(poster_cached)
    elif not poster_path:
        # No path to probe: reported as not cached.
        poster_flag = False
    else:
        # Both poster sizes must be present; the second size is only
        # checked when the first one is cached.
        poster_flag = all(
            is_tmdb_cached(poster_path, size) for size in ("w185", "w342")
        )

    if backdrop_cached is not None:
        backdrop_flag = bool(backdrop_cached)
    elif not backdrop_path:
        backdrop_flag = False
    else:
        backdrop_flag = bool(is_tmdb_cached(backdrop_path, "w780"))

    return poster_flag, backdrop_flag
def _upsert_artwork_status(
    payload: Dict[str, Any],
    cache_mode: str,
    poster_cached: Optional[bool] = None,
    backdrop_cached: Optional[bool] = None,
) -> None:
    """Write the artwork cache status row for a single request payload.

    Args:
        payload: Request payload to derive the status from.
        cache_mode: Artwork cache mode, forwarded to ``_compute_cached_flags``.
        poster_cached: Optional known poster state; ``None`` lets
            ``_compute_cached_flags`` decide.
        backdrop_cached: Optional known backdrop state; ``None`` lets
            ``_compute_cached_flags`` decide.

    Does nothing when the parsed payload carries no integer ``request_id``.
    """
    request_id = _parse_request_payload(payload).get("request_id")
    if not isinstance(request_id, int):
        return

    tmdb_id, media_type = _extract_tmdb_lookup(payload)
    poster_path, backdrop_path = _extract_artwork_paths(payload)
    cached_poster, cached_backdrop = _compute_cached_flags(
        poster_path=poster_path,
        backdrop_path=backdrop_path,
        cache_mode=cache_mode,
        poster_cached=poster_cached,
        backdrop_cached=backdrop_cached,
    )

    status_row = {
        "request_id": request_id,
        "tmdb_id": tmdb_id,
        "media_type": media_type,
        "poster_path": poster_path,
        "backdrop_path": backdrop_path,
        # A TMDB lookup needs both the id and the media type.
        "has_tmdb": bool(tmdb_id and media_type),
        "poster_cached": cached_poster,
        "backdrop_cached": cached_backdrop,
    }
    upsert_artwork_cache_status(**status_row)
def _collect_artwork_cache_disk_stats() -> tuple[int, int]:
    """Measure the on-disk artwork cache under ``<cwd>/data/artwork``.

    Returns:
        A ``(total_bytes, total_files)`` pair. ``(0, 0)`` is returned when
        the cache directory does not exist. Files whose size cannot be read
        (``OSError``) are skipped and counted in neither total.
    """
    cache_root = os.path.join(os.getcwd(), "data", "artwork")
    if not os.path.isdir(cache_root):
        return 0, 0

    total_bytes = 0
    total_files = 0
    for root, _, files in os.walk(cache_root):
        for name in files:
            # Only the size lookup can fail (e.g. a file removed mid-walk),
            # so it is the only statement guarded by the try.
            try:
                size = os.path.getsize(os.path.join(root, name))
            except OSError:
                continue
            total_bytes += size
            total_files += 1
    return total_bytes, total_files
async def _get_request_details(client: JellyseerrClient, request_id: int) -> Optional[Dict[str, Any]]:
@@ -528,6 +581,8 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
updated_at=payload.get("updated_at"),
payload_json=payload_json,
)
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
stored += 1
_sync_state["stored"] = stored
if len(items) < take:
@@ -547,6 +602,11 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
)
set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat())
_refresh_recent_cache_from_db()
if cache_mode == "cache":
update_artwork_cache_stats(
missing_count=get_artwork_cache_missing_count(),
total_requests=get_request_cache_count(),
)
return stored
@@ -658,6 +718,8 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
updated_at=payload.get("updated_at"),
payload_json=payload_json,
)
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
stored += 1
page_changed = True
_sync_state["stored"] = stored
@@ -685,11 +747,19 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
)
set_setting(_sync_last_key, datetime.now(timezone.utc).isoformat())
_refresh_recent_cache_from_db()
if cache_mode == "cache":
update_artwork_cache_stats(
missing_count=get_artwork_cache_missing_count(),
total_requests=get_request_cache_count(),
)
return stored
async def _prefetch_artwork_cache(
client: JellyseerrClient, only_missing: bool = False, total: Optional[int] = None
client: JellyseerrClient,
only_missing: bool = False,
total: Optional[int] = None,
use_missing_query: bool = False,
) -> None:
runtime = get_runtime_settings()
cache_mode = (runtime.artwork_cache_mode or "remote").lower()
@@ -731,7 +801,10 @@ async def _prefetch_artwork_cache(
limit = 200
processed = 0
while True:
batch = get_request_cache_payloads(limit=limit, offset=offset)
if use_missing_query:
batch = get_request_cache_payloads_missing(limit=limit, offset=offset)
else:
batch = get_request_cache_payloads(limit=limit, offset=offset)
if not batch:
break
for row in batch:
@@ -740,7 +813,7 @@ async def _prefetch_artwork_cache(
if not only_missing:
processed += 1
continue
if only_missing and not _artwork_missing_for_payload(payload):
if only_missing and not use_missing_query and not _artwork_missing_for_payload(payload):
continue
poster_path, backdrop_path = _extract_artwork_paths(payload)
tmdb_id, media_type = _extract_tmdb_lookup(payload)
@@ -774,17 +847,26 @@ async def _prefetch_artwork_cache(
updated_at=parsed.get("updated_at"),
payload_json=json.dumps(payload, ensure_ascii=True),
)
poster_cached_flag = False
backdrop_cached_flag = False
if poster_path:
try:
await cache_tmdb_image(poster_path, "w185")
await cache_tmdb_image(poster_path, "w342")
poster_cached_flag = bool(
await cache_tmdb_image(poster_path, "w185")
) and bool(await cache_tmdb_image(poster_path, "w342"))
except httpx.HTTPError:
pass
poster_cached_flag = False
if backdrop_path:
try:
await cache_tmdb_image(backdrop_path, "w780")
backdrop_cached_flag = bool(await cache_tmdb_image(backdrop_path, "w780"))
except httpx.HTTPError:
pass
backdrop_cached_flag = False
_upsert_artwork_status(
payload,
cache_mode,
poster_cached=poster_cached_flag if poster_path else None,
backdrop_cached=backdrop_cached_flag if backdrop_path else None,
)
processed += 1
if processed % 25 == 0:
_artwork_prefetch_state.update(
@@ -792,6 +874,15 @@ async def _prefetch_artwork_cache(
)
offset += limit
total_requests = get_request_cache_count()
missing_count = get_artwork_cache_missing_count()
cache_bytes, cache_files = _collect_artwork_cache_disk_stats()
update_artwork_cache_stats(
cache_bytes=cache_bytes,
cache_files=cache_files,
missing_count=missing_count,
total_requests=total_requests,
)
_artwork_prefetch_state.update(
{
"status": "completed",
@@ -809,17 +900,21 @@ async def start_artwork_prefetch(
if _artwork_prefetch_task and not _artwork_prefetch_task.done():
return dict(_artwork_prefetch_state)
client = JellyseerrClient(base_url, api_key)
total = get_request_cache_count()
if only_missing:
status_count = get_artwork_cache_status_count()
total_requests = get_request_cache_count()
use_missing_query = only_missing and status_count >= total_requests and total_requests > 0
if only_missing and use_missing_query:
total = get_artwork_cache_missing_count()
else:
total = total_requests
_artwork_prefetch_state.update(
{
"status": "running",
"processed": 0,
"total": total,
"message": "Starting missing artwork prefetch"
if only_missing
else "Starting artwork prefetch",
"message": "Seeding artwork cache status"
if only_missing and not use_missing_query
else ("Starting missing artwork prefetch" if only_missing else "Starting artwork prefetch"),
"only_missing": only_missing,
"started_at": datetime.now(timezone.utc).isoformat(),
"finished_at": None,
@@ -838,7 +933,12 @@ async def start_artwork_prefetch(
async def _runner() -> None:
try:
await _prefetch_artwork_cache(client, only_missing=only_missing, total=total)
await _prefetch_artwork_cache(
client,
only_missing=only_missing,
total=total,
use_missing_query=use_missing_query,
)
except Exception:
logger.exception("Artwork prefetch failed")
_artwork_prefetch_state.update(