Improve SQLite batching and diagnostics visibility

This commit is contained in:
2026-03-03 15:03:23 +13:00
parent e582ff4ef7
commit dda17a20a5
10 changed files with 667 additions and 188 deletions

View File

@@ -26,7 +26,7 @@ from ..db import (
get_cached_requests,
get_cached_requests_since,
get_cached_request_by_media_id,
get_request_cache_by_id,
get_request_cache_lookup,
get_request_cache_payload,
get_request_cache_last_updated,
get_request_cache_count,
@@ -35,7 +35,9 @@ from ..db import (
repair_request_cache_titles,
prune_duplicate_requests_cache,
upsert_request_cache,
upsert_request_cache_many,
upsert_artwork_cache_status,
upsert_artwork_cache_status_many,
get_artwork_cache_missing_count,
get_artwork_cache_status_count,
get_setting,
@@ -411,26 +413,55 @@ def _upsert_artwork_status(
poster_cached: Optional[bool] = None,
backdrop_cached: Optional[bool] = None,
) -> None:
record = _build_artwork_status_record(payload, cache_mode, poster_cached, backdrop_cached)
if not record:
return
upsert_artwork_cache_status(**record)
def _build_request_cache_record(payload: Dict[str, Any], request_payload: Dict[str, Any]) -> Dict[str, Any]:
return {
"request_id": payload.get("request_id"),
"media_id": payload.get("media_id"),
"media_type": payload.get("media_type"),
"status": payload.get("status"),
"title": payload.get("title"),
"year": payload.get("year"),
"requested_by": payload.get("requested_by"),
"requested_by_norm": payload.get("requested_by_norm"),
"requested_by_id": payload.get("requested_by_id"),
"created_at": payload.get("created_at"),
"updated_at": payload.get("updated_at"),
"payload_json": json.dumps(request_payload, ensure_ascii=True),
}
def _build_artwork_status_record(
    payload: Dict[str, Any],
    cache_mode: str,
    poster_cached: Optional[bool] = None,
    backdrop_cached: Optional[bool] = None,
) -> Optional[Dict[str, Any]]:
    """Build the keyword-argument record for an artwork-status upsert.

    Pure builder: it must not write to the database itself (batched writes
    happen via upsert_artwork_cache_status_many at the call sites).

    Returns None when *payload* carries no integer ``request_id`` — there is
    nothing to key the status row on, so callers skip the item.
    """
    parsed = _parse_request_payload(payload)
    request_id = parsed.get("request_id")
    if not isinstance(request_id, int):
        # Dead duplicate `return` and a leftover direct
        # upsert_artwork_cache_status(...) call from the pre-batching version
        # were removed here; the builder now only returns data.
        return None
    tmdb_id, media_type = _extract_tmdb_lookup(payload)
    poster_path, backdrop_path = _extract_artwork_paths(payload)
    # TMDB lookups need both an id and a media type to be usable.
    has_tmdb = bool(tmdb_id and media_type)
    poster_cached_flag, backdrop_cached_flag = _compute_cached_flags(
        poster_path, backdrop_path, cache_mode, poster_cached, backdrop_cached
    )
    return {
        "request_id": request_id,
        "tmdb_id": tmdb_id,
        "media_type": media_type,
        "poster_path": poster_path,
        "backdrop_path": backdrop_path,
        "has_tmdb": has_tmdb,
        "poster_cached": poster_cached_flag,
        "backdrop_cached": backdrop_cached_flag,
    }
def _collect_artwork_cache_disk_stats() -> tuple[int, int]:
@@ -631,6 +662,16 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
if not isinstance(items, list) or not items:
logger.info("Seerr sync completed: no more results at skip=%s", skip)
break
page_request_ids = [
payload.get("request_id")
for item in items
if isinstance(item, dict)
for payload in [_parse_request_payload(item)]
if isinstance(payload.get("request_id"), int)
]
cached_by_request_id = get_request_cache_lookup(page_request_ids)
page_cache_records: list[Dict[str, Any]] = []
page_artwork_records: list[Dict[str, Any]] = []
for item in items:
if not isinstance(item, dict):
continue
@@ -638,10 +679,9 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
request_id = payload.get("request_id")
cached_title = None
if isinstance(request_id, int):
if not payload.get("title"):
cached = get_request_cache_by_id(request_id)
if cached and cached.get("title"):
cached_title = cached.get("title")
cached = cached_by_request_id.get(request_id)
if not payload.get("title") and cached and cached.get("title"):
cached_title = cached.get("title")
needs_details = (
not payload.get("title")
or not payload.get("media_id")
@@ -672,25 +712,17 @@ async def _sync_all_requests(client: JellyseerrClient) -> int:
payload["title"] = cached_title
if not isinstance(payload.get("request_id"), int):
continue
payload_json = json.dumps(item, ensure_ascii=True)
upsert_request_cache(
request_id=payload.get("request_id"),
media_id=payload.get("media_id"),
media_type=payload.get("media_type"),
status=payload.get("status"),
title=payload.get("title"),
year=payload.get("year"),
requested_by=payload.get("requested_by"),
requested_by_norm=payload.get("requested_by_norm"),
requested_by_id=payload.get("requested_by_id"),
created_at=payload.get("created_at"),
updated_at=payload.get("updated_at"),
payload_json=payload_json,
)
page_cache_records.append(_build_request_cache_record(payload, item))
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
artwork_record = _build_artwork_status_record(item, cache_mode)
if artwork_record:
page_artwork_records.append(artwork_record)
stored += 1
_sync_state["stored"] = stored
if page_cache_records:
upsert_request_cache_many(page_cache_records)
if page_artwork_records:
upsert_artwork_cache_status_many(page_artwork_records)
if len(items) < take:
logger.info("Seerr sync completed: stored=%s", stored)
break
@@ -749,6 +781,16 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
if not isinstance(items, list) or not items:
logger.info("Seerr delta sync completed: no more results at skip=%s", skip)
break
page_request_ids = [
payload.get("request_id")
for item in items
if isinstance(item, dict)
for payload in [_parse_request_payload(item)]
if isinstance(payload.get("request_id"), int)
]
cached_by_request_id = get_request_cache_lookup(page_request_ids)
page_cache_records: list[Dict[str, Any]] = []
page_artwork_records: list[Dict[str, Any]] = []
page_changed = False
for item in items:
if not isinstance(item, dict):
@@ -756,7 +798,7 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
payload = _parse_request_payload(item)
request_id = payload.get("request_id")
if isinstance(request_id, int):
cached = get_request_cache_by_id(request_id)
cached = cached_by_request_id.get(request_id)
incoming_updated = payload.get("updated_at")
cached_title = cached.get("title") if cached else None
if cached and incoming_updated and cached.get("updated_at") == incoming_updated and cached.get("title"):
@@ -790,26 +832,18 @@ async def _sync_delta_requests(client: JellyseerrClient) -> int:
payload["title"] = cached_title
if not isinstance(payload.get("request_id"), int):
continue
payload_json = json.dumps(item, ensure_ascii=True)
upsert_request_cache(
request_id=payload.get("request_id"),
media_id=payload.get("media_id"),
media_type=payload.get("media_type"),
status=payload.get("status"),
title=payload.get("title"),
year=payload.get("year"),
requested_by=payload.get("requested_by"),
requested_by_norm=payload.get("requested_by_norm"),
requested_by_id=payload.get("requested_by_id"),
created_at=payload.get("created_at"),
updated_at=payload.get("updated_at"),
payload_json=payload_json,
)
page_cache_records.append(_build_request_cache_record(payload, item))
if isinstance(item, dict):
_upsert_artwork_status(item, cache_mode)
artwork_record = _build_artwork_status_record(item, cache_mode)
if artwork_record:
page_artwork_records.append(artwork_record)
stored += 1
page_changed = True
_sync_state["stored"] = stored
if page_cache_records:
upsert_request_cache_many(page_cache_records)
if page_artwork_records:
upsert_artwork_cache_status_many(page_artwork_records)
if not page_changed:
unchanged_pages += 1
else:
@@ -894,6 +928,8 @@ async def _prefetch_artwork_cache(
batch = get_request_cache_payloads(limit=limit, offset=offset)
if not batch:
break
page_cache_records: list[Dict[str, Any]] = []
page_artwork_records: list[Dict[str, Any]] = []
for row in batch:
payload = row.get("payload")
if not isinstance(payload, dict):
@@ -921,20 +957,7 @@ async def _prefetch_artwork_cache(
parsed = _parse_request_payload(payload)
request_id = parsed.get("request_id")
if isinstance(request_id, int):
upsert_request_cache(
request_id=request_id,
media_id=parsed.get("media_id"),
media_type=parsed.get("media_type"),
status=parsed.get("status"),
title=parsed.get("title"),
year=parsed.get("year"),
requested_by=parsed.get("requested_by"),
requested_by_norm=parsed.get("requested_by_norm"),
requested_by_id=parsed.get("requested_by_id"),
created_at=parsed.get("created_at"),
updated_at=parsed.get("updated_at"),
payload_json=json.dumps(payload, ensure_ascii=True),
)
page_cache_records.append(_build_request_cache_record(parsed, payload))
poster_cached_flag = False
backdrop_cached_flag = False
if poster_path:
@@ -949,17 +972,23 @@ async def _prefetch_artwork_cache(
backdrop_cached_flag = bool(await cache_tmdb_image(backdrop_path, "w780"))
except httpx.HTTPError:
backdrop_cached_flag = False
_upsert_artwork_status(
artwork_record = _build_artwork_status_record(
payload,
cache_mode,
poster_cached=poster_cached_flag if poster_path else None,
backdrop_cached=backdrop_cached_flag if backdrop_path else None,
)
if artwork_record:
page_artwork_records.append(artwork_record)
processed += 1
if processed % 25 == 0:
_artwork_prefetch_state.update(
{"processed": processed, "message": f"Cached artwork for {processed} requests"}
)
if page_cache_records:
upsert_request_cache_many(page_cache_records)
if page_artwork_records:
upsert_artwork_cache_status_many(page_artwork_records)
offset += limit
total_requests = get_request_cache_count()