Harden request cache titles and cache-only reads

2026-01-25 19:38:31 +13:00
parent 22f90b7e07
commit 86ca3bdeb2
6 changed files with 163 additions and 77 deletions

@@ -24,6 +24,58 @@ def _connect() -> sqlite3.Connection:
     return sqlite3.connect(_db_path())
 
 
+def _normalize_title_value(title: Optional[str]) -> Optional[str]:
+    if not isinstance(title, str):
+        return None
+    trimmed = title.strip()
+    return trimmed if trimmed else None
+
+
+def _normalize_year_value(year: Optional[Any]) -> Optional[int]:
+    if isinstance(year, int):
+        return year
+    if isinstance(year, str):
+        trimmed = year.strip()
+        if trimmed.isdigit():
+            return int(trimmed)
+    return None
+
+
+def _is_placeholder_title(title: Optional[str], request_id: Optional[int]) -> bool:
+    if not isinstance(title, str):
+        return True
+    normalized = title.strip().lower()
+    if not normalized:
+        return True
+    if normalized == "untitled":
+        return True
+    if request_id and normalized == f"request {request_id}":
+        return True
+    return False
+
+
+def _extract_title_year_from_payload(payload_json: Optional[str]) -> tuple[Optional[str], Optional[int]]:
+    if not payload_json:
+        return None, None
+    try:
+        payload = json.loads(payload_json)
+    except json.JSONDecodeError:
+        return None, None
+    if not isinstance(payload, dict):
+        return None, None
+    media = payload.get("media") or {}
+    title = None
+    year = None
+    if isinstance(media, dict):
+        title = media.get("title") or media.get("name")
+        year = media.get("year")
+    if not title:
+        title = payload.get("title") or payload.get("name")
+    if year is None:
+        year = payload.get("year")
+    return _normalize_title_value(title), _normalize_year_value(year)
+
+
 def init_db() -> None:
     with _connect() as conn:
         conn.execute(
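A minimal sketch of how the new helpers behave. This is not part of the commit: the module path `database` is an assumption (the file name is not shown here), and the payload is just an example of a request object with a nested `media` block.

# Illustrative usage only: "database" stands in for whatever module the
# helpers above live in; it is not named in this diff.
import json

from database import (
    _extract_title_year_from_payload,
    _is_placeholder_title,
    _normalize_year_value,
)

payload = json.dumps({"media": {"name": "Severance", "year": "2022"}})

# Title falls back from media.title to media.name; the year string is coerced.
print(_extract_title_year_from_payload(payload))       # ('Severance', 2022)

# Malformed or non-dict payloads degrade to (None, None) instead of raising.
print(_extract_title_year_from_payload("{not json"))   # (None, None)

# Placeholder titles ("Untitled", "Request <id>", blanks) are rejected.
print(_is_placeholder_title("Request 42", 42))          # True
print(_is_placeholder_title("Severance", 42))           # False

# Years are accepted as ints or digit-only strings, nothing else.
print(_normalize_year_value("2022"))                    # 2022
print(_normalize_year_value("circa 2022"))              # None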
@@ -603,7 +655,34 @@ def upsert_request_cache(
     updated_at: Optional[str],
     payload_json: str,
 ) -> None:
+    normalized_title = _normalize_title_value(title)
+    normalized_year = _normalize_year_value(year)
+    derived_title = None
+    derived_year = None
+    if not normalized_title or normalized_year is None:
+        derived_title, derived_year = _extract_title_year_from_payload(payload_json)
+    if _is_placeholder_title(normalized_title, request_id):
+        normalized_title = None
+    if derived_title and not normalized_title:
+        normalized_title = derived_title
+    if normalized_year is None and derived_year is not None:
+        normalized_year = derived_year
     with _connect() as conn:
+        existing_title = None
+        existing_year = None
+        if normalized_title is None or normalized_year is None:
+            row = conn.execute(
+                "SELECT title, year FROM requests_cache WHERE request_id = ?",
+                (request_id,),
+            ).fetchone()
+            if row:
+                existing_title, existing_year = row[0], row[1]
+                if _is_placeholder_title(existing_title, request_id):
+                    existing_title = None
+        if normalized_title is None and existing_title:
+            normalized_title = existing_title
+        if normalized_year is None and existing_year is not None:
+            normalized_year = existing_year
         conn.execute(
             """
             INSERT INTO requests_cache (
@@ -637,8 +716,8 @@ def upsert_request_cache(
                 media_id,
                 media_type,
                 status,
-                title,
-                year,
+                normalized_title,
+                normalized_year,
                 requested_by,
                 requested_by_norm,
                 created_at,
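Taken together, the upsert now resolves title and year by precedence: a non-placeholder explicit value wins, then a value derived from the stored payload, then whatever non-placeholder value the existing cache row already holds. A condensed restatement of that ordering follows; `resolve_cached_title` is an illustrative name, not a function in this commit, and the real upsert also skips the payload parse entirely when the incoming title and year are already usable.

from typing import Optional


def resolve_cached_title(
    explicit: Optional[str],
    derived: Optional[str],
    existing: Optional[str],
    request_id: Optional[int],
) -> Optional[str]:
    """Illustrative only: mirrors the precedence upsert_request_cache applies."""

    def usable(value: Optional[str]) -> bool:
        # Same spirit as _is_placeholder_title: reject blanks, "Untitled",
        # and the auto-generated "Request <id>" label.
        if not isinstance(value, str) or not value.strip():
            return False
        return value.strip().lower() not in {"untitled", f"request {request_id}"}

    for candidate in (explicit, derived, existing):
        if usable(candidate):
            return candidate.strip()
    return None


# "Request 7" is a placeholder, so the payload-derived title wins.
print(resolve_cached_title("Request 7", "Dune: Part Two", None, 7))   # Dune: Part Two
# With nothing better on hand, the existing row's title is preserved.
print(resolve_cached_title(None, None, "Dune: Part Two", 7))          # Dune: Part Two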
@@ -741,22 +820,11 @@ def get_cached_requests(
         title = row[4]
         year = row[5]
         if (not title or not year) and row[8]:
-            try:
-                payload = json.loads(row[8])
-                if isinstance(payload, dict):
-                    media = payload.get("media") or {}
-                    if not title:
-                        title = (
-                            (media.get("title") if isinstance(media, dict) else None)
-                            or (media.get("name") if isinstance(media, dict) else None)
-                            or payload.get("title")
-                            or payload.get("name")
-                        )
-                    if not year:
-                        year = media.get("year") if isinstance(media, dict) else None
-                        year = year or payload.get("year")
-            except json.JSONDecodeError:
-                pass
+            derived_title, derived_year = _extract_title_year_from_payload(row[8])
+            if not title:
+                title = derived_title
+            if not year:
+                year = derived_year
         results.append(
             {
                 "request_id": row[0],
@@ -788,18 +856,8 @@ def get_request_cache_overview(limit: int = 50) -> list[Dict[str, Any]]:
     for row in rows:
         title = row[4]
         if not title and row[9]:
-            try:
-                payload = json.loads(row[9])
-                if isinstance(payload, dict):
-                    media = payload.get("media") or {}
-                    title = (
-                        (media.get("title") if isinstance(media, dict) else None)
-                        or (media.get("name") if isinstance(media, dict) else None)
-                        or payload.get("title")
-                        or payload.get("name")
-                    )
-            except json.JSONDecodeError:
-                title = row[4]
+            derived_title, _ = _extract_title_year_from_payload(row[9])
+            title = derived_title or row[4]
         results.append(
             {
                 "request_id": row[0],
@@ -825,7 +883,9 @@ def get_request_cache_count() -> int:
 def update_request_cache_title(
     request_id: int, title: str, year: Optional[int] = None
 ) -> None:
-    if not title:
+    normalized_title = _normalize_title_value(title)
+    normalized_year = _normalize_year_value(year)
+    if not normalized_title:
         return
     with _connect() as conn:
         conn.execute(
@@ -834,10 +894,38 @@ def update_request_cache_title(
             SET title = ?, year = COALESCE(?, year)
             WHERE request_id = ?
             """,
-            (title, year, request_id),
+            (normalized_title, normalized_year, request_id),
         )
 
 
+def repair_request_cache_titles() -> int:
+    updated = 0
+    with _connect() as conn:
+        rows = conn.execute(
+            """
+            SELECT request_id, title, year, payload_json
+            FROM requests_cache
+            """
+        ).fetchall()
+        for row in rows:
+            request_id, title, year, payload_json = row
+            if not _is_placeholder_title(title, request_id):
+                continue
+            derived_title, derived_year = _extract_title_year_from_payload(payload_json)
+            if not derived_title:
+                continue
+            conn.execute(
+                """
+                UPDATE requests_cache
+                SET title = ?, year = COALESCE(?, year)
+                WHERE request_id = ?
+                """,
+                (derived_title, derived_year, request_id),
+            )
+            updated += 1
+    return updated
+
+
 def prune_duplicate_requests_cache() -> int:
     with _connect() as conn:
         cursor = conn.execute(
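repair_request_cache_titles is a one-shot backfill: it only touches rows whose stored title is a placeholder and for which the cached payload yields something better. A hedged sketch of invoking it as a maintenance step; the module name and the call site are assumptions, since this diff does not show how the other changed files wire it in.

# Hypothetical maintenance call; "database" is an assumed module name,
# not something this commit defines.
import logging

import database

logging.basicConfig(level=logging.INFO)

database.init_db()
repaired = database.repair_request_cache_titles()
logging.info("Backfilled %d placeholder request titles from cached payloads", repaired)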