Harden request cache titles and cache-only reads
This commit is contained in:
@@ -24,6 +24,58 @@ def _connect() -> sqlite3.Connection:
|
||||
return sqlite3.connect(_db_path())
|
||||
|
||||
|
||||
def _normalize_title_value(title: Optional[str]) -> Optional[str]:
|
||||
if not isinstance(title, str):
|
||||
return None
|
||||
trimmed = title.strip()
|
||||
return trimmed if trimmed else None
|
||||
|
||||
|
||||
def _normalize_year_value(year: Optional[Any]) -> Optional[int]:
|
||||
if isinstance(year, int):
|
||||
return year
|
||||
if isinstance(year, str):
|
||||
trimmed = year.strip()
|
||||
if trimmed.isdigit():
|
||||
return int(trimmed)
|
||||
return None
|
||||
|
||||
|
||||
def _is_placeholder_title(title: Optional[str], request_id: Optional[int]) -> bool:
|
||||
if not isinstance(title, str):
|
||||
return True
|
||||
normalized = title.strip().lower()
|
||||
if not normalized:
|
||||
return True
|
||||
if normalized == "untitled":
|
||||
return True
|
||||
if request_id and normalized == f"request {request_id}":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _extract_title_year_from_payload(payload_json: Optional[str]) -> tuple[Optional[str], Optional[int]]:
    """Derive a ``(title, year)`` pair from a serialized request payload.

    The payload is expected to be a JSON object. The nested ``"media"``
    object (when it is a dict) is consulted first, then the top-level
    payload. For the title, the ``"title"`` key is preferred over
    ``"name"``. Each candidate is normalized before it is accepted, so an
    unusable value in ``media`` (blank or non-string title, unparsable
    year) no longer blocks the fallback to the top-level keys.

    Returns:
        ``(title, year)``; either element is ``None`` when no usable value
        was found, and both are ``None`` when the payload is empty, cannot
        be parsed, or is not a JSON object.
    """
    if not payload_json:
        return None, None
    try:
        payload = json.loads(payload_json)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError and undecodable bytes;
        # TypeError covers input that is not str/bytes/bytearray.
        return None, None
    if not isinstance(payload, dict):
        return None, None

    media = payload.get("media")
    sources = [media, payload] if isinstance(media, dict) else [payload]

    title = None
    year = None
    for source in sources:
        if title is None:
            title = _normalize_title_value(
                source.get("title")
            ) or _normalize_title_value(source.get("name"))
        if year is None:
            year = _normalize_year_value(source.get("year"))
    return title, year
|
||||
|
||||
|
||||
def init_db() -> None:
|
||||
with _connect() as conn:
|
||||
conn.execute(
|
||||
@@ -603,7 +655,34 @@ def upsert_request_cache(
|
||||
updated_at: Optional[str],
|
||||
payload_json: str,
|
||||
) -> None:
|
||||
normalized_title = _normalize_title_value(title)
|
||||
normalized_year = _normalize_year_value(year)
|
||||
derived_title = None
|
||||
derived_year = None
|
||||
if not normalized_title or normalized_year is None:
|
||||
derived_title, derived_year = _extract_title_year_from_payload(payload_json)
|
||||
if _is_placeholder_title(normalized_title, request_id):
|
||||
normalized_title = None
|
||||
if derived_title and not normalized_title:
|
||||
normalized_title = derived_title
|
||||
if normalized_year is None and derived_year is not None:
|
||||
normalized_year = derived_year
|
||||
with _connect() as conn:
|
||||
existing_title = None
|
||||
existing_year = None
|
||||
if normalized_title is None or normalized_year is None:
|
||||
row = conn.execute(
|
||||
"SELECT title, year FROM requests_cache WHERE request_id = ?",
|
||||
(request_id,),
|
||||
).fetchone()
|
||||
if row:
|
||||
existing_title, existing_year = row[0], row[1]
|
||||
if _is_placeholder_title(existing_title, request_id):
|
||||
existing_title = None
|
||||
if normalized_title is None and existing_title:
|
||||
normalized_title = existing_title
|
||||
if normalized_year is None and existing_year is not None:
|
||||
normalized_year = existing_year
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO requests_cache (
|
||||
@@ -637,8 +716,8 @@ def upsert_request_cache(
|
||||
media_id,
|
||||
media_type,
|
||||
status,
|
||||
title,
|
||||
year,
|
||||
normalized_title,
|
||||
normalized_year,
|
||||
requested_by,
|
||||
requested_by_norm,
|
||||
created_at,
|
||||
@@ -741,22 +820,11 @@ def get_cached_requests(
|
||||
title = row[4]
|
||||
year = row[5]
|
||||
if (not title or not year) and row[8]:
|
||||
try:
|
||||
payload = json.loads(row[8])
|
||||
if isinstance(payload, dict):
|
||||
media = payload.get("media") or {}
|
||||
if not title:
|
||||
title = (
|
||||
(media.get("title") if isinstance(media, dict) else None)
|
||||
or (media.get("name") if isinstance(media, dict) else None)
|
||||
or payload.get("title")
|
||||
or payload.get("name")
|
||||
)
|
||||
if not year:
|
||||
year = media.get("year") if isinstance(media, dict) else None
|
||||
year = year or payload.get("year")
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
derived_title, derived_year = _extract_title_year_from_payload(row[8])
|
||||
if not title:
|
||||
title = derived_title
|
||||
if not year:
|
||||
year = derived_year
|
||||
results.append(
|
||||
{
|
||||
"request_id": row[0],
|
||||
@@ -788,18 +856,8 @@ def get_request_cache_overview(limit: int = 50) -> list[Dict[str, Any]]:
|
||||
for row in rows:
|
||||
title = row[4]
|
||||
if not title and row[9]:
|
||||
try:
|
||||
payload = json.loads(row[9])
|
||||
if isinstance(payload, dict):
|
||||
media = payload.get("media") or {}
|
||||
title = (
|
||||
(media.get("title") if isinstance(media, dict) else None)
|
||||
or (media.get("name") if isinstance(media, dict) else None)
|
||||
or payload.get("title")
|
||||
or payload.get("name")
|
||||
)
|
||||
except json.JSONDecodeError:
|
||||
title = row[4]
|
||||
derived_title, _ = _extract_title_year_from_payload(row[9])
|
||||
title = derived_title or row[4]
|
||||
results.append(
|
||||
{
|
||||
"request_id": row[0],
|
||||
@@ -825,7 +883,9 @@ def get_request_cache_count() -> int:
|
||||
def update_request_cache_title(
|
||||
request_id: int, title: str, year: Optional[int] = None
|
||||
) -> None:
|
||||
if not title:
|
||||
normalized_title = _normalize_title_value(title)
|
||||
normalized_year = _normalize_year_value(year)
|
||||
if not normalized_title:
|
||||
return
|
||||
with _connect() as conn:
|
||||
conn.execute(
|
||||
@@ -834,10 +894,38 @@ def update_request_cache_title(
|
||||
SET title = ?, year = COALESCE(?, year)
|
||||
WHERE request_id = ?
|
||||
""",
|
||||
(title, year, request_id),
|
||||
(normalized_title, normalized_year, request_id),
|
||||
)
|
||||
|
||||
|
||||
def repair_request_cache_titles() -> int:
    """Backfill placeholder titles in ``requests_cache`` from stored payloads.

    Every cached row whose title is a placeholder (as judged by
    ``_is_placeholder_title``) is rewritten with the title derived from its
    ``payload_json``. The stored year is replaced only when the payload
    yields one (``COALESCE`` keeps the existing value otherwise). Rows whose
    payload yields no title are left untouched.

    Returns:
        The number of rows for which an UPDATE was issued.
    """
    repaired = 0
    with _connect() as conn:
        cached_rows = conn.execute(
            """
            SELECT request_id, title, year, payload_json
            FROM requests_cache
            """
        ).fetchall()
        for request_id, current_title, _current_year, payload_json in cached_rows:
            if _is_placeholder_title(current_title, request_id):
                new_title, new_year = _extract_title_year_from_payload(payload_json)
                if new_title:
                    conn.execute(
                        """
                        UPDATE requests_cache
                        SET title = ?, year = COALESCE(?, year)
                        WHERE request_id = ?
                        """,
                        (new_title, new_year, request_id),
                    )
                    repaired += 1
    return repaired
|
||||
|
||||
|
||||
def prune_duplicate_requests_cache() -> int:
|
||||
with _connect() as conn:
|
||||
cursor = conn.execute(
|
||||
|
||||
Reference in New Issue
Block a user