Replace in-memory TTL cache with Redis

This commit is contained in:
2026-03-07 18:45:15 +00:00
parent 0112875894
commit b11c1bdf98
10 changed files with 238 additions and 54 deletions

View File

@@ -1,29 +1,62 @@
import time
import json
import logging
import os
from typing import Any
import redis.asyncio as redis
class TTLCache:
"""Simple in-memory TTL cache."""
logger = logging.getLogger(__name__)
def __init__(self, ttl_seconds: int = 60):
self._ttl = ttl_seconds
self._store: dict[str, tuple[float, Any]] = {}
_redis: redis.Redis | None = None
def get(self, key: str) -> Any | None:
entry = self._store.get(key)
if entry is None:
async def init_cache() -> None:
    """Create Redis connection from environment variables.

    Reads REDIS_HOST / REDIS_PORT / REDIS_DB (defaulting to
    localhost:6379/0).  On any failure the module degrades gracefully:
    `_redis` is left as None and callers run without a cache.
    """
    global _redis
    env = os.environ
    host = env.get("REDIS_HOST", "localhost")
    port = int(env.get("REDIS_PORT", "6379"))
    db = int(env.get("REDIS_DB", "0"))
    try:
        # decode_responses=True so GET returns str, not bytes, for json.loads.
        _redis = redis.Redis(host=host, port=port, db=db, decode_responses=True)
        # Ping immediately so a dead server is detected at startup, not on
        # the first cache access.
        await _redis.ping()
    except Exception as exc:
        logger.warning("Redis connection failed: %s — running without cache", exc)
        _redis = None
    else:
        logger.info("Redis connected at %s:%d/%d", host, port, db)
async def close_cache() -> None:
    """Close the Redis connection, if one is open; safe to call twice."""
    global _redis
    if _redis is None:
        return
    await _redis.aclose()
    _redis = None
def redis_available() -> bool:
    """Report whether a live Redis connection is currently held."""
    connected = _redis is not None
    return connected
async def cache_get(key: str) -> Any | None:
    """GET *key* from Redis and return the JSON-deserialized value.

    Returns None on a cache miss, when no Redis connection is configured,
    or on any Redis/decoding error (logged at WARNING, never raised), so
    callers can treat every failure mode as a simple miss.
    """
    if _redis is None:
        return None
    try:
        raw = await _redis.get(key)
        if raw is None:
            return None
        # decode_responses=True on the client means raw is already str.
        return json.loads(raw)
    except Exception as e:
        logger.warning("Redis GET %s failed: %s", key, e)
        return None
smart_cache = TTLCache(ttl_seconds=60)
async def cache_set(key: str, value: Any, ttl: int = 120) -> None:
    """Store *value* (JSON-encoded) under *key* with an expiry of *ttl* seconds.

    A missing connection is a silent no-op; Redis/serialization errors are
    logged at WARNING and never propagated to the caller.
    """
    client = _redis
    if client is None:
        return
    try:
        await client.set(key, json.dumps(value), ex=ttl)
    except Exception as exc:
        logger.warning("Redis SET %s failed: %s", key, exc)

View File

@@ -65,12 +65,14 @@ async def get_host_drives() -> list[dict]:
smart_results = await asyncio.gather(*smart_tasks, return_exceptions=True)
results: list[dict] = []
for dev_info, smart in zip(host_devices, smart_results):
for dev_info, smart_result in zip(host_devices, smart_results):
name = dev_info["name"]
if isinstance(smart, Exception):
logger.warning("SMART query failed for host drive %s: %s", name, smart)
if isinstance(smart_result, Exception):
logger.warning("SMART query failed for host drive %s: %s", name, smart_result)
smart = {"device": name, "smart_supported": False}
else:
smart, _ = smart_result
# Compute health_status (same logic as overview.py)
healthy = smart.get("smart_healthy")

View File

@@ -1,10 +1,11 @@
import asyncio
import json
import logging
import os
import re
import shutil
from services.cache import smart_cache
from services.cache import cache_get, cache_set
logger = logging.getLogger(__name__)
@@ -17,6 +18,8 @@ ATTR_PENDING = 197
ATTR_UNCORRECTABLE = 198
ATTR_WEAR_LEVELING = 177 # SSD wear leveling
SMART_CACHE_TTL = int(os.environ.get("SMART_CACHE_TTL", "120"))
def smartctl_available() -> bool:
    """Report whether the `smartctl` executable can be found on PATH."""
    found = shutil.which("smartctl")
    return found is not None
@@ -26,19 +29,22 @@ def sg_ses_available() -> bool:
return shutil.which("sg_ses") is not None
async def get_smart_data(device: str) -> tuple[dict, bool]:
    """Run smartctl -a -j against a device, with caching.

    Args:
        device: bare device name (e.g. "sda"); restricted to alphanumerics
            and hyphens because it is passed to an external smartctl command.

    Returns:
        (data, cache_hit) tuple — `data` is the parsed smartctl JSON dict,
        `cache_hit` is True when served from the cache.

    Raises:
        ValueError: if *device* contains any disallowed character.
    """
    # Sanitize device name: only allow alphanumeric and hyphens
    if not re.match(r"^[a-zA-Z0-9\-]+$", device):
        raise ValueError(f"Invalid device name: {device}")
    # Namespace the cache key so it cannot collide with other app keys.
    cached = await cache_get(f"jbod:smart:{device}")
    if cached is not None:
        return (cached, True)
    result = await _run_smartctl(device)
    await cache_set(f"jbod:smart:{device}", result, SMART_CACHE_TTL)
    return (result, False)
async def _run_smartctl(device: str) -> dict:

View File

@@ -4,11 +4,15 @@ import logging
import re
from pathlib import Path
from services.cache import cache_get, cache_set
logger = logging.getLogger(__name__)
# Allow overriding the zpool binary path via env (for bind-mounted host tools)
ZPOOL_BIN = os.environ.get("ZPOOL_BIN", "zpool")
ZFS_CACHE_TTL = 300
async def get_zfs_pool_map() -> dict[str, dict]:
"""Return a dict mapping device names to ZFS pool and vdev info.
@@ -16,6 +20,10 @@ async def get_zfs_pool_map() -> dict[str, dict]:
e.g. {"sda": {"pool": "tank", "vdev": "raidz2-0"},
"sdb": {"pool": "fast", "vdev": "mirror-0"}}
"""
cached = await cache_get("jbod:zfs_map")
if cached is not None:
return cached
pool_map = {}
try:
# When running in a container with pid:host, use nsenter to run
@@ -94,6 +102,8 @@ async def get_zfs_pool_map() -> dict[str, dict]:
pass
except FileNotFoundError:
logger.debug("zpool not available")
await cache_set("jbod:zfs_map", pool_map, ZFS_CACHE_TTL)
return pool_map