Replace in-memory TTL cache with Redis

This commit is contained in:
2026-03-07 18:45:15 +00:00
parent 0112875894
commit b11c1bdf98
10 changed files with 238 additions and 54 deletions

111
main.py
View File

@@ -1,15 +1,20 @@
import asyncio
import json
import logging
import os
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from models.schemas import HealthCheck
from routers import drives, enclosures, leds, overview
from services.smart import sg_ses_available, smartctl_available
from services.cache import cache_set, close_cache, init_cache, redis_available
from services.enclosure import discover_enclosures, list_slots
from services.smart import SMART_CACHE_TTL, _run_smartctl, sg_ses_available, smartctl_available
from services.zfs import get_zfs_pool_map
logging.basicConfig(
level=logging.INFO,
@@ -17,10 +22,96 @@ logging.basicConfig(
)
logger = logging.getLogger(__name__)
SMART_POLL_INTERVAL = int(os.environ.get("SMART_POLL_INTERVAL", "90"))
_tool_status: dict[str, bool] = {}
_poll_task: asyncio.Task | None = None
def _discover_enclosure_devices() -> set[str]:
    """Return the non-empty device names of every slot in every enclosure."""
    return {
        slot["device"]
        for enc in discover_enclosures()
        for slot in list_slots(enc["id"])
        if slot["device"]
    }


async def _discover_lsblk_disks() -> set[str]:
    """Return the names of block devices that ``lsblk`` reports as type "disk".

    Best-effort: any failure (missing binary, bad JSON, unexpected shape) is
    logged as a warning and whatever was collected up to that point is
    returned, so a broken ``lsblk`` never aborts a polling pass.
    """
    found: set[str] = set()
    try:
        proc = await asyncio.create_subprocess_exec(
            "lsblk", "-d", "-o", "NAME,TYPE", "-J",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            # Surface the failure instead of silently dropping stderr; any
            # stdout that was produced is still parsed below.
            logger.warning(
                "lsblk exited with status %s in poller: %s",
                proc.returncode,
                stderr.decode(errors="replace").strip(),
            )
        if stdout:
            for dev in json.loads(stdout).get("blockdevices", []):
                if dev.get("type") == "disk":
                    name = dev.get("name", "")
                    if name:
                        found.add(name)
    except Exception as e:
        logger.warning("lsblk discovery failed in poller: %s", e)
    return found


async def smart_poll_loop():
    """Pre-warm Redis with SMART data for all drives.

    Runs until cancelled. After a short start-up delay, each pass:

    1. collects device names from the enclosure slots and from ``lsblk``;
    2. runs ``_run_smartctl`` for every device (in sorted order) and stores
       the result under ``jbod:smart:<device>`` with ``SMART_CACHE_TTL``;
    3. calls ``get_zfs_pool_map`` (its return value is not used here);
    4. sleeps ``SMART_POLL_INTERVAL`` seconds.

    A failure for one device is logged and does not stop the pass; any other
    error in a pass is logged with its traceback and the loop carries on.
    ``asyncio.CancelledError`` is not an ``Exception`` subclass, so task
    cancellation still propagates out of the loop.
    """
    await asyncio.sleep(2)  # let app finish starting
    while True:
        try:
            devices = _discover_enclosure_devices()
            devices |= await _discover_lsblk_disks()

            # Poll each drive and cache result
            for device in sorted(devices):
                try:
                    result = await _run_smartctl(device)
                    await cache_set(f"jbod:smart:{device}", result, SMART_CACHE_TTL)
                except Exception as e:
                    logger.warning("Poll failed for %s: %s", device, e)

            # Pre-warm ZFS map (bypasses cache by calling directly)
            await get_zfs_pool_map()
            logger.info("SMART poll complete: %d devices", len(devices))
        except Exception as e:
            # Top-level boundary of the poller: keep the loop alive but keep
            # the traceback so the failure can actually be diagnosed.
            logger.exception("SMART poll loop error: %s", e)
        await asyncio.sleep(SMART_POLL_INTERVAL)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: probe dependencies, start the poller, clean up.

    Startup records the availability of ``smartctl`` and ``sg_ses`` in
    ``_tool_status`` (warning about anything missing and about running as a
    non-root user), initialises the cache, records Redis availability and,
    only when Redis is available, starts ``smart_poll_loop`` as a background
    task.

    Shutdown cancels the background task (if one was started) and closes the
    cache. The shutdown half sits in a ``finally`` so it also runs when an
    exception is thrown into the generator at the ``yield``.
    """
    global _poll_task

    # Startup
    _tool_status["smartctl"] = smartctl_available()
    _tool_status["sg_ses"] = sg_ses_available()
    if not _tool_status["smartctl"]:
        logger.warning("smartctl not found — install smartmontools for SMART data")
    if not _tool_status["sg_ses"]:
        logger.warning("sg_ses not found — install sg3-utils for enclosure SES data")
    if os.geteuid() != 0:
        logger.warning("Not running as root — smartctl may fail on some devices")

    await init_cache()
    try:
        # Decide on the poller from the same value that gets reported, so the
        # recorded status and the actual behaviour cannot disagree.
        _tool_status["redis"] = redis_available()
        if _tool_status["redis"]:
            _poll_task = asyncio.create_task(smart_poll_loop())
        yield
    finally:
        # Shutdown
        if _poll_task is not None:
            _poll_task.cancel()
            try:
                await _poll_task
            except asyncio.CancelledError:
                pass  # expected result of the cancel() above
            _poll_task = None
        await close_cache()
# The FastAPI application object; startup and shutdown work is delegated to
# the `lifespan` context manager passed below.
app = FastAPI(
title="JBOD Monitor",
description="Drive health monitoring for JBOD enclosures",
version="0.1.0",
lifespan=lifespan,
)
app.add_middleware(
@@ -35,24 +126,10 @@ app.include_router(drives.router)
# Register the LED and overview API routers on the application.
app.include_router(leds.router)
app.include_router(overview.router)
# Availability flags keyed by tool name; filled in at startup and returned
# to clients by the /api/health endpoint.
_tool_status: dict[str, bool] = {}
@app.on_event("startup")
async def check_dependencies():
    """Record which external tools are available and warn about gaps.

    Stores the availability of ``smartctl`` and ``sg_ses`` in
    ``_tool_status``, logs a warning for each one that is missing, and logs
    a further warning when the process is not running as root.
    """
    probes = (
        ("smartctl", smartctl_available),
        ("sg_ses", sg_ses_available),
    )
    for tool, probe in probes:
        _tool_status[tool] = probe()

    install_hints = {
        "smartctl": "smartctl not found — install smartmontools for SMART data",
        "sg_ses": "sg_ses not found — install sg3-utils for enclosure SES data",
    }
    for tool, hint in install_hints.items():
        if not _tool_status[tool]:
            logger.warning(hint)

    running_as_root = os.geteuid() == 0
    if not running_as_root:
        logger.warning("Not running as root — smartctl may fail on some devices")
@app.get("/api/health", response_model=HealthCheck, tags=["health"])
async def health():
    """Return the health payload, refreshing the Redis flag on every call.

    The ``redis`` entry of ``_tool_status`` is re-evaluated per request; the
    other entries are whatever was recorded earlier.
    """
    redis_ok = redis_available()
    _tool_status["redis"] = redis_ok
    payload = HealthCheck(status="ok", tools=_tool_status)
    return payload