Replace in-memory TTL cache with Redis
This commit is contained in:
111
main.py
111
main.py
@@ -1,15 +1,20 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from models.schemas import HealthCheck
|
||||
from routers import drives, enclosures, leds, overview
|
||||
from services.smart import sg_ses_available, smartctl_available
|
||||
from services.cache import cache_set, close_cache, init_cache, redis_available
|
||||
from services.enclosure import discover_enclosures, list_slots
|
||||
from services.smart import SMART_CACHE_TTL, _run_smartctl, sg_ses_available, smartctl_available
|
||||
from services.zfs import get_zfs_pool_map
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -17,10 +22,96 @@ logging.basicConfig(
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SMART_POLL_INTERVAL = int(os.environ.get("SMART_POLL_INTERVAL", "90"))
|
||||
|
||||
_tool_status: dict[str, bool] = {}
|
||||
_poll_task: asyncio.Task | None = None
|
||||
|
||||
|
||||
async def _lsblk_disk_names() -> set[str]:
    """Return the names of host block devices that lsblk reports as type "disk".

    Best-effort: any failure (lsblk not runnable, unparsable JSON, unexpected
    structure) is logged as a warning, and whatever names were collected up
    to that point are returned so the caller can still poll other devices.
    """
    names: set[str] = set()
    try:
        proc = await asyncio.create_subprocess_exec(
            "lsblk", "-d", "-o", "NAME,TYPE", "-J",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await proc.communicate()
        if stdout:
            for dev in json.loads(stdout).get("blockdevices", []):
                name = dev.get("name", "")
                if dev.get("type") == "disk" and name:
                    names.add(name)
    except Exception as e:
        # Deliberately broad: lsblk discovery must never abort a poll cycle.
        logger.warning("lsblk discovery failed in poller: %s", e)
    return names


async def smart_poll_loop():
    """Pre-warm Redis with SMART data for all drives.

    Runs forever: after a short startup delay, each cycle collects device
    names from the enclosure slots and from lsblk, runs smartctl on every
    device, stores each result under ``jbod:smart:<device>`` with
    ``SMART_CACHE_TTL``, calls ``get_zfs_pool_map()``, and then sleeps for
    ``SMART_POLL_INTERVAL`` seconds.

    A failure for one device is logged and does not stop the cycle; a failure
    of the cycle itself is logged with its traceback and the loop continues.
    Task cancellation still propagates, because ``asyncio.CancelledError`` is
    not an ``Exception`` subclass on Python 3.8+.
    """
    await asyncio.sleep(2)  # let app finish starting
    while True:
        try:
            # Devices that occupy an enclosure slot.
            devices: set[str] = set()
            for enc in discover_enclosures():
                for slot in list_slots(enc["id"]):
                    if slot["device"]:
                        devices.add(slot["device"])

            # Host block devices via lsblk; the set union removes duplicates.
            devices |= await _lsblk_disk_names()

            # Poll each drive and cache the result; sorted for a stable order.
            for device in sorted(devices):
                try:
                    result = await _run_smartctl(device)
                    await cache_set(f"jbod:smart:{device}", result, SMART_CACHE_TTL)
                except Exception as e:
                    logger.warning("Poll failed for %s: %s", device, e)

            # Refresh the ZFS pool map each cycle; the return value is unused
            # here. NOTE(review): presumably the callee stores/caches its own
            # result -- confirm in services.zfs.
            await get_zfs_pool_map()

            logger.info("SMART poll complete: %d devices", len(devices))
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception("SMART poll loop error: %s", e)
        await asyncio.sleep(SMART_POLL_INTERVAL)
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler: dependency checks, cache, and poller.

    Startup records whether smartctl and sg_ses are available (warning when
    one is missing or the process is not running as root), initialises the
    cache, records Redis availability, and starts the background SMART poller
    only when Redis is available.

    Shutdown runs in a ``finally`` block so the poller is cancelled and the
    cache is closed even when an exception is raised into this context
    manager at the ``yield``.
    """
    global _poll_task

    # Startup
    _tool_status["smartctl"] = smartctl_available()
    _tool_status["sg_ses"] = sg_ses_available()

    if not _tool_status["smartctl"]:
        logger.warning("smartctl not found — install smartmontools for SMART data")
    if not _tool_status["sg_ses"]:
        logger.warning("sg_ses not found — install sg3-utils for enclosure SES data")
    if os.geteuid() != 0:
        logger.warning("Not running as root — smartctl may fail on some devices")

    await init_cache()
    _tool_status["redis"] = redis_available()

    # Reuse the value just recorded so the reported status and the decision
    # to start the poller cannot disagree.
    if _tool_status["redis"]:
        _poll_task = asyncio.create_task(smart_poll_loop())

    try:
        yield
    finally:
        # Shutdown
        if _poll_task is not None:
            _poll_task.cancel()
            try:
                await _poll_task
            except asyncio.CancelledError:
                pass  # expected result of the cancel() above
            _poll_task = None
        await close_cache()
|
||||
|
||||
|
||||
# Application object. Startup and shutdown work is delegated to the
# `lifespan` context manager passed below.
app = FastAPI(
    title="JBOD Monitor",
    description="Drive health monitoring for JBOD enclosures",
    version="0.1.0",
    lifespan=lifespan,
)
|
||||
|
||||
app.add_middleware(
|
||||
@@ -35,24 +126,10 @@ app.include_router(drives.router)
|
||||
app.include_router(leds.router)
|
||||
app.include_router(overview.router)
|
||||
|
||||
_tool_status: dict[str, bool] = {}
|
||||
|
||||
|
||||
@app.on_event("startup")
async def check_dependencies():
    """Record smartctl/sg_ses availability and warn about missing prerequisites.

    Stores both availability flags in ``_tool_status``, then logs a warning
    for each tool that is missing and another when the process is not
    running as root.
    """
    _tool_status["smartctl"] = smartctl_available()
    _tool_status["sg_ses"] = sg_ses_available()

    # (status key, warning emitted when that tool is unavailable)
    missing_tool_warnings = (
        ("smartctl", "smartctl not found — install smartmontools for SMART data"),
        ("sg_ses", "sg_ses not found — install sg3-utils for enclosure SES data"),
    )
    for tool, warning in missing_tool_warnings:
        if not _tool_status[tool]:
            logger.warning(warning)

    running_as_root = os.geteuid() == 0
    if not running_as_root:
        logger.warning("Not running as root — smartctl may fail on some devices")
|
||||
|
||||
|
||||
# Health endpoint: refreshes the Redis availability flag on every request and
# returns the shared tool-status map with a fixed status of "ok".
# Documented with comments instead of a docstring so the generated OpenAPI
# description of this route stays unchanged.
@app.get("/api/health", response_model=HealthCheck, tags=["health"])
async def health():
    tools = _tool_status  # same dict object; the update below is shared
    tools["redis"] = redis_available()
    return HealthCheck(tools=tools, status="ok")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user