141 lines
4.5 KiB
Python
141 lines
4.5 KiB
Python
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
from contextlib import asynccontextmanager
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
|
from models.schemas import HealthCheck
|
|
from routers import drives, enclosures, leds, overview
|
|
from services.cache import cache_set, close_cache, init_cache, redis_available
|
|
from services.enclosure import discover_enclosures, list_slots
|
|
from services.smart import SMART_CACHE_TTL, _run_smartctl, sg_ses_available, smartctl_available
|
|
from services.zfs import get_zfs_pool_map
|
|
|
|
# Configure root logging at import time: INFO level, with each record rendered
# as "<timestamp> [<LEVEL>] <logger name>: <message>".
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)
|
|
|
|
SMART_POLL_INTERVAL = int(os.environ.get("SMART_POLL_INTERVAL", "90"))
|
|
|
|
_tool_status: dict[str, bool] = {}
|
|
_poll_task: asyncio.Task | None = None
|
|
|
|
|
|
async def smart_poll_loop():
    """Pre-warm Redis with SMART data for all drives.

    Runs forever (until the task is cancelled). Each pass:

    1. Collects device names from every enclosure slot and from ``lsblk``.
    2. Runs smartctl for each device and stores the result under the cache
       key ``jbod:smart:<device>`` with ``SMART_CACHE_TTL``.
    3. Calls ``get_zfs_pool_map()`` to pre-warm the ZFS map.

    A failure for one device is logged and does not stop the pass; any other
    failure is logged with its traceback and the loop simply waits for the
    next pass. The loop sleeps ``SMART_POLL_INTERVAL`` seconds between passes.
    """
    await asyncio.sleep(2)  # let app finish starting
    while True:
        try:
            # Devices reported by enclosure slots (empty/None entries skipped).
            devices: set[str] = {
                slot["device"]
                for enc in discover_enclosures()
                for slot in list_slots(enc["id"])
                if slot["device"]
            }

            # Host block devices reported by lsblk; a set union de-duplicates.
            devices |= await _lsblk_disk_names()

            # Poll each drive and cache the result; one bad drive must not
            # prevent the remaining drives from being refreshed.
            for device in sorted(devices):
                try:
                    result = await _run_smartctl(device)
                    await cache_set(f"jbod:smart:{device}", result, SMART_CACHE_TTL)
                except Exception as e:
                    logger.warning("Poll failed for %s: %s", device, e)

            # Pre-warm ZFS map (bypasses cache by calling directly)
            await get_zfs_pool_map()

            logger.info("SMART poll complete: %d devices", len(devices))
        except Exception as e:
            # Top-level boundary of the background task: keep the loop alive,
            # but record the traceback so unexpected failures are diagnosable.
            logger.exception("SMART poll loop error: %s", e)
        await asyncio.sleep(SMART_POLL_INTERVAL)


async def _lsblk_disk_names() -> set[str]:
    """Return names of host block devices that lsblk reports as type "disk".

    Best-effort: a non-zero lsblk exit status or any exception is logged as a
    warning, and whatever names were collected so far (possibly none) are
    returned.
    """
    names: set[str] = set()
    try:
        proc = await asyncio.create_subprocess_exec(
            "lsblk", "-d", "-o", "NAME,TYPE", "-J",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            logger.warning(
                "lsblk exited with status %s: %s",
                proc.returncode,
                stderr.decode(errors="replace").strip(),
            )
        if stdout:
            for dev in json.loads(stdout).get("blockdevices", []):
                name = dev.get("name", "")
                if name and dev.get("type") == "disk":
                    names.add(name)
    except Exception as e:
        logger.warning("lsblk discovery failed in poller: %s", e)
    return names
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Startup: records smartctl / sg_ses availability in ``_tool_status``, warns
    about missing tools and about not running as root, initialises the cache,
    and starts the background SMART poller when Redis is available.

    Shutdown: cancels the poller (if one was started), waits for it to finish,
    and closes the cache. Shutdown runs in a ``finally`` clause so it still
    happens when an exception or cancellation is delivered at the ``yield``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    global _poll_task
    # Startup
    _tool_status["smartctl"] = smartctl_available()
    _tool_status["sg_ses"] = sg_ses_available()

    if not _tool_status["smartctl"]:
        logger.warning("smartctl not found — install smartmontools for SMART data")
    if not _tool_status["sg_ses"]:
        logger.warning("sg_ses not found — install sg3-utils for enclosure SES data")
    if os.geteuid() != 0:
        logger.warning("Not running as root — smartctl may fail on some devices")

    await init_cache()
    try:
        # Probe once so the recorded flag and the poller decision always agree.
        redis_ok = redis_available()
        _tool_status["redis"] = redis_ok

        # The poller only writes into the cache, so skip it without Redis.
        if redis_ok:
            _poll_task = asyncio.create_task(smart_poll_loop())

        yield
    finally:
        # Shutdown
        if _poll_task is not None:
            _poll_task.cancel()
            try:
                await _poll_task
            except asyncio.CancelledError:
                pass
            _poll_task = None
        await close_cache()
|
|
|
|
|
|
# The ASGI application; startup and shutdown are driven by `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title="JBOD Monitor",
    description="Drive health monitoring for JBOD enclosures",
    version="0.1.0",
)

# CORS: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# Register the API routers; registration order is preserved.
for _router_module in (enclosures, drives, leds, overview):
    app.include_router(_router_module.router)
|
|
|
|
|
|
@app.get("/api/health", response_model=HealthCheck, tags=["health"])
async def health():
    """Return the health payload: status "ok" plus the tool availability map.

    The "redis" flag is refreshed on every call; the other flags keep the
    values already stored in ``_tool_status``.
    """
    _tool_status.update(redis=redis_available())
    return HealthCheck(tools=_tool_status, status="ok")
|
|
|
|
|
|
# Built frontend assets live in a "static" directory next to this module.
# The mount is registered last so that the /api routes take priority, and it
# is skipped entirely when that path does not exist.
STATIC_DIR = Path(__file__).parent.joinpath("static")

if STATIC_DIR.exists():
    _frontend = StaticFiles(directory=STATIC_DIR, html=True)
    app.mount("/", _frontend, name="static")
|