import asyncio
import logging

from fastapi import APIRouter, Response

from models.schemas import (
    DriveHealthSummary,
    EnclosureHealth,
    EnclosureWithDrives,
    HostDrive,
    Overview,
    SlotWithDrive,
)
from services.enclosure import discover_enclosures, get_enclosure_status, list_slots
from services.host import get_host_drives
from services.smart import get_smart_data
from services.zfs import get_zfs_pool_map

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/overview", tags=["overview"])

# SMART counters for which any value above zero is reported as a warning.
_SMART_COUNTERS = ("reallocated_sectors", "pending_sectors", "uncorrectable_errors")


def _score_smart(sd: dict) -> tuple[str, int, int]:
    """Classify one SMART record.

    Args:
        sd: SMART data dict for a single drive. Reads ``smart_healthy``,
            ``smart_supported`` and the keys in ``_SMART_COUNTERS``.

    Returns:
        ``(health_status, errors, warnings)`` where ``health_status`` is
        ``"error"``, ``"warning"`` or ``"healthy"``, ``errors`` is 0 or 1, and
        ``warnings`` counts every individual concern, so a single drive may
        contribute more than one warning.
    """
    healthy = sd.get("smart_healthy")
    # No verdict from a drive that is expected to support SMART is suspicious.
    verdict_missing = healthy is None and sd.get("smart_supported", True)
    nonzero_counters = sum(1 for key in _SMART_COUNTERS if (sd.get(key) or 0) > 0)

    errors = 1 if healthy is False else 0
    warnings = nonzero_counters + (1 if verdict_missing else 0)

    if healthy is False:
        health_status = "error"
    elif nonzero_counters or verdict_missing:
        health_status = "warning"
    else:
        health_status = "healthy"
    return health_status, errors, warnings


@router.get("", response_model=Overview)
async def get_overview(response: Response) -> Overview:
    """Aggregate view of all enclosures, slots, and drive health.

    Collects SES enclosure status, per-slot SMART data, the ZFS pool mapping
    and host (non-enclosure) drives, and rolls them up into drive, warning and
    error totals.

    Args:
        response: Outgoing response; its ``X-Cache`` header is set to ``"HIT"``
            only when at least one SMART lookup succeeded and every SMART
            lookup across all enclosures was served from cache, else
            ``"MISS"``.

    Returns:
        The populated ``Overview``; ``healthy`` is true when no errors were
        counted.
    """
    enclosures_raw = discover_enclosures()
    pool_map = await get_zfs_pool_map()

    # Fetch SES health data for all enclosures concurrently.
    async def _get_health(enc):
        if enc.get("sg_device"):
            return await get_enclosure_status(enc["sg_device"])
        return None

    health_results = await asyncio.gather(
        *(_get_health(enc) for enc in enclosures_raw),
        return_exceptions=True,
    )

    enc_results: list[EnclosureWithDrives] = []
    total_drives = 0
    warnings = 0
    errors = 0

    # Cache bookkeeping must span every enclosure. Initialising it inside the
    # loop would make the header reflect only the last enclosure and leave the
    # names unbound when no enclosure is discovered.
    any_lookups = False
    all_cache_hits = True

    for enc, health_data in zip(enclosures_raw, health_results):
        slots_raw = list_slots(enc["id"])

        # Gather SMART data for all populated slots concurrently.
        devices = [s["device"] for s in slots_raw if s["populated"] and s["device"]]
        smart_results = await asyncio.gather(
            *(get_smart_data(dev) for dev in devices),
            return_exceptions=True,
        )

        smart_map: dict[str, dict] = {}
        for dev, result in zip(devices, smart_results):
            # BaseException rather than Exception: gather() can also hand back
            # CancelledError, which must not reach the tuple unpacking below.
            if isinstance(result, BaseException):
                logger.warning("SMART query failed for %s: %s", dev, result)
                smart_map[dev] = {"device": dev, "smart_supported": False}
                all_cache_hits = False
                continue
            data, hit = result
            smart_map[dev] = data
            any_lookups = True
            if not hit:
                all_cache_hits = False

        slots_out: list[SlotWithDrive] = []
        for s in slots_raw:
            drive_summary = None
            if s["device"] and s["device"] in smart_map:
                sd = smart_map[s["device"]]
                total_drives += 1

                health_status, drive_errors, drive_warnings = _score_smart(sd)
                errors += drive_errors
                warnings += drive_warnings

                zfs_info = pool_map.get(sd["device"], {})
                drive_summary = DriveHealthSummary(
                    device=sd["device"],
                    model=sd.get("model"),
                    serial=sd.get("serial"),
                    wwn=sd.get("wwn"),
                    firmware=sd.get("firmware"),
                    capacity_bytes=sd.get("capacity_bytes"),
                    smart_healthy=sd.get("smart_healthy"),
                    smart_supported=sd.get("smart_supported", True),
                    temperature_c=sd.get("temperature_c"),
                    power_on_hours=sd.get("power_on_hours"),
                    reallocated_sectors=sd.get("reallocated_sectors"),
                    pending_sectors=sd.get("pending_sectors"),
                    uncorrectable_errors=sd.get("uncorrectable_errors"),
                    zfs_pool=zfs_info.get("pool"),
                    zfs_vdev=zfs_info.get("vdev"),
                    zfs_state=zfs_info.get("state"),
                    health_status=health_status,
                )
            elif s["populated"]:
                # Populated slot without SMART data still counts as a drive.
                total_drives += 1

            slots_out.append(SlotWithDrive(
                slot=s["slot"],
                populated=s["populated"],
                device=s["device"],
                drive=drive_summary,
            ))

        # Attach enclosure health from SES and count enclosure-level issues.
        enc_health = None
        if isinstance(health_data, dict):
            enc_health = EnclosureHealth(**health_data)
            if enc_health.overall_status == "CRITICAL":
                errors += 1
            elif enc_health.overall_status == "WARNING":
                warnings += 1
        elif isinstance(health_data, BaseException):
            logger.warning("SES health failed for %s: %s", enc["id"], health_data)

        enc_results.append(EnclosureWithDrives(
            id=enc["id"],
            sg_device=enc.get("sg_device"),
            vendor=enc["vendor"],
            model=enc["model"],
            revision=enc["revision"],
            total_slots=enc["total_slots"],
            populated_slots=enc["populated_slots"],
            slots=slots_out,
            health=enc_health,
        ))

    # Host drives (non-enclosure).
    host_drives_raw = await get_host_drives()
    host_drives_out: list[HostDrive] = []
    for hd in host_drives_raw:
        # The host drive itself plus any physical drives behind a RAID
        # controller are each counted and scored by their own health_status.
        members = [hd, *(hd.get("physical_drives") or [])]
        total_drives += len(members)
        for member in members:
            status = member.get("health_status", "healthy")
            if status == "error":
                errors += 1
            elif status == "warning":
                warnings += 1
        host_drives_out.append(HostDrive(**hd))

    response.headers["X-Cache"] = "HIT" if (any_lookups and all_cache_hits) else "MISS"

    return Overview(
        healthy=errors == 0,
        drive_count=total_drives,
        warning_count=warnings,
        error_count=errors,
        enclosures=enc_results,
        host_drives=host_drives_out,
    )