Add enclosure health details (PSUs, fans, temps, voltages) via SES
Parse the output of `sg_ses --page=0x02` (the SES Enclosure Status diagnostic page) to surface enclosure-level health data, including power supply status, fan RPMs, temperature sensors, and voltage rails. Failed/critical components are reflected in the overview totals and shown as status pills in the enclosure card header with an expandable detail panel.
This commit is contained in:
@@ -5,12 +5,13 @@ from fastapi import APIRouter
|
||||
|
||||
from models.schemas import (
|
||||
DriveHealthSummary,
|
||||
EnclosureHealth,
|
||||
EnclosureWithDrives,
|
||||
HostDrive,
|
||||
Overview,
|
||||
SlotWithDrive,
|
||||
)
|
||||
from services.enclosure import discover_enclosures, list_slots
|
||||
from services.enclosure import discover_enclosures, get_enclosure_status, list_slots
|
||||
from services.host import get_host_drives
|
||||
from services.smart import get_smart_data
|
||||
from services.zfs import get_zfs_pool_map
|
||||
@@ -26,13 +27,24 @@ async def get_overview():
|
||||
enclosures_raw = discover_enclosures()
|
||||
pool_map = await get_zfs_pool_map()
|
||||
|
||||
# Fetch SES health data for all enclosures concurrently
|
||||
async def _get_health(enc):
|
||||
if enc.get("sg_device"):
|
||||
return await get_enclosure_status(enc["sg_device"])
|
||||
return None
|
||||
|
||||
health_results = await asyncio.gather(
|
||||
*[_get_health(enc) for enc in enclosures_raw],
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
enc_results: list[EnclosureWithDrives] = []
|
||||
total_drives = 0
|
||||
warnings = 0
|
||||
errors = 0
|
||||
all_healthy = True
|
||||
|
||||
for enc in enclosures_raw:
|
||||
for enc_idx, enc in enumerate(enclosures_raw):
|
||||
slots_raw = list_slots(enc["id"])
|
||||
|
||||
# Gather SMART data for all populated slots concurrently
|
||||
@@ -110,6 +122,20 @@ async def get_overview():
|
||||
drive=drive_summary,
|
||||
))
|
||||
|
||||
# Attach enclosure health from SES
|
||||
health_data = health_results[enc_idx]
|
||||
enc_health = None
|
||||
if isinstance(health_data, dict):
|
||||
enc_health = EnclosureHealth(**health_data)
|
||||
# Count enclosure-level issues
|
||||
if enc_health.overall_status == "CRITICAL":
|
||||
errors += 1
|
||||
all_healthy = False
|
||||
elif enc_health.overall_status == "WARNING":
|
||||
warnings += 1
|
||||
elif isinstance(health_data, Exception):
|
||||
logger.warning("SES health failed for %s: %s", enc["id"], health_data)
|
||||
|
||||
enc_results.append(EnclosureWithDrives(
|
||||
id=enc["id"],
|
||||
sg_device=enc.get("sg_device"),
|
||||
@@ -119,6 +145,7 @@ async def get_overview():
|
||||
total_slots=enc["total_slots"],
|
||||
populated_slots=enc["populated_slots"],
|
||||
slots=slots_out,
|
||||
health=enc_health,
|
||||
))
|
||||
|
||||
# Host drives (non-enclosure)
|
||||
|
||||
Reference in New Issue
Block a user