Add enclosure health details (PSUs, fans, temps, voltages) via SES
Parse sg_ses --page=0x02 output to surface enclosure-level health data including power supply status, fan RPMs, temperature sensors, and voltage rails. Failed/critical components are reflected in the overview totals and shown as status pills in the enclosure card header with an expandable detail panel.
This commit is contained in:
@@ -709,7 +709,181 @@ function HostDrivesCard({ drives, onSelect, t }) {
|
||||
);
|
||||
}
|
||||
|
||||
function EnclosureHealthSummary({ health, t }) {
|
||||
if (!health) return null;
|
||||
|
||||
const statusColors = {
|
||||
CRITICAL: t.health.error,
|
||||
WARNING: t.health.warning,
|
||||
OK: t.health.healthy,
|
||||
};
|
||||
const sc = statusColors[health.overall_status] || statusColors.OK;
|
||||
|
||||
const failedPsus = health.psus.filter((p) => p.fail || p.status.toLowerCase() === "critical");
|
||||
const failedFans = health.fans.filter((f) => f.fail);
|
||||
const temps = health.temps.filter((s) => s.temperature_c != null);
|
||||
const tempMin = temps.length > 0 ? Math.min(...temps.map((s) => s.temperature_c)) : null;
|
||||
const tempMax = temps.length > 0 ? Math.max(...temps.map((s) => s.temperature_c)) : null;
|
||||
|
||||
return (
|
||||
<div style={{ display: "flex", alignItems: "center", gap: 8, flexWrap: "wrap", marginTop: 6 }}>
|
||||
{/* Overall badge */}
|
||||
<span style={{
|
||||
display: "inline-flex", alignItems: "center", gap: 5,
|
||||
padding: "2px 10px", borderRadius: 99,
|
||||
background: sc.bg, border: `1px solid ${sc.border}`,
|
||||
fontSize: 11, fontWeight: 700, color: sc.text, letterSpacing: 0.3,
|
||||
}}>
|
||||
<span style={{ width: 6, height: 6, borderRadius: "50%", background: sc.dot }} />
|
||||
{health.overall_status}
|
||||
</span>
|
||||
|
||||
{/* PSU pills */}
|
||||
{health.psus.map((psu) => {
|
||||
const bad = psu.fail || psu.status.toLowerCase() === "critical";
|
||||
const pc = bad ? t.health.error : t.health.healthy;
|
||||
return (
|
||||
<span key={psu.index} style={{
|
||||
display: "inline-flex", alignItems: "center", gap: 4,
|
||||
padding: "2px 8px", borderRadius: 99,
|
||||
background: pc.bg, border: `1px solid ${pc.border}`,
|
||||
fontSize: 10, fontWeight: 600, color: pc.text,
|
||||
}}>
|
||||
<span style={{ width: 5, height: 5, borderRadius: "50%", background: pc.dot }} />
|
||||
PSU {psu.index} {bad ? "FAIL" : "OK"}
|
||||
</span>
|
||||
);
|
||||
})}
|
||||
|
||||
{/* Fans summary */}
|
||||
{health.fans.length > 0 && (
|
||||
<span style={{
|
||||
fontSize: 11, color: failedFans.length > 0 ? t.health.error.text : t.textSecondary,
|
||||
fontWeight: 600,
|
||||
}}>
|
||||
{failedFans.length > 0
|
||||
? `${failedFans.length}/${health.fans.length} fans failed`
|
||||
: `${health.fans.length} fans OK`}
|
||||
</span>
|
||||
)}
|
||||
|
||||
{/* Temp range */}
|
||||
{tempMin != null && (
|
||||
<span style={{
|
||||
fontSize: 11, color: tempMax >= 45 ? t.health.warning.text : t.textSecondary,
|
||||
fontWeight: 600, fontFamily: "'JetBrains Mono', monospace",
|
||||
}}>
|
||||
{tempMin === tempMax ? `${tempMin}\u00B0C` : `${tempMin}\u2013${tempMax}\u00B0C`}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function EnclosureHealthDetail({ health, t }) {
|
||||
if (!health) return null;
|
||||
|
||||
const sectionStyle = { marginBottom: 12 };
|
||||
const headerStyle = {
|
||||
fontSize: 10, fontWeight: 700, color: t.textMuted,
|
||||
textTransform: "uppercase", letterSpacing: 1, marginBottom: 6,
|
||||
};
|
||||
const rowStyle = {
|
||||
display: "flex", justifyContent: "space-between", alignItems: "center",
|
||||
padding: "4px 0", borderBottom: `1px solid ${t.divider}`, fontSize: 12,
|
||||
};
|
||||
|
||||
return (
|
||||
<div style={{
|
||||
padding: "12px 16px", background: t.surface,
|
||||
borderTop: `1px solid ${t.divider}`, borderBottom: `1px solid ${t.divider}`,
|
||||
}}>
|
||||
<div style={{ display: "grid", gridTemplateColumns: "repeat(auto-fit, minmax(220px, 1fr))", gap: 16 }}>
|
||||
{/* PSUs */}
|
||||
{health.psus.length > 0 && (
|
||||
<div style={sectionStyle}>
|
||||
<div style={headerStyle}>Power Supplies</div>
|
||||
{health.psus.map((psu) => {
|
||||
const bad = psu.fail || psu.status.toLowerCase() === "critical";
|
||||
return (
|
||||
<div key={psu.index} style={rowStyle}>
|
||||
<span style={{ color: t.textSecondary }}>PSU {psu.index}</span>
|
||||
<span style={{
|
||||
fontWeight: 600, color: bad ? t.health.error.text : t.health.healthy.text,
|
||||
fontFamily: "'JetBrains Mono', monospace",
|
||||
}}>
|
||||
{psu.status}{psu.ac_fail ? " (AC fail)" : ""}{psu.dc_fail ? " (DC fail)" : ""}
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Fans */}
|
||||
{health.fans.length > 0 && (
|
||||
<div style={sectionStyle}>
|
||||
<div style={headerStyle}>Fans</div>
|
||||
{health.fans.map((fan) => (
|
||||
<div key={fan.index} style={rowStyle}>
|
||||
<span style={{ color: t.textSecondary }}>Fan {fan.index}</span>
|
||||
<span style={{
|
||||
fontWeight: 600,
|
||||
color: fan.fail ? t.health.error.text : t.health.healthy.text,
|
||||
fontFamily: "'JetBrains Mono', monospace",
|
||||
}}>
|
||||
{fan.rpm != null ? `${fan.rpm} RPM` : fan.status}
|
||||
{fan.fail ? " FAIL" : ""}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Temps */}
|
||||
{health.temps.length > 0 && (
|
||||
<div style={sectionStyle}>
|
||||
<div style={headerStyle}>Temperature Sensors</div>
|
||||
{health.temps.map((ts) => (
|
||||
<div key={ts.index} style={rowStyle}>
|
||||
<span style={{ color: t.textSecondary }}>Sensor {ts.index}</span>
|
||||
<span style={{
|
||||
fontWeight: 600,
|
||||
color: ts.temperature_c >= 45 ? t.health.warning.text : t.text,
|
||||
fontFamily: "'JetBrains Mono', monospace",
|
||||
}}>
|
||||
{ts.temperature_c != null ? `${ts.temperature_c}\u00B0C` : ts.status}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Voltages */}
|
||||
{health.voltages.length > 0 && (
|
||||
<div style={sectionStyle}>
|
||||
<div style={headerStyle}>Voltage Rails</div>
|
||||
{health.voltages.map((vs) => (
|
||||
<div key={vs.index} style={rowStyle}>
|
||||
<span style={{ color: t.textSecondary }}>Rail {vs.index}</span>
|
||||
<span style={{
|
||||
fontWeight: 600, color: t.text,
|
||||
fontFamily: "'JetBrains Mono', monospace",
|
||||
}}>
|
||||
{vs.voltage != null ? `${vs.voltage} V` : vs.status}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function EnclosureCard({ enclosure, view, onSelect, selectedSerial, t }) {
|
||||
const [healthExpanded, setHealthExpanded] = useState(false);
|
||||
|
||||
return (
|
||||
<div style={{
|
||||
background: t.cardBg, borderRadius: 16,
|
||||
@@ -720,7 +894,7 @@ function EnclosureCard({ enclosure, view, onSelect, selectedSerial, t }) {
|
||||
<div style={{
|
||||
padding: "16px 20px",
|
||||
borderBottom: `1px solid ${t.divider}`,
|
||||
display: "flex", alignItems: "center", justifyContent: "space-between",
|
||||
display: "flex", alignItems: "flex-start", justifyContent: "space-between",
|
||||
flexWrap: "wrap", gap: 8,
|
||||
}}>
|
||||
<div>
|
||||
@@ -730,11 +904,29 @@ function EnclosureCard({ enclosure, view, onSelect, selectedSerial, t }) {
|
||||
<div style={{ fontSize: 12, color: t.textSecondary, marginTop: 2 }}>
|
||||
{enclosure.sg_device} · {enclosure.populated_slots}/{enclosure.total_slots} slots populated
|
||||
</div>
|
||||
{enclosure.health && (
|
||||
<div style={{ display: "flex", alignItems: "center", gap: 6 }}>
|
||||
<EnclosureHealthSummary health={enclosure.health} t={t} />
|
||||
<button
|
||||
onClick={() => setHealthExpanded(!healthExpanded)}
|
||||
style={{
|
||||
background: "none", border: "none", cursor: "pointer",
|
||||
fontSize: 11, color: t.accent, fontWeight: 600,
|
||||
padding: "2px 6px", marginTop: 6,
|
||||
}}
|
||||
>
|
||||
{healthExpanded ? "Hide details" : "Details"}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div style={{ fontSize: 11, color: t.textMuted, fontFamily: "'JetBrains Mono', monospace" }}>
|
||||
ID {enclosure.id}
|
||||
</div>
|
||||
</div>
|
||||
{healthExpanded && enclosure.health && (
|
||||
<EnclosureHealthDetail health={enclosure.health} t={t} />
|
||||
)}
|
||||
<div style={{ padding: 16 }}>
|
||||
{view === "grid" ? (
|
||||
<GridView enclosure={enclosure} onSelect={onSelect} t={t} />
|
||||
|
||||
@@ -65,6 +65,41 @@ class SlotWithDrive(BaseModel):
|
||||
drive: DriveHealthSummary | None = None
|
||||
|
||||
|
||||
class PsuStatus(BaseModel):
    """Health of one power-supply element reported by the enclosure."""

    # Number taken from the element's "Element N descriptor" heading.
    index: int
    # Text following "status:" in the element's descriptor.
    status: str
    # True when the descriptor text contains "Fail=1".
    fail: bool = False
    # True when the descriptor text contains "AC fail=1".
    ac_fail: bool = False
    # True when the descriptor text contains "DC fail=1".
    dc_fail: bool = False
|
||||
|
||||
|
||||
class FanStatus(BaseModel):
    """Health of one cooling (fan) element reported by the enclosure."""

    # Number taken from the element's "Element N descriptor" heading.
    index: int
    # Text following "status:" in the element's descriptor.
    status: str
    # Value of "Actual speed" in rpm; None when the descriptor has no such reading.
    rpm: int | None = None
    # True when the descriptor text contains "Fail=1".
    fail: bool = False
|
||||
|
||||
|
||||
class TempSensor(BaseModel):
    """Reading of one temperature-sensor element reported by the enclosure."""

    # Number taken from the element's "Element N descriptor" heading.
    index: int
    # Text following "status:" in the element's descriptor.
    status: str
    # Value of "Temperature=" in degrees Celsius; None when no reading was parsed.
    temperature_c: float | None = None
|
||||
|
||||
|
||||
class VoltageSensor(BaseModel):
    """Reading of one voltage-sensor element reported by the enclosure."""

    # Number taken from the element's "Element N descriptor" heading.
    index: int
    # Text following "status:" in the element's descriptor.
    status: str
    # Parsed voltage reading in volts; None when no reading was parsed.
    voltage: float | None = None
|
||||
|
||||
|
||||
class EnclosureHealth(BaseModel):
    """Enclosure-level health parsed from ``sg_ses --page=0x02`` output."""

    # "OK", "WARNING" or "CRITICAL", derived by the parser from the page's
    # NON-CRIT / CRIT / UNRECOV header counters.
    overall_status: str = "OK"
    # Per-element details, one list per component family.
    psus: list[PsuStatus] = []
    fans: list[FanStatus] = []
    temps: list[TempSensor] = []
    voltages: list[VoltageSensor] = []
|
||||
|
||||
|
||||
class EnclosureWithDrives(BaseModel):
|
||||
id: str
|
||||
sg_device: str | None = None
|
||||
@@ -74,6 +109,7 @@ class EnclosureWithDrives(BaseModel):
|
||||
total_slots: int
|
||||
populated_slots: int
|
||||
slots: list[SlotWithDrive]
|
||||
health: EnclosureHealth | None = None
|
||||
|
||||
|
||||
class HostDrive(BaseModel):
|
||||
|
||||
@@ -5,12 +5,13 @@ from fastapi import APIRouter
|
||||
|
||||
from models.schemas import (
|
||||
DriveHealthSummary,
|
||||
EnclosureHealth,
|
||||
EnclosureWithDrives,
|
||||
HostDrive,
|
||||
Overview,
|
||||
SlotWithDrive,
|
||||
)
|
||||
from services.enclosure import discover_enclosures, list_slots
|
||||
from services.enclosure import discover_enclosures, get_enclosure_status, list_slots
|
||||
from services.host import get_host_drives
|
||||
from services.smart import get_smart_data
|
||||
from services.zfs import get_zfs_pool_map
|
||||
@@ -26,13 +27,24 @@ async def get_overview():
|
||||
enclosures_raw = discover_enclosures()
|
||||
pool_map = await get_zfs_pool_map()
|
||||
|
||||
# Fetch SES health data for all enclosures concurrently
|
||||
async def _get_health(enc):
|
||||
if enc.get("sg_device"):
|
||||
return await get_enclosure_status(enc["sg_device"])
|
||||
return None
|
||||
|
||||
health_results = await asyncio.gather(
|
||||
*[_get_health(enc) for enc in enclosures_raw],
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
enc_results: list[EnclosureWithDrives] = []
|
||||
total_drives = 0
|
||||
warnings = 0
|
||||
errors = 0
|
||||
all_healthy = True
|
||||
|
||||
for enc in enclosures_raw:
|
||||
for enc_idx, enc in enumerate(enclosures_raw):
|
||||
slots_raw = list_slots(enc["id"])
|
||||
|
||||
# Gather SMART data for all populated slots concurrently
|
||||
@@ -110,6 +122,20 @@ async def get_overview():
|
||||
drive=drive_summary,
|
||||
))
|
||||
|
||||
# Attach enclosure health from SES
|
||||
health_data = health_results[enc_idx]
|
||||
enc_health = None
|
||||
if isinstance(health_data, dict):
|
||||
enc_health = EnclosureHealth(**health_data)
|
||||
# Count enclosure-level issues
|
||||
if enc_health.overall_status == "CRITICAL":
|
||||
errors += 1
|
||||
all_healthy = False
|
||||
elif enc_health.overall_status == "WARNING":
|
||||
warnings += 1
|
||||
elif isinstance(health_data, Exception):
|
||||
logger.warning("SES health failed for %s: %s", enc["id"], health_data)
|
||||
|
||||
enc_results.append(EnclosureWithDrives(
|
||||
id=enc["id"],
|
||||
sg_device=enc.get("sg_device"),
|
||||
@@ -119,6 +145,7 @@ async def get_overview():
|
||||
total_slots=enc["total_slots"],
|
||||
populated_slots=enc["populated_slots"],
|
||||
slots=slots_out,
|
||||
health=enc_health,
|
||||
))
|
||||
|
||||
# Host drives (non-enclosure)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -136,3 +138,121 @@ def _parse_slot_number(entry: Path) -> int | None:
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
async def get_enclosure_status(sg_device: str, timeout: float = 10.0) -> dict | None:
    """Run ``sg_ses --page=0x02`` on ``sg_device`` and parse the health data.

    Args:
        sg_device: Device path handed to ``sg_ses`` as its only positional
            argument.
        timeout: Maximum number of seconds to wait for ``sg_ses`` to finish.
            Callers gather the health of several enclosures at once, so a
            single unresponsive enclosure must not hold the others up.

    Returns:
        The dict produced by ``_parse_ses_page02`` on success, or ``None``
        when ``sg_ses`` is missing, times out, exits non-zero, or any other
        error occurs. Failures are logged as warnings, never raised.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            "sg_ses", "--page=0x02", sg_device,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        except asyncio.TimeoutError:
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited between the timeout firing and the kill.
                pass
            # Deliberately not awaiting the exit here, so a child that is
            # stuck in the kernel cannot keep this request open.
            logger.warning("sg_ses timed out after %ss for %s", timeout, sg_device)
            return None
        if proc.returncode != 0:
            # Decode leniently: undecodable bytes in stderr must not turn the
            # real error message into a UnicodeDecodeError.
            logger.warning(
                "sg_ses failed for %s: %s",
                sg_device,
                stderr.decode(errors="replace").strip(),
            )
            return None
        return _parse_ses_page02(stdout.decode(errors="replace"))
    except FileNotFoundError:
        logger.warning("sg_ses not found")
        return None
    except Exception as e:
        # Best effort: enclosure health is optional, so any other failure
        # (including a parse error) degrades to "no health data".
        logger.warning("sg_ses error for %s: %s", sg_device, e)
        return None
|
||||
|
||||
|
||||
def _parse_ses_page02(text: str) -> dict:
|
||||
"""Parse sg_ses --page=0x02 text output into structured health data."""
|
||||
result = {
|
||||
"overall_status": "OK",
|
||||
"psus": [],
|
||||
"fans": [],
|
||||
"temps": [],
|
||||
"voltages": [],
|
||||
}
|
||||
|
||||
# Parse header line for overall status:
|
||||
# INVOP=0, INFO=0, NON-CRIT=0, CRIT=1, UNRECOV=0
|
||||
header_match = re.search(
|
||||
r"INVOP=\d+,\s*INFO=\d+,\s*NON-CRIT=(\d+),\s*CRIT=(\d+),\s*UNRECOV=(\d+)",
|
||||
text,
|
||||
)
|
||||
if header_match:
|
||||
non_crit = int(header_match.group(1))
|
||||
crit = int(header_match.group(2))
|
||||
unrecov = int(header_match.group(3))
|
||||
if crit > 0 or unrecov > 0:
|
||||
result["overall_status"] = "CRITICAL"
|
||||
elif non_crit > 0:
|
||||
result["overall_status"] = "WARNING"
|
||||
|
||||
# Split into element type sections.
|
||||
# Each section starts with "Element type: <type>"
|
||||
sections = re.split(r"(?=\s*Element type:)", text)
|
||||
|
||||
for section in sections:
|
||||
type_match = re.match(r"\s*Element type:\s*(.+)", section)
|
||||
if not type_match:
|
||||
continue
|
||||
element_type = type_match.group(1).strip().rstrip(",").lower()
|
||||
|
||||
# Find individual element blocks (skip "Overall descriptor")
|
||||
elements = re.split(r"(?=\s*Element \d+ descriptor:)", section)
|
||||
|
||||
for elem_text in elements:
|
||||
desc_match = re.match(r"\s*Element (\d+) descriptor:", elem_text)
|
||||
if not desc_match:
|
||||
continue
|
||||
idx = int(desc_match.group(1))
|
||||
|
||||
# Extract status line
|
||||
status_match = re.search(r"status:\s*(.+?)(?:,|\n|$)", elem_text, re.IGNORECASE)
|
||||
status = status_match.group(1).strip() if status_match else "Unknown"
|
||||
|
||||
if status.lower() == "not installed":
|
||||
continue
|
||||
|
||||
if "power supply" in element_type:
|
||||
fail = "Fail=1" in elem_text
|
||||
ac_fail = "AC fail=1" in elem_text
|
||||
dc_fail = "DC fail=1" in elem_text
|
||||
result["psus"].append({
|
||||
"index": idx,
|
||||
"status": status,
|
||||
"fail": fail,
|
||||
"ac_fail": ac_fail,
|
||||
"dc_fail": dc_fail,
|
||||
})
|
||||
|
||||
elif "cooling" in element_type or "fan" in element_type:
|
||||
fail = "Fail=1" in elem_text
|
||||
rpm_match = re.search(r"Actual speed[=:]\s*(\d+)\s*rpm", elem_text, re.IGNORECASE)
|
||||
rpm = int(rpm_match.group(1)) if rpm_match else None
|
||||
result["fans"].append({
|
||||
"index": idx,
|
||||
"status": status,
|
||||
"rpm": rpm,
|
||||
"fail": fail,
|
||||
})
|
||||
|
||||
elif "temperature" in element_type:
|
||||
temp_match = re.search(r"Temperature=\s*([\d.]+)\s*C", elem_text)
|
||||
temp = float(temp_match.group(1)) if temp_match else None
|
||||
result["temps"].append({
|
||||
"index": idx,
|
||||
"status": status,
|
||||
"temperature_c": temp,
|
||||
})
|
||||
|
||||
elif "voltage" in element_type:
|
||||
volt_match = re.search(r"Voltage:\s*([\d.]+)\s*V", elem_text, re.IGNORECASE)
|
||||
if not volt_match:
|
||||
volt_match = re.search(r"([\d.]+)\s*V", elem_text)
|
||||
voltage = float(volt_match.group(1)) if volt_match else None
|
||||
result["voltages"].append({
|
||||
"index": idx,
|
||||
"status": status,
|
||||
"voltage": voltage,
|
||||
})
|
||||
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user