Add enclosure health details (PSUs, fans, temps, voltages) via SES

Parse sg_ses --page=0x02 output to surface enclosure-level health data
including power supply status, fan RPMs, temperature sensors, and voltage
rails. Failed/critical components are reflected in the overview totals
and shown as status pills in the enclosure card header with an expandable
detail panel.
This commit is contained in:
2026-03-07 06:03:26 +00:00
parent 8ea8fdef08
commit 0112875894
4 changed files with 379 additions and 4 deletions

View File

@@ -1,5 +1,7 @@
import os
import asyncio
import logging
import os
import re
from pathlib import Path
logger = logging.getLogger(__name__)
@@ -136,3 +138,121 @@ def _parse_slot_number(entry: Path) -> int | None:
except ValueError:
return None
return None
async def get_enclosure_status(sg_device: str) -> dict | None:
    """Run ``sg_ses --page=0x02`` on *sg_device* and parse its health data.

    Args:
        sg_device: Device argument passed verbatim to ``sg_ses``.

    Returns:
        The dict produced by ``_parse_ses_page02`` on success, or ``None``
        when ``sg_ses`` is missing, exits with a non-zero status, or any
        other error occurs while running or parsing it. Failures are logged
        as warnings and never raised to the caller.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            "sg_ses", "--page=0x02", sg_device,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            # Decode leniently (like stdout below): a strict decode of
            # non-UTF-8 error output would raise and hide the real message.
            logger.warning(
                "sg_ses failed for %s: %s",
                sg_device,
                stderr.decode(errors="replace").strip(),
            )
            return None
        return _parse_ses_page02(stdout.decode(errors="replace"))
    except FileNotFoundError:
        # The sg_ses executable could not be found.
        logger.warning("sg_ses not found")
        return None
    except Exception as e:
        # Best-effort boundary: health data is optional, so log and give up.
        logger.warning("sg_ses error for %s: %s", sg_device, e)
        return None
def _parse_ses_page02(text: str) -> dict:
"""Parse sg_ses --page=0x02 text output into structured health data."""
result = {
"overall_status": "OK",
"psus": [],
"fans": [],
"temps": [],
"voltages": [],
}
# Parse header line for overall status:
# INVOP=0, INFO=0, NON-CRIT=0, CRIT=1, UNRECOV=0
header_match = re.search(
r"INVOP=\d+,\s*INFO=\d+,\s*NON-CRIT=(\d+),\s*CRIT=(\d+),\s*UNRECOV=(\d+)",
text,
)
if header_match:
non_crit = int(header_match.group(1))
crit = int(header_match.group(2))
unrecov = int(header_match.group(3))
if crit > 0 or unrecov > 0:
result["overall_status"] = "CRITICAL"
elif non_crit > 0:
result["overall_status"] = "WARNING"
# Split into element type sections.
# Each section starts with "Element type: <type>"
sections = re.split(r"(?=\s*Element type:)", text)
for section in sections:
type_match = re.match(r"\s*Element type:\s*(.+)", section)
if not type_match:
continue
element_type = type_match.group(1).strip().rstrip(",").lower()
# Find individual element blocks (skip "Overall descriptor")
elements = re.split(r"(?=\s*Element \d+ descriptor:)", section)
for elem_text in elements:
desc_match = re.match(r"\s*Element (\d+) descriptor:", elem_text)
if not desc_match:
continue
idx = int(desc_match.group(1))
# Extract status line
status_match = re.search(r"status:\s*(.+?)(?:,|\n|$)", elem_text, re.IGNORECASE)
status = status_match.group(1).strip() if status_match else "Unknown"
if status.lower() == "not installed":
continue
if "power supply" in element_type:
fail = "Fail=1" in elem_text
ac_fail = "AC fail=1" in elem_text
dc_fail = "DC fail=1" in elem_text
result["psus"].append({
"index": idx,
"status": status,
"fail": fail,
"ac_fail": ac_fail,
"dc_fail": dc_fail,
})
elif "cooling" in element_type or "fan" in element_type:
fail = "Fail=1" in elem_text
rpm_match = re.search(r"Actual speed[=:]\s*(\d+)\s*rpm", elem_text, re.IGNORECASE)
rpm = int(rpm_match.group(1)) if rpm_match else None
result["fans"].append({
"index": idx,
"status": status,
"rpm": rpm,
"fail": fail,
})
elif "temperature" in element_type:
temp_match = re.search(r"Temperature=\s*([\d.]+)\s*C", elem_text)
temp = float(temp_match.group(1)) if temp_match else None
result["temps"].append({
"index": idx,
"status": status,
"temperature_c": temp,
})
elif "voltage" in element_type:
volt_match = re.search(r"Voltage:\s*([\d.]+)\s*V", elem_text, re.IGNORECASE)
if not volt_match:
volt_match = re.search(r"([\d.]+)\s*V", elem_text)
voltage = float(volt_match.group(1)) if volt_match else None
result["voltages"].append({
"index": idx,
"status": status,
"voltage": voltage,
})
return result