Enumerate physical drives behind RAID via smartctl megaraid passthrough
This commit is contained in:
@@ -3,7 +3,7 @@ import json
|
||||
import logging
|
||||
|
||||
from services.enclosure import discover_enclosures, list_slots
|
||||
from services.smart import get_smart_data
|
||||
from services.smart import get_smart_data, scan_megaraid_drives
|
||||
from services.zfs import get_zfs_pool_map
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -106,6 +106,31 @@ async def get_host_drives() -> list[dict]:
|
||||
"zfs_vdev": zfs_info.get("vdev"),
|
||||
"zfs_state": zfs_info.get("state"),
|
||||
"health_status": health_status,
|
||||
"physical_drives": [],
|
||||
})
|
||||
|
||||
# Discover physical drives behind RAID controllers
|
||||
has_raid = any(r["drive_type"] == "raid" and not r["smart_supported"] for r in results)
|
||||
if has_raid:
|
||||
megaraid_drives = await scan_megaraid_drives()
|
||||
for pd in megaraid_drives:
|
||||
pd_healthy = pd.get("smart_healthy")
|
||||
pd_realloc = pd.get("reallocated_sectors") or 0
|
||||
pd_pending = pd.get("pending_sectors") or 0
|
||||
pd_unc = pd.get("uncorrectable_errors") or 0
|
||||
if pd_healthy is False:
|
||||
pd["health_status"] = "error"
|
||||
elif pd_realloc > 0 or pd_pending > 0 or pd_unc > 0:
|
||||
pd["health_status"] = "warning"
|
||||
else:
|
||||
pd["health_status"] = "healthy"
|
||||
pd["drive_type"] = "physical"
|
||||
pd["physical_drives"] = []
|
||||
|
||||
# Attach to the first RAID host drive
|
||||
for r in results:
|
||||
if r["drive_type"] == "raid" and not r["smart_supported"]:
|
||||
r["physical_drives"] = megaraid_drives
|
||||
break
|
||||
|
||||
return results
|
||||
|
||||
@@ -145,6 +145,60 @@ def _parse_smart_json(device: str, data: dict) -> dict:
|
||||
return result
|
||||
|
||||
|
||||
async def scan_megaraid_drives() -> list[dict]:
    """Discover physical drives behind MegaRAID controllers via smartctl --scan.

    Runs ``smartctl --scan -j``, keeps the reported devices whose ``type``
    contains ``megaraid``, and then queries each of them concurrently with
    ``smartctl -a -j -d <type> <name>``.

    Returns:
        One dict per drive whose query produced parseable JSON: the result of
        ``_parse_smart_json`` extended with the keys ``megaraid_id``,
        ``megaraid_type`` and ``megaraid_device``. An empty list is returned
        when smartctl cannot be started, when the scan output is not usable
        JSON, or when the scan reports no MegaRAID entries.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            "smartctl", "--scan", "-j",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await proc.communicate()
        scan_data = json.loads(stdout)
    except (OSError, ValueError) as e:
        # OSError covers a missing binary as well as permission/exec errors;
        # ValueError covers JSONDecodeError and undecodable output bytes.
        logger.warning("smartctl --scan failed: %s", e)
        return []

    # Tolerate a JSON root that is not an object, or a null/absent "devices".
    devices = scan_data.get("devices") if isinstance(scan_data, dict) else None
    if not isinstance(devices, list):
        return []

    megaraid_entries = [
        d for d in devices
        if isinstance(d, dict)
        and isinstance(d.get("type"), str)
        and "megaraid" in d["type"]
        and d.get("name")
    ]
    if not megaraid_entries:
        return []

    # Query SMART for each physical drive concurrently
    async def _query(entry: dict) -> dict | None:
        dev_path = entry["name"]
        dev_type = entry["type"]
        try:
            proc = await asyncio.create_subprocess_exec(
                "smartctl", "-a", "-j", "-d", dev_type, dev_path,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, _ = await proc.communicate()
            if not stdout:
                return None
            data = json.loads(stdout)
        except (OSError, ValueError) as e:
            logger.warning(
                "smartctl query failed for %s (%s): %s", dev_path, dev_type, e
            )
            return None

        # Extract the disk number from type like "sat+megaraid,0"; fall back
        # to the whole type string when the "megaraid," marker is absent.
        _, marker, tail = dev_type.rpartition("megaraid,")
        megaraid_id = tail if marker else dev_type

        result = _parse_smart_json(f"megaraid:{megaraid_id}", data)
        result["megaraid_id"] = megaraid_id
        result["megaraid_type"] = dev_type
        result["megaraid_device"] = dev_path
        return result

    outcomes = await asyncio.gather(
        *(_query(e) for e in megaraid_entries),
        return_exceptions=True,
    )

    drives: list[dict] = []
    for entry, outcome in zip(megaraid_entries, outcomes):
        if isinstance(outcome, dict):
            drives.append(outcome)
        elif isinstance(outcome, BaseException):
            # One failing drive must not hide the others, but leave a trace
            # instead of dropping the error silently.
            logger.warning(
                "SMART query for %s (%s) raised: %r",
                entry.get("name"), entry.get("type"), outcome,
            )
    return drives
|
||||
|
||||
|
||||
def _get_attr_raw(attrs: list[dict], attr_id: int) -> int | None:
|
||||
"""Get the raw_value for a SMART attribute by ID."""
|
||||
for attr in attrs:
|
||||
|
||||
Reference in New Issue
Block a user