/** * MetricsPage — Intel GPU metrics from Prometheus (node-exporter hwmon). * * METRIC AVAILABILITY * ------------------- * Power (current W, TDP) * Source: node_hwmon_energy_joule_total, node_hwmon_power_max_watt * Driver: i915 hwmon sysfs (/sys/class/drm/card{N}/device/hwmon/) * Scraped: node-exporter hwmon collector (enabled by default) * Nodes: Discrete GPU nodes only (i915 driver exposes hwmon; iGPU driver does not) * No extra config required — works out of the box with kube-prometheus-stack. * * GPU Frequency (current, boost, min, max MHz) * Source: DRM sysfs (/sys/class/drm/card{N}/gt_{x}_freq_mhz) * Driver: i915 kernel driver * Scraped: NOT available -- node-exporter --collector.drm is AMD-only and does not * read i915 gt_freq sysfs files. Would require a custom exporter or * node-exporter textfile collector sidecar writing these values. * * GPU Utilization (engine busy %) * Source: Not exposed via hwmon or any standard Prometheus collector for i915. * Would require intel-gpu-top, XPU Manager, or a custom DRM-based exporter. * * Integrated GPU (iGPU) nodes * The iGPU driver does not expose hwmon sensors. No Prometheus metrics are * available for iGPU nodes regardless of configuration. */ import { Loader, NameValueTable, SectionBox, SectionHeader, StatusLabel, } from '@kinvolk/headlamp-plugin/lib/CommonComponents'; import React, { useCallback, useEffect, useState } from 'react'; import { useIntelGpuContext } from '../api/IntelGpuDataContext'; import { fetchGpuMetrics, formatPercent, formatWatts, GpuChipMetrics, GpuMetrics, } from '../api/metrics'; // --------------------------------------------------------------------------- // Power bar // --------------------------------------------------------------------------- /** Displays a power reading in watts. When maxWatts is a positive number it also derives a percentage of maxWatts (rounded, capped at 100) and a severity color: '#d32f2f' at >= 90%, '#f57c00' at >= 70%, otherwise '#0071c5'. When maxWatts is null or not positive the percentage is null and the default color '#0071c5' is used. NOTE(review): the JSX that consumes pct and color is not visible in this copy of the file. */ function PowerBar({ watts, maxWatts }: { watts: number; maxWatts: number | null }) { const pct = maxWatts && maxWatts > 0 ? Math.min(100, Math.round((watts / maxWatts) * 100)) : null; const color = pct === null ? '#0071c5' : pct >= 90 ? 
'#d32f2f' : pct >= 70 ? '#f57c00' : '#0071c5'; return (
{pct !== null && (
)} {formatWatts(watts)} {maxWatts !== null && maxWatts > 0 && ( / {formatWatts(maxWatts)} ({formatPercent(watts, maxWatts)}) )}
); } // --------------------------------------------------------------------------- // Per-chip card // --------------------------------------------------------------------------- /** Builds the name and value rows for a single GPU chip: the node name, the chip identifier (labelled 'GPU (PCI)'), and the current power, with a fallback message when chip.powerWatts is null. A 'TDP' row is appended only when chip.powerMaxWatts is non-null and greater than 0. NOTE(review): the JSX that renders these rows is not visible in this copy of the file. */ function GpuChipCard({ chip }: { chip: GpuChipMetrics }) { const rows: Array<{ name: string; value: React.ReactNode }> = [ { name: 'Node', value: chip.nodeName }, { name: 'GPU (PCI)', value: chip.chip }, { name: 'Current Power', value: chip.powerWatts !== null ? ( ) : ( No data — needs ≥5m of scrape history ), }, ]; if (chip.powerMaxWatts !== null && chip.powerMaxWatts > 0) { rows.push({ name: 'TDP', value: formatWatts(chip.powerMaxWatts) }); } return ( ); } // --------------------------------------------------------------------------- // Requirements info box // --------------------------------------------------------------------------- /** Static informational content describing which GPU metrics can be collected (power, frequency, utilization, iGPU nodes) and why. Takes no props and reads no state. */ function MetricRequirements() { return ( Available — discrete GPU nodes
Source: node_hwmon_energy_joule_total via node-exporter hwmon collector (enabled by default). Requires the i915 kernel driver on the node. iGPU nodes do not expose hwmon sensors.
), }, { name: 'Frequency (MHz)', value: ( <> Not available
i915 exposes gt_*_freq_mhz via DRM sysfs but node-exporter's{' '} --collector.drm flag is AMD-only and does not read these files. A custom exporter or textfile-collector sidecar writing these values would be required.
), }, { name: 'Utilization (%)', value: ( <> Not available
No standard Prometheus collector exposes i915 engine busy percentage. Would require intel-gpu-top, XPU Manager, or a custom DRM-based exporter.
), }, { name: 'iGPU nodes', value: ( <> No metrics available
The integrated GPU driver does not expose hwmon sensors. No Prometheus metrics are available for iGPU nodes regardless of configuration.
), }, ]} />
); } // --------------------------------------------------------------------------- // Main page // --------------------------------------------------------------------------- /** Page component (default export). Does nothing until the Intel GPU context has finished loading, then calls fetchGpuMetrics(); the fetch re-runs whenever ctxLoading changes or doFetch() increments fetchSeq. A falsy result is reported as a "Could not reach Prometheus" error; a rejected promise reports the error's message (or its string form). The effect cleanup sets a cancelled flag so a superseded or unmounted request never updates state. NOTE(review): the catch path leaves the previous metrics in state, so earlier data can remain rendered next to a new error — confirm this is intended. */ export default function MetricsPage() { const { gpuNodes, loading: ctxLoading } = useIntelGpuContext(); const [metrics, setMetrics] = useState(null); const [fetchError, setFetchError] = useState(null); const [fetching, setFetching] = useState(false); const [fetchSeq, setFetchSeq] = useState(0); const doFetch = useCallback(() => { setFetchSeq(s => s + 1); }, []); useEffect(() => { if (ctxLoading) return; /* flipped by the cleanup below; every async callback checks it before touching state */ let cancelled = false; setFetching(true); setFetchError(null); fetchGpuMetrics() .then(result => { if (cancelled) return; setMetrics(result); if (!result) { setFetchError( 'Could not reach Prometheus. Ensure kube-prometheus-stack is installed in the monitoring namespace.' ); } }) .catch((e: unknown) => { if (cancelled) return; setFetchError(e instanceof Error ? e.message : String(e)); }) .finally(() => { if (!cancelled) setFetching(false); }); return () => { cancelled = true; }; }, [ctxLoading, fetchSeq]); if (ctxLoading) { return ; } return ( <>
{fetching && !metrics && } {fetchError && ( {fetchError}, }, { name: 'Checked services', value: 'kube-prometheus-stack-prometheus:9090, prometheus-operated:9090, prometheus:9090 (monitoring namespace)', }, ]} /> )} {metrics && metrics.chips.length === 0 && ( Prometheus reachable — no node_hwmon_chip_names{chip_name="i915"} found ), }, { name: 'GPU Nodes', value: gpuNodes.length > 0 ? gpuNodes.map(n => n.metadata.name).join(', ') : 'None detected', }, { name: 'Likely cause', value: 'node-exporter is not running on the GPU nodes, or the hwmon collector is disabled.', }, ]} /> )} {metrics && metrics.chips.length > 0 && ( <> { const total = metrics.chips.reduce((s, c) => s + (c.powerWatts ?? 0), 0); const maxTotal = metrics.chips.reduce((s, c) => s + (c.powerMaxWatts ?? 0), 0); return 0 ? maxTotal : null} />; })(), }, { name: 'Last Fetched', value: new Date(metrics.fetchedAt).toLocaleTimeString(), }, { name: 'Query', value: 'rate(node_hwmon_energy_joule_total[5m]) joined with node_hwmon_chip_names{chip_name="i915"}', }, ]} /> {metrics.chips.map(chip => ( ))} )} ); }