/**
* MetricsPage — Intel GPU metrics from Prometheus (node-exporter hwmon).
*
* METRIC AVAILABILITY
* -------------------
* Power (current W, TDP)
* Source: node_hwmon_energy_joule_total, node_hwmon_power_max_watt
* Driver: i915 hwmon sysfs (/sys/class/drm/card{N}/device/hwmon/)
* Scraped: node-exporter hwmon collector (enabled by default)
* Nodes: Discrete GPU nodes only (i915 driver exposes hwmon; iGPU driver does not)
* No extra config required — works out of the box with kube-prometheus-stack.
*
* GPU Frequency (current, boost, min, max MHz)
* Source: DRM sysfs (/sys/class/drm/card{N}/gt_{x}_freq_mhz)
* Driver: i915 kernel driver
* Scraped: NOT available -- node-exporter --collector.drm is AMD-only and does not
* read i915 gt_freq sysfs files. Would require a custom exporter or
* node-exporter textfile collector sidecar writing these values.
*
* GPU Utilization (engine busy %)
* Source: Not exposed via hwmon or any standard Prometheus collector for i915.
* Would require intel-gpu-top, XPU Manager, or a custom DRM-based exporter.
*
* Integrated GPU (iGPU) nodes
* The iGPU driver does not expose hwmon sensors. No Prometheus metrics are
* available for iGPU nodes regardless of configuration.
*/
import {
Loader,
NameValueTable,
SectionBox,
SectionHeader,
StatusLabel,
} from '@kinvolk/headlamp-plugin/lib/CommonComponents';
import React, { useCallback, useEffect, useState } from 'react';
import { useIntelGpuContext } from '../api/IntelGpuDataContext';
import {
fetchGpuMetrics,
formatPercent,
formatWatts,
GpuChipMetrics,
GpuMetrics,
} from '../api/metrics';
// ---------------------------------------------------------------------------
// Power bar
// ---------------------------------------------------------------------------
/**
 * Renders a power reading, optionally relative to a maximum.
 *
 * The formatted current value is always shown. When maxWatts is non-null and
 * greater than zero, the formatted maximum and a percentage (via formatPercent)
 * are appended, and an extra element guarded by pct !== null is rendered.
 *
 * NOTE(review): the element tags of the returned markup are not present in
 * this view of the file, so the exact DOM structure cannot be described here;
 * confirm against the original source.
 *
 * @param watts - Current power value; passed to formatWatts for display.
 * @param maxWatts - Upper reference value, or null when none is known.
 */
function PowerBar({ watts, maxWatts }: { watts: number; maxWatts: number | null }) {
// Rounded percentage of maxWatts, capped at 100. Null when maxWatts is null,
// zero or negative. There is no lower clamp, so a negative watts value would
// produce a negative percentage.
const pct = maxWatts && maxWatts > 0 ? Math.min(100, Math.round((watts / maxWatts) * 100)) : null;
// Colour thresholds: >= 90 uses #d32f2f, >= 70 uses #f57c00, anything else
// (including the no-maximum case) uses #0071c5.
const color =
pct === null ? '#0071c5' : pct >= 90 ? '#d32f2f' : pct >= 70 ? '#f57c00' : '#0071c5';
return (
{pct !== null && (
)}
{formatWatts(watts)}
{maxWatts !== null && maxWatts > 0 && (
/ {formatWatts(maxWatts)} ({formatPercent(watts, maxWatts)})
)}
);
}
// ---------------------------------------------------------------------------
// Per-chip card
// ---------------------------------------------------------------------------
/**
 * Builds the name/value rows describing a single GPU chip and renders them.
 *
 * Rows always include Node, GPU (PCI) and Current Power. A TDP row is appended
 * only when chip.powerMaxWatts is non-null and greater than zero.
 *
 * NOTE(review): the element tags of the returned markup, and of the non-null
 * Current Power branch, are not present in this view of the file; confirm
 * against the original source.
 *
 * @param chip - Metrics for one chip; this block reads nodeName, chip,
 *   powerWatts and powerMaxWatts from it.
 */
function GpuChipCard({ chip }: { chip: GpuChipMetrics }) {
// Base rows. When powerWatts is null the Current Power row falls back to a
// text hint that at least 5 minutes of scrape history is needed.
const rows: Array<{ name: string; value: React.ReactNode }> = [
{ name: 'Node', value: chip.nodeName },
{ name: 'GPU (PCI)', value: chip.chip },
{
name: 'Current Power',
value:
chip.powerWatts !== null ? (
) : (
No data — needs ≥5m of scrape history
),
},
];
// Only show TDP when a positive maximum is reported.
if (chip.powerMaxWatts !== null && chip.powerMaxWatts > 0) {
rows.push({ name: 'TDP', value: formatWatts(chip.powerMaxWatts) });
}
return (
);
}
// ---------------------------------------------------------------------------
// Requirements info box
// ---------------------------------------------------------------------------
/**
 * Static informational section explaining which GPU metrics are available.
 *
 * Takes no props and reads no state. The visible entries cover: a leading
 * entry about the hwmon energy metric on discrete GPU nodes, Frequency (MHz),
 * Utilization (%) and iGPU nodes, each with a short status and explanation.
 *
 * NOTE(review): the opening of the returned markup (including the name of the
 * first row) and the element tags are not present in this view of the file;
 * confirm against the original source.
 */
function MetricRequirements() {
return (
Available — discrete GPU nodes
Source: node_hwmon_energy_joule_total via node-exporter hwmon
collector (enabled by default). Requires the i915 kernel driver on the node. iGPU
nodes do not expose hwmon sensors.
>
),
},
{
name: 'Frequency (MHz)',
value: (
<>
Not available
i915 exposes gt_*_freq_mhz via DRM sysfs but node-exporter's{' '}
--collector.drm flag is AMD-only and does not read these files. A
custom exporter or textfile-collector sidecar writing these values would be
required.
>
),
},
{
name: 'Utilization (%)',
value: (
<>
Not available
No standard Prometheus collector exposes i915 engine busy percentage. Would
require intel-gpu-top, XPU Manager, or a custom DRM-based exporter.
>
),
},
{
name: 'iGPU nodes',
value: (
<>
No metrics available
The integrated GPU driver does not expose hwmon sensors. No Prometheus metrics are
available for iGPU nodes regardless of configuration.
>
),
},
]}
/>
);
}
// ---------------------------------------------------------------------------
// Main page
// ---------------------------------------------------------------------------
/**
 * Page component that fetches GPU metrics and renders them.
 *
 * Behaviour visible in this block:
 * - Waits for the Intel GPU context to finish loading before fetching.
 * - Calls fetchGpuMetrics once the context is ready and again whenever the
 *   refresh control bumps fetchSeq.
 * - Renders one of three sections depending on the result: an error section
 *   when fetchError is set, a diagnostic section when metrics has zero chips,
 *   or a summary (summed power, last fetched time, query text) followed by one
 *   entry per chip.
 *
 * NOTE(review): the element tags of the returned markup are not present in
 * this view of the file; confirm against the original source.
 */
export default function MetricsPage() {
const { gpuNodes, loading: ctxLoading } = useIntelGpuContext();
// NOTE(review): useState(null) infers the state type as null, which would not
// accept the values passed to setMetrics / setFetchError below under strict
// type checking. Presumably explicit type arguments (for example
// GpuMetrics | null and string | null) belong here - confirm.
const [metrics, setMetrics] = useState(null);
const [fetchError, setFetchError] = useState(null);
const [fetching, setFetching] = useState(false);
// Monotonic counter used purely as an effect dependency to force a refetch.
const [fetchSeq, setFetchSeq] = useState(0);
// Stable callback: incrementing fetchSeq re-runs the effect below.
const doFetch = useCallback(() => {
setFetchSeq(s => s + 1);
}, []);
useEffect(() => {
// Do not query until the context has finished loading.
if (ctxLoading) return;
// Set by the cleanup function so that a superseded or unmounted request
// does not write state.
let cancelled = false;
setFetching(true);
setFetchError(null);
fetchGpuMetrics()
.then(result => {
if (cancelled) return;
setMetrics(result);
// A falsy result is reported to the user as Prometheus being unreachable.
if (!result) {
setFetchError(
'Could not reach Prometheus. Ensure kube-prometheus-stack is installed in the monitoring namespace.'
);
}
})
.catch((e: unknown) => {
if (cancelled) return;
// Narrow the unknown rejection value to a displayable message.
setFetchError(e instanceof Error ? e.message : String(e));
})
.finally(() => {
if (!cancelled) setFetching(false);
});
return () => {
cancelled = true;
};
}, [ctxLoading, fetchSeq]);
// Early return while the context is still loading.
if (ctxLoading) {
return ;
}
return (
<>
void doFetch()}
disabled={fetching}
aria-label="Refresh metrics"
style={{
padding: '6px 16px',
backgroundColor: 'transparent',
color: 'var(--mui-palette-primary-main, #0071c5)',
border: '1px solid var(--mui-palette-primary-main, #0071c5)',
borderRadius: '4px',
cursor: 'pointer',
fontSize: '13px',
fontWeight: 500,
}}
>
{fetching ? 'Refreshing…' : 'Refresh'}
{fetching && !metrics && }
{fetchError && (
{fetchError},
},
{
name: 'Checked services',
value:
'kube-prometheus-stack-prometheus:9090, prometheus-operated:9090, prometheus:9090 (monitoring namespace)',
},
]}
/>
)}
{metrics && metrics.chips.length === 0 && (
Prometheus reachable — no
node_hwmon_chip_names{chip_name="i915"} found
),
},
{
name: 'GPU Nodes',
value:
gpuNodes.length > 0
? gpuNodes.map(n => n.metadata.name).join(', ')
: 'None detected',
},
{
name: 'Likely cause',
value:
'node-exporter is not running on the GPU nodes, or the hwmon collector is disabled.',
},
]}
/>
)}
{metrics && metrics.chips.length > 0 && (
<>
{
const total = metrics.chips.reduce((s, c) => s + (c.powerWatts ?? 0), 0);
const maxTotal = metrics.chips.reduce((s, c) => s + (c.powerMaxWatts ?? 0), 0);
return 0 ? maxTotal : null} />;
})(),
},
{
name: 'Last Fetched',
value: new Date(metrics.fetchedAt).toLocaleTimeString(),
},
{
name: 'Query',
value:
'rate(node_hwmon_energy_joule_total[5m]) joined with node_hwmon_chip_names{chip_name="i915"}',
},
]}
/>
{metrics.chips.map(chip => (
))}
>
)}
>
);
}