feat: initial release of headlamp-intel-gpu-plugin v0.1.0
Adds a Headlamp plugin for Intel GPU device plugin visibility: - Dedicated sidebar section: Overview, Device Plugins, GPU Nodes, GPU Pods - Native Node detail page injection: GPU capacity, allocatable, utilization, active pods - Native Pod detail page injection: per-container GPU resource requests/limits - Native Nodes table: GPU Type and GPU Devices columns - App bar health badge (hidden when plugin not installed) - GpuDevicePlugin CRD monitoring (deviceplugin.intel.com/v1) with graceful degradation when CRD is not present - Supports discrete (i915), Xe, and integrated GPU nodes via node labels - 48 unit tests, TypeScript clean, 28 kB production bundle Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
* AppBarGpuBadge — compact Intel GPU health indicator in the Headlamp app bar.
|
||||
*
|
||||
* Shows a status chip in the top navigation bar summarising GPU plugin health.
|
||||
* Hides itself when no Intel GPU plugin is detected.
|
||||
*/
|
||||
|
||||
import { StatusLabel } from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import { useIntelGpuContext } from '../api/IntelGpuDataContext';
|
||||
|
||||
/**
 * Compact Intel GPU health chip for the Headlamp app bar.
 *
 * Renders nothing while the shared GPU context is still loading or when no
 * Intel GPU plugin was detected. Otherwise shows a `StatusLabel` that is
 * `warning` when any device plugin reports unavailable nodes or fewer ready
 * nodes than desired, and `success` in every other case. The number of GPU
 * nodes is appended to the label and repeated in the tooltip.
 */
export default function AppBarGpuBadge() {
  const ctx = useIntelGpuContext();

  // Nothing to show until data has arrived and a plugin is actually present.
  if (ctx.loading || !ctx.pluginInstalled) {
    return null;
  }

  const anyPluginDegraded = ctx.devicePlugins.some(plugin => {
    const pluginStatus = plugin.status;
    if ((pluginStatus?.numberUnavailable ?? 0) > 0) {
      return true;
    }
    const wanted = pluginStatus?.desiredNumberScheduled ?? 0;
    const readyNow = pluginStatus?.numberReady ?? 0;
    return wanted > 0 && readyNow < wanted;
  });

  const nodeCount = ctx.gpuNodes.length;
  const pluralSuffix = nodeCount !== 1 ? 's' : '';
  const countSuffix = nodeCount > 0 ? ` · ${nodeCount}N` : '';

  return (
    <div
      style={{
        display: 'flex',
        alignItems: 'center',
        gap: '4px',
        padding: '0 8px',
        cursor: 'default',
      }}
      title={`Intel GPU: ${nodeCount} node${pluralSuffix}`}
    >
      <StatusLabel status={anyPluginDegraded ? 'warning' : 'success'}>
        <span style={{ fontSize: '11px', fontWeight: 600 }}>
          Intel GPU{countSuffix}
        </span>
      </StatusLabel>
    </div>
  );
}
|
||||
@@ -0,0 +1,212 @@
|
||||
/**
|
||||
* DevicePluginsPage — lists all GpuDevicePlugin CRD instances.
|
||||
*
|
||||
* Shows configuration details for each Intel GPU device plugin deployment,
|
||||
* including spec and status information.
|
||||
*/
|
||||
|
||||
import {
|
||||
Loader,
|
||||
NameValueTable,
|
||||
SectionBox,
|
||||
SectionHeader,
|
||||
SimpleTable,
|
||||
StatusLabel,
|
||||
} from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import { useIntelGpuContext } from '../api/IntelGpuDataContext';
|
||||
import { formatAge, isPodReady, pluginStatusText, pluginStatusToStatus } from '../api/k8s';
|
||||
|
||||
/**
 * Page listing every GpuDevicePlugin custom resource and the plugin daemon pods.
 *
 * Layout, top to bottom:
 *  - header with a manual refresh button,
 *  - an error box when the context reports an error,
 *  - a notice when the GpuDevicePlugin CRD is not available,
 *  - a notice when the CRD is available but no resources exist,
 *  - one detail box per GpuDevicePlugin (spec and status fields),
 *  - a table of plugin daemon pods, when any were found.
 *
 * A loader is shown instead of the page while the context is loading.
 */
export default function DevicePluginsPage() {
  const { devicePlugins, pluginPods, crdAvailable, loading, error, refresh } =
    useIntelGpuContext();

  if (loading) {
    return <Loader title="Loading device plugin data..." />;
  }

  return (
    <>
      <div
        style={{
          display: 'flex',
          justifyContent: 'space-between',
          alignItems: 'center',
          marginBottom: '20px',
        }}
      >
        <SectionHeader title="Intel GPU — Device Plugins" />
        <button
          type="button"
          onClick={refresh}
          aria-label="Refresh device plugin data"
          style={{
            padding: '6px 16px',
            backgroundColor: 'transparent',
            color: 'var(--mui-palette-primary-main, #0071c5)',
            border: '1px solid var(--mui-palette-primary-main, #0071c5)',
            borderRadius: '4px',
            cursor: 'pointer',
            fontSize: '13px',
            fontWeight: 500,
          }}
        >
          Refresh
        </button>
      </div>

      {error && (
        <SectionBox title="Error">
          <NameValueTable
            rows={[{ name: 'Status', value: <StatusLabel status="error">{error}</StatusLabel> }]}
          />
        </SectionBox>
      )}

      {!crdAvailable && (
        <SectionBox title="CRD Not Available">
          <NameValueTable
            rows={[
              {
                name: 'Status',
                value: (
                  <StatusLabel status="warning">
                    GpuDevicePlugin CRD (deviceplugin.intel.com/v1) is not installed
                  </StatusLabel>
                ),
              },
              {
                name: 'Note',
                value:
                  'Install the Intel Device Plugins Operator to manage GpuDevicePlugin resources. ' +
                  'Plugin daemon pods are shown below if detected.',
              },
            ]}
          />
        </SectionBox>
      )}

      {/* GpuDevicePlugin CRD instances */}
      {crdAvailable && devicePlugins.length === 0 && (
        <SectionBox title="No Device Plugins">
          <NameValueTable
            rows={[
              {
                name: 'Status',
                value: (
                  <StatusLabel status="warning">
                    No GpuDevicePlugin resources found on this cluster
                  </StatusLabel>
                ),
              },
              {
                name: 'Create',
                value:
                  'kubectl apply -f gpudeviceplugin.yaml (see Intel documentation for configuration)',
              },
            ]}
          />
        </SectionBox>
      )}

      {devicePlugins.map(plugin => {
        // An empty selector object would otherwise join to '' and render a blank cell.
        const nodeSelectorText = Object.entries(plugin.spec.nodeSelector ?? {})
          .map(([k, v]) => `${k}=${v}`)
          .join(', ');

        return (
          <SectionBox
            key={plugin.metadata.uid ?? plugin.metadata.name}
            title={`GpuDevicePlugin: ${plugin.metadata.name}`}
          >
            <NameValueTable
              rows={[
                {
                  name: 'Status',
                  value: (
                    <StatusLabel status={pluginStatusToStatus(plugin)}>
                      {pluginStatusText(plugin)}
                    </StatusLabel>
                  ),
                },
                {
                  name: 'Image',
                  value: plugin.spec.image ?? '—',
                },
                {
                  name: 'Shared Devices/Node',
                  value: String(plugin.spec.sharedDevNum ?? 1),
                },
                {
                  name: 'Allocation Policy',
                  value: plugin.spec.preferredAllocationPolicy ?? 'default',
                },
                {
                  name: 'Monitoring',
                  value: plugin.spec.enableMonitoring ? (
                    <StatusLabel status="success">Enabled</StatusLabel>
                  ) : (
                    <StatusLabel status="warning">Disabled</StatusLabel>
                  ),
                },
                {
                  name: 'Resource Manager',
                  value: plugin.spec.resourceManager ? 'Enabled' : 'Disabled',
                },
                {
                  name: 'Desired Nodes',
                  value: String(plugin.status?.desiredNumberScheduled ?? '—'),
                },
                {
                  name: 'Ready Nodes',
                  value: String(plugin.status?.numberReady ?? '—'),
                },
                // Only surface the unavailable count when it is non-zero.
                ...(plugin.status?.numberUnavailable
                  ? [
                      {
                        name: 'Unavailable Nodes',
                        value: (
                          <StatusLabel status="error">
                            {plugin.status.numberUnavailable}
                          </StatusLabel>
                        ),
                      },
                    ]
                  : []),
                {
                  name: 'Node Selector',
                  value: nodeSelectorText || '—',
                },
                {
                  name: 'Age',
                  value: formatAge(plugin.metadata.creationTimestamp),
                },
              ]}
            />
          </SectionBox>
        );
      })}

      {/* Plugin daemon pods */}
      {pluginPods.length > 0 && (
        <SectionBox title="Plugin Daemon Pods">
          <SimpleTable
            columns={[
              { label: 'Name', getter: (p) => p.metadata.name },
              { label: 'Namespace', getter: (p) => p.metadata.namespace ?? '—' },
              { label: 'Node', getter: (p) => p.spec?.nodeName ?? '—' },
              {
                label: 'Ready',
                getter: (p) => {
                  // Evaluate readiness once; falls back to the pod phase when not ready.
                  const ready = isPodReady(p);
                  return (
                    <StatusLabel status={ready ? 'success' : 'warning'}>
                      {ready ? 'Ready' : p.status?.phase ?? 'Unknown'}
                    </StatusLabel>
                  );
                },
              },
              {
                label: 'Restarts',
                getter: (p) => {
                  // Total restarts across all reported container statuses.
                  const restarts =
                    p.status?.containerStatuses?.reduce((sum, c) => sum + c.restartCount, 0) ?? 0;
                  return restarts > 0 ? (
                    <StatusLabel status="warning">{restarts}</StatusLabel>
                  ) : (
                    String(restarts)
                  );
                },
              },
              { label: 'Age', getter: (p) => formatAge(p.metadata.creationTimestamp) },
            ]}
            data={pluginPods}
          />
        </SectionBox>
      )}
    </>
  );
}
|
||||
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* NodeDetailSection — injected into Headlamp's native Node detail page.
|
||||
*
|
||||
* Shows Intel GPU resources available on the node (capacity, allocatable),
|
||||
* GPU type, and pods currently using GPU resources on this node.
|
||||
* Returns null for non-GPU nodes.
|
||||
*/
|
||||
|
||||
import {
|
||||
NameValueTable,
|
||||
SectionBox,
|
||||
StatusLabel,
|
||||
} from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import { useIntelGpuContext } from '../api/IntelGpuDataContext';
|
||||
import {
|
||||
formatGpuResourceName,
|
||||
formatGpuType,
|
||||
getGpuResources,
|
||||
getNodeGpuType,
|
||||
INTEL_GPU_RESOURCE,
|
||||
INTEL_GPU_RESOURCE_PREFIX,
|
||||
INTEL_GPU_XE_RESOURCE,
|
||||
isIntelGpuNode,
|
||||
isNodeReady,
|
||||
} from '../api/k8s';
|
||||
|
||||
/**
 * Props for the section injected into the Node detail page.
 *
 * Every field of `resource` is optional, so the component must inspect the
 * value before treating it as a GPU node.
 */
interface NodeDetailSectionProps {
  /** The object handed to the detail-page section. */
  resource: {
    /** Raw Kubernetes JSON, when the object wraps it. */
    jsonData?: unknown;
    /** Headlamp KubeObject may expose status directly or via jsonData. */
    status?: unknown;
    kind?: string;
    metadata?: {
      name?: string;
      labels?: Record<string, string>;
    };
  };
}
|
||||
|
||||
/**
 * Intel GPU section for the Node detail page.
 *
 * Returns null when `isIntelGpuNode` rejects the resource, or when the node
 * exposes no GPU entries in either capacity or allocatable. Otherwise renders
 * the GPU type, one row per GPU capacity/allocatable resource, a utilization
 * row (summed GPU requests of Running pods on this node versus allocatable),
 * and the names of the GPU pods scheduled on the node.
 *
 * While the shared context is still loading, pod-derived rows show
 * "Loading…" rather than a misleading zero utilization.
 *
 * @param resource - object passed by the detail page; its raw JSON is read
 *   from `jsonData` when that is an object, otherwise the resource itself is used.
 */
export default function NodeDetailSection({ resource }: NodeDetailSectionProps) {
  const { gpuPods, loading } = useIntelGpuContext();

  // Extract the raw Kubernetes JSON — Headlamp KubeObject wraps it in jsonData
  const rawNode =
    resource.jsonData && typeof resource.jsonData === 'object' ? resource.jsonData : resource;

  // Only render for Node resources that have Intel GPU
  if (!isIntelGpuNode(rawNode)) return null;

  const node = rawNode as Parameters<typeof isIntelGpuNode>[0] & {
    status?: {
      capacity?: Record<string, string>;
      allocatable?: Record<string, string>;
      nodeInfo?: { kernelVersion?: string; osImage?: string };
    };
    metadata: { name: string; labels?: Record<string, string> };
  };

  const capacity = getGpuResources(node.status?.capacity);
  const allocatable = getGpuResources(node.status?.allocatable);

  if (Object.keys(capacity).length === 0 && Object.keys(allocatable).length === 0) {
    return null;
  }

  const nodeName = node.metadata.name;
  // Cast to the helper's own parameter type instead of `any`.
  const gpuType = getNodeGpuType(node as Parameters<typeof getNodeGpuType>[0]);

  // Find GPU pods scheduled on this node (unknown until the context has loaded)
  const podsOnNode = loading ? [] : gpuPods.filter(p => p.spec?.nodeName === nodeName);

  const isGpuDeviceResource = (key: string) =>
    key === INTEL_GPU_RESOURCE || key === INTEL_GPU_XE_RESOURCE;

  // GPU utilization: count GPU units requested by running pods
  let gpuAllocatable = 0;
  for (const [key, val] of Object.entries(allocatable)) {
    if (isGpuDeviceResource(key)) {
      gpuAllocatable += parseInt(val, 10) || 0;
    }
  }

  let gpuInUse = 0;
  for (const pod of podsOnNode) {
    if (pod.status?.phase !== 'Running') continue;
    for (const container of pod.spec?.containers ?? []) {
      for (const [key, val] of Object.entries(container.resources?.requests ?? {})) {
        if (isGpuDeviceResource(key)) {
          gpuInUse += parseInt(val, 10) || 0;
        }
      }
    }
  }

  const utilizationPct = gpuAllocatable > 0 ? Math.round((gpuInUse / gpuAllocatable) * 100) : 0;
  const utilizationStatus: 'success' | 'warning' | 'error' =
    utilizationPct >= 90 ? 'error' : utilizationPct >= 70 ? 'warning' : 'success';

  return (
    <SectionBox title="Intel GPU">
      <NameValueTable
        rows={[
          {
            name: 'GPU Type',
            value: formatGpuType(gpuType),
          },
          // Capacity rows
          ...Object.entries(capacity).map(([key, val]) => ({
            name: `${formatGpuResourceName(key)} (capacity)`,
            value: val,
          })),
          // Allocatable rows
          ...Object.entries(allocatable).map(([key, val]) => ({
            name: `${formatGpuResourceName(key)} (allocatable)`,
            value: val,
          })),
          // Utilization
          ...(gpuAllocatable > 0
            ? [
                {
                  name: 'GPU Utilization',
                  // Pod data is not known yet while loading, so do not claim 0%.
                  value: loading ? (
                    'Loading…'
                  ) : (
                    <StatusLabel status={utilizationStatus}>
                      {`${gpuInUse}/${gpuAllocatable} (${utilizationPct}%)`}
                    </StatusLabel>
                  ),
                },
              ]
            : []),
          // Workload pods
          {
            name: 'GPU Workload Pods',
            value:
              podsOnNode.length > 0
                ? podsOnNode.map(p => p.metadata.name).join(', ')
                : loading
                ? 'Loading…'
                : 'None',
          },
        ]}
      />
    </SectionBox>
  );
}
|
||||
@@ -0,0 +1,297 @@
|
||||
/**
|
||||
* NodesPage — lists all nodes with Intel GPU capabilities.
|
||||
*
|
||||
* Shows GPU type, device count, resource allocation, and pod assignments
|
||||
* for each GPU-capable node in the cluster.
|
||||
*/
|
||||
|
||||
import {
|
||||
Loader,
|
||||
NameValueTable,
|
||||
SectionBox,
|
||||
SectionHeader,
|
||||
SimpleTable,
|
||||
StatusLabel,
|
||||
} from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import { useIntelGpuContext } from '../api/IntelGpuDataContext';
|
||||
import {
|
||||
formatAge,
|
||||
formatGpuResourceName,
|
||||
formatGpuType,
|
||||
getGpuResources,
|
||||
getNodeGpuCount,
|
||||
getNodeGpuType,
|
||||
INTEL_GPU_RESOURCE,
|
||||
INTEL_GPU_RESOURCE_PREFIX,
|
||||
INTEL_GPU_XE_RESOURCE,
|
||||
IntelGpuNode,
|
||||
isNodeReady,
|
||||
} from '../api/k8s';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GPU allocation bar component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Small horizontal bar showing how much of a node's GPU allocation is used.
 *
 * Renders a "—" placeholder when `allocatable` is not a positive number
 * (zero, negative or NaN), so the bar never shows a `NaN%` width. The
 * percentage is clamped to the 0–100 range; the fill colour switches at 70%
 * and 90%.
 *
 * @param used - number of GPU units currently in use
 * @param allocatable - number of GPU units that can be allocated
 */
function GpuAllocationBar({
  used,
  allocatable,
}: {
  used: number;
  allocatable: number;
}) {
  // `!(x > 0)` also rejects NaN, which `x === 0` would let through.
  if (!(allocatable > 0)) return <span>—</span>;

  const rawPct = Math.round((used / allocatable) * 100) || 0;
  const pct = Math.min(100, Math.max(0, rawPct));
  const color = pct >= 90 ? '#d32f2f' : pct >= 70 ? '#f57c00' : '#0071c5';

  return (
    <div style={{ display: 'flex', alignItems: 'center', gap: '8px' }}>
      <div
        role="progressbar"
        aria-label="GPU allocation"
        aria-valuemin={0}
        aria-valuemax={100}
        aria-valuenow={pct}
        style={{
          width: '80px',
          height: '8px',
          backgroundColor: '#e0e0e0',
          borderRadius: '4px',
          overflow: 'hidden',
        }}
      >
        <div
          style={{
            width: `${pct}%`,
            height: '100%',
            backgroundColor: color,
            borderRadius: '4px',
          }}
        />
      </div>
      <span style={{ fontSize: '12px' }}>{`${used}/${allocatable} (${pct}%)`}</span>
    </div>
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Node detail card
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Detail card for a single GPU node on the nodes page.
 *
 * Shows readiness, GPU type, device count (when non-zero), one row per GPU
 * capacity and allocatable resource, the GPU pods on the node, and basic
 * node info (OS image, kernel, kubelet, age).
 *
 * @param node - the GPU node to describe
 * @param podsByNode - map from node name to the names of GPU pods on that node
 */
function NodeDetailCard({
  node,
  podsByNode,
}: {
  node: IntelGpuNode;
  podsByNode: Map<string, string[]>;
}) {
  const gpuType = getNodeGpuType(node);
  const gpuCount = getNodeGpuCount(node);
  const ready = isNodeReady(node);

  const capacityResources = getGpuResources(node.status?.capacity);
  const allocatableResources = getGpuResources(node.status?.allocatable);

  const podsOnNode = podsByNode.get(node.metadata.name) ?? [];

  return (
    <SectionBox title={node.metadata.name}>
      <NameValueTable
        rows={[
          {
            name: 'Status',
            value: (
              <StatusLabel status={ready ? 'success' : 'error'}>
                {ready ? 'Ready' : 'Not Ready'}
              </StatusLabel>
            ),
          },
          {
            name: 'GPU Type',
            value: formatGpuType(gpuType),
          },
          ...(gpuCount > 0
            ? [{ name: 'GPU Devices (i915/xe)', value: String(gpuCount) }]
            : []),
          ...Object.entries(capacityResources).map(([key, cap]) => {
            const total = parseInt(cap, 10);
            return {
              name: `${formatGpuResourceName(key)} (capacity)`,
              // Show the raw quantity rather than "NaN" when it is not an integer.
              value: Number.isNaN(total) ? cap : String(total),
            };
          }),
          ...Object.entries(allocatableResources).map(([key, alloc]) => ({
            name: `${formatGpuResourceName(key)} (allocatable)`,
            value: alloc ?? '0',
          })),
          {
            name: 'GPU Workload Pods',
            value: podsOnNode.length > 0 ? podsOnNode.join(', ') : '—',
          },
          {
            name: 'OS Image',
            value: node.status?.nodeInfo?.osImage ?? '—',
          },
          {
            name: 'Kernel',
            value: node.status?.nodeInfo?.kernelVersion ?? '—',
          },
          {
            name: 'Kubelet',
            value: node.status?.nodeInfo?.kubeletVersion ?? '—',
          },
          {
            name: 'Age',
            value: formatAge(node.metadata.creationTimestamp),
          },
        ]}
      />
    </SectionBox>
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Page listing all nodes with Intel GPU capabilities.
 *
 * Renders a header with a refresh button, an error box when the context
 * reports an error, a notice when no GPU nodes exist, a summary table
 * (one row per node) and a detail card per node. A loader is shown instead
 * while the context is loading.
 *
 * The "Allocation" column compares the GPU units requested by Running pods
 * on the node against the node's allocatable GPU units, falling back to the
 * node's GPU device count when allocatable is zero. The "GPU Pods" column is
 * the plain count of GPU pods scheduled on the node, whatever their phase.
 */
export default function NodesPage() {
  const { gpuNodes, gpuPods, loading, error, refresh } = useIntelGpuContext();

  if (loading) {
    return <Loader title="Loading GPU node data..." />;
  }

  const isGpuDeviceResource = (key: string) =>
    key === INTEL_GPU_RESOURCE || key === INTEL_GPU_XE_RESOURCE;
  // Non-numeric or missing quantities count as zero instead of poisoning the sum with NaN.
  const toCount = (raw: string | undefined) => parseInt(raw ?? '0', 10) || 0;

  // Build maps: nodeName → GPU pod names, and nodeName → GPU units requested by Running pods
  const podsByNode = new Map<string, string[]>();
  const gpuInUseByNode = new Map<string, number>();
  for (const pod of gpuPods) {
    const nodeName = pod.spec?.nodeName;
    if (!nodeName) continue;

    const existing = podsByNode.get(nodeName) ?? [];
    existing.push(pod.metadata.name);
    podsByNode.set(nodeName, existing);

    // Only Running pods hold GPU units; finished or pending pods must not count as usage.
    if (pod.status?.phase !== 'Running') continue;
    let requested = 0;
    for (const container of pod.spec?.containers ?? []) {
      for (const [key, val] of Object.entries(container.resources?.requests ?? {})) {
        if (isGpuDeviceResource(key)) {
          requested += toCount(val);
        }
      }
    }
    gpuInUseByNode.set(nodeName, (gpuInUseByNode.get(nodeName) ?? 0) + requested);
  }

  // Build table data for summary
  const tableData = gpuNodes.map(node => {
    const capacity = node.status?.capacity ?? {};
    const allocatable = node.status?.allocatable ?? {};

    let totalCapacity = 0;
    let totalAllocatable = 0;
    for (const key of Object.keys(capacity)) {
      if (isGpuDeviceResource(key)) {
        totalCapacity += toCount(capacity[key]);
        totalAllocatable += toCount(allocatable[key]);
      }
    }

    return {
      node,
      gpuType: getNodeGpuType(node),
      gpuCount: getNodeGpuCount(node),
      ready: isNodeReady(node),
      totalCapacity,
      totalAllocatable,
      podsOnNode: podsByNode.get(node.metadata.name) ?? [],
      gpuInUse: gpuInUseByNode.get(node.metadata.name) ?? 0,
    };
  });

  return (
    <>
      <div
        style={{
          display: 'flex',
          justifyContent: 'space-between',
          alignItems: 'center',
          marginBottom: '20px',
        }}
      >
        <SectionHeader title="Intel GPU — Nodes" />
        <button
          onClick={refresh}
          aria-label="Refresh node data"
          style={{
            padding: '6px 16px',
            backgroundColor: 'transparent',
            color: 'var(--mui-palette-primary-main, #0071c5)',
            border: '1px solid var(--mui-palette-primary-main, #0071c5)',
            borderRadius: '4px',
            cursor: 'pointer',
            fontSize: '13px',
            fontWeight: 500,
          }}
        >
          Refresh
        </button>
      </div>

      {error && (
        <SectionBox title="Error">
          <NameValueTable
            rows={[{ name: 'Status', value: <StatusLabel status="error">{error}</StatusLabel> }]}
          />
        </SectionBox>
      )}

      {gpuNodes.length === 0 && (
        <SectionBox title="No GPU Nodes Found">
          <NameValueTable
            rows={[
              {
                name: 'Status',
                value: (
                  <StatusLabel status="warning">
                    No nodes with Intel GPU resources or labels were found
                  </StatusLabel>
                ),
              },
              {
                name: 'Note',
                value:
                  'Nodes appear here when they have gpu.intel.com/* resources or Intel GPU node labels. ' +
                  'Ensure the Intel GPU device plugin and Node Feature Discovery are installed.',
              },
            ]}
          />
        </SectionBox>
      )}

      {/* Summary table */}
      {gpuNodes.length > 0 && (
        <SectionBox title="GPU Node Summary">
          <SimpleTable
            columns={[
              { label: 'Node', getter: (d) => d.node.metadata.name },
              {
                label: 'Ready',
                getter: (d) => (
                  <StatusLabel status={d.ready ? 'success' : 'error'}>
                    {d.ready ? 'Ready' : 'Not Ready'}
                  </StatusLabel>
                ),
              },
              { label: 'GPU Type', getter: (d) => formatGpuType(d.gpuType) },
              { label: 'GPU Devices', getter: (d) => String(d.gpuCount || '—') },
              {
                label: 'Allocation',
                getter: (d) => (
                  <GpuAllocationBar
                    used={d.gpuInUse}
                    allocatable={d.totalAllocatable || d.gpuCount}
                  />
                ),
              },
              { label: 'GPU Pods', getter: (d) => String(d.podsOnNode.length) },
              { label: 'Age', getter: (d) => formatAge(d.node.metadata.creationTimestamp) },
            ]}
            data={tableData}
          />
        </SectionBox>
      )}

      {/* Per-node detail cards */}
      {gpuNodes.map(node => (
        <NodeDetailCard
          key={node.metadata.uid ?? node.metadata.name}
          node={node}
          podsByNode={podsByNode}
        />
      ))}
    </>
  );
}
|
||||
@@ -0,0 +1,379 @@
|
||||
/**
|
||||
* OverviewPage — main dashboard for the Intel GPU plugin.
|
||||
*
|
||||
* Shows: plugin health, GPU node summary, resource allocation overview,
|
||||
* and pods requesting GPU resources.
|
||||
*/
|
||||
|
||||
import {
|
||||
Loader,
|
||||
NameValueTable,
|
||||
PercentageBar,
|
||||
SectionBox,
|
||||
SectionHeader,
|
||||
SimpleTable,
|
||||
StatusLabel,
|
||||
} from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import { useIntelGpuContext } from '../api/IntelGpuDataContext';
|
||||
import {
|
||||
formatAge,
|
||||
formatGpuType,
|
||||
getNodeGpuCount,
|
||||
getNodeGpuType,
|
||||
getPodGpuRequests,
|
||||
INTEL_GPU_RESOURCE,
|
||||
INTEL_GPU_RESOURCE_PREFIX,
|
||||
INTEL_GPU_XE_RESOURCE,
|
||||
isNodeReady,
|
||||
isPodReady,
|
||||
pluginStatusText,
|
||||
pluginStatusToStatus,
|
||||
} from '../api/k8s';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GPU type distribution chart
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Builds the segment list for the GPU type distribution chart.
 *
 * Segments are emitted in the fixed order Discrete, Integrated, Unknown, and
 * any segment whose count is not greater than zero is left out.
 *
 * @param discreteCount - number of nodes with discrete GPUs
 * @param integratedCount - number of nodes with integrated GPUs
 * @param unknownCount - number of nodes whose GPU type is unknown
 * @returns one `{ name, value, fill }` entry per non-empty category
 */
function gpuTypeChartData(
  discreteCount: number,
  integratedCount: number,
  unknownCount: number
): Array<{ name: string; value: number; fill: string }> {
  const segments: Array<{ name: string; value: number; fill: string }> = [
    { name: 'Discrete', value: discreteCount, fill: '#0071c5' },
    { name: 'Integrated', value: integratedCount, fill: '#60a4dc' },
    { name: 'Unknown', value: unknownCount, fill: '#9e9e9e' },
  ];
  return segments.filter(segment => segment.value > 0);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Main dashboard for the Intel GPU plugin.
 *
 * Shows, in order: an error box, a "plugin not detected" notice with install
 * hints, a notice when the GpuDevicePlugin CRD is missing, the device plugin
 * status table, the plugin daemon pods, a GPU node summary with a type
 * distribution bar, the GPU allocation summary, a pod phase breakdown, and
 * up to ten Running GPU pods. A loader is shown instead while the context is
 * loading.
 *
 * Allocation figures: capacity and allocatable are summed over the GPU
 * device resources of every GPU node; "in use" is the sum of GPU device
 * requests of Running pods. The utilization percentage, the bar and the
 * "Free" row all use allocatable as the denominator so they agree.
 */
export default function OverviewPage() {
  const {
    devicePlugins,
    pluginInstalled,
    gpuNodes,
    gpuPods,
    pluginPods,
    crdAvailable,
    loading,
    error,
    refresh,
  } = useIntelGpuContext();

  if (loading) {
    return <Loader title="Loading Intel GPU data..." />;
  }

  const isGpuDeviceResource = (key: string) =>
    key === INTEL_GPU_RESOURCE || key === INTEL_GPU_XE_RESOURCE;
  // Non-numeric or missing quantities count as zero instead of turning the totals into NaN.
  const toCount = (raw: string | undefined) => parseInt(raw ?? '0', 10) || 0;

  // Node type breakdown and GPU capacity / allocatable totals
  let discreteCount = 0;
  let integratedCount = 0;
  let unknownCount = 0;
  let totalGpuCount = 0;
  let readyNodeCount = 0;
  let totalCapacityGpus = 0;
  let totalAllocatableGpus = 0;

  for (const node of gpuNodes) {
    const type = getNodeGpuType(node);
    if (type === 'discrete') discreteCount++;
    else if (type === 'integrated') integratedCount++;
    else unknownCount++;

    totalGpuCount += getNodeGpuCount(node);
    if (isNodeReady(node)) readyNodeCount++;

    const capacity = node.status?.capacity ?? {};
    const allocatable = node.status?.allocatable ?? {};
    for (const key of Object.keys(capacity)) {
      if (isGpuDeviceResource(key)) {
        totalCapacityGpus += toCount(capacity[key]);
        totalAllocatableGpus += toCount(allocatable[key]);
      }
    }
  }

  // Count GPUs in use from Running pods
  const runningPods = gpuPods.filter(p => p.status?.phase === 'Running');
  let totalAllocatedGpus = 0;
  for (const pod of runningPods) {
    for (const [key, value] of Object.entries(getPodGpuRequests(pod))) {
      if (isGpuDeviceResource(key)) {
        totalAllocatedGpus += toCount(value);
      }
    }
  }

  const freeGpus = totalAllocatableGpus - totalAllocatedGpus;
  // Same denominator as the bar below, so the label and the bar cannot disagree.
  const gpuUtilizationPct =
    totalAllocatableGpus > 0
      ? Math.round((totalAllocatedGpus / totalAllocatableGpus) * 100)
      : 0;

  const chartData = gpuTypeChartData(discreteCount, integratedCount, unknownCount);
  const totalGpuNodes = gpuNodes.length;

  // Pod phase breakdown
  const podPhaseCounts = { Running: 0, Pending: 0, Succeeded: 0, Failed: 0, Other: 0 };
  for (const pod of gpuPods) {
    const phase = pod.status?.phase ?? 'Other';
    if (phase in podPhaseCounts) {
      podPhaseCounts[phase as keyof typeof podPhaseCounts]++;
    } else {
      podPhaseCounts.Other++;
    }
  }

  return (
    <>
      <div
        style={{
          display: 'flex',
          justifyContent: 'space-between',
          alignItems: 'center',
          marginBottom: '20px',
        }}
      >
        <SectionHeader title="Intel GPU — Overview" />
        <button
          onClick={refresh}
          aria-label="Refresh Intel GPU data"
          style={{
            padding: '6px 16px',
            backgroundColor: 'transparent',
            color: 'var(--mui-palette-primary-main, #0071c5)',
            border: '1px solid var(--mui-palette-primary-main, #0071c5)',
            borderRadius: '4px',
            cursor: 'pointer',
            fontSize: '13px',
            fontWeight: 500,
          }}
        >
          Refresh
        </button>
      </div>

      {/* Error state */}
      {error && (
        <SectionBox title="Error">
          <NameValueTable
            rows={[{ name: 'Status', value: <StatusLabel status="error">{error}</StatusLabel> }]}
          />
        </SectionBox>
      )}

      {/* Plugin not detected (loading is already false here because of the early return) */}
      {!pluginInstalled && (
        <SectionBox title="Plugin Not Detected">
          <NameValueTable
            rows={[
              {
                name: 'Status',
                value: (
                  <StatusLabel status="warning">
                    Intel GPU device plugin not found on this cluster
                  </StatusLabel>
                ),
              },
              {
                name: 'Install (Helm)',
                value:
                  'helm repo add intel https://intel.github.io/helm-charts && ' +
                  'helm install intel-device-plugins-operator intel/intel-device-plugins-operator',
              },
              {
                name: 'Documentation',
                value: 'https://intel.github.io/intel-device-plugins-for-kubernetes/',
              },
            ]}
          />
        </SectionBox>
      )}

      {/* CRD not available notice */}
      {!crdAvailable && pluginInstalled && (
        <SectionBox title="Notice">
          <NameValueTable
            rows={[
              {
                name: 'CRD Status',
                value: (
                  <StatusLabel status="warning">
                    GpuDevicePlugin CRD not found — limited visibility available
                  </StatusLabel>
                ),
              },
              {
                name: 'Note',
                value:
                  'Plugin pods detected via DaemonSet labels. Install the Intel Device Plugins Operator for full CRD-based management.',
              },
            ]}
          />
        </SectionBox>
      )}

      {/* Device Plugin status — only shown when CRDs exist */}
      {crdAvailable && devicePlugins.length > 0 && (
        <SectionBox title="Device Plugin Status">
          <SimpleTable
            columns={[
              { label: 'Name', getter: (p) => p.metadata.name },
              {
                label: 'Status',
                getter: (p) => (
                  <StatusLabel status={pluginStatusToStatus(p)}>
                    {pluginStatusText(p)}
                  </StatusLabel>
                ),
              },
              {
                label: 'Monitoring',
                getter: (p) =>
                  p.spec.enableMonitoring ? (
                    <StatusLabel status="success">Enabled</StatusLabel>
                  ) : (
                    <StatusLabel status="warning">Disabled</StatusLabel>
                  ),
              },
              { label: 'Shared/Node', getter: (p) => String(p.spec.sharedDevNum ?? 1) },
              { label: 'Policy', getter: (p) => p.spec.preferredAllocationPolicy ?? '—' },
              { label: 'Age', getter: (p) => formatAge(p.metadata.creationTimestamp) },
            ]}
            data={devicePlugins}
          />
        </SectionBox>
      )}

      {/* Plugin daemon pods (shown when no CRD, or always as supplemental) */}
      {pluginPods.length > 0 && (
        <SectionBox title="Plugin Daemon Pods">
          <SimpleTable
            columns={[
              { label: 'Name', getter: (p) => p.metadata.name },
              { label: 'Namespace', getter: (p) => p.metadata.namespace ?? '—' },
              { label: 'Node', getter: (p) => p.spec?.nodeName ?? '—' },
              {
                label: 'Status',
                getter: (p) => {
                  const ready = isPodReady(p);
                  return (
                    <StatusLabel status={ready ? 'success' : 'warning'}>
                      {ready ? 'Ready' : p.status?.phase ?? 'Unknown'}
                    </StatusLabel>
                  );
                },
              },
              { label: 'Age', getter: (p) => formatAge(p.metadata.creationTimestamp) },
            ]}
            data={pluginPods}
          />
        </SectionBox>
      )}

      {/* GPU Node summary */}
      <SectionBox title="GPU Nodes">
        {totalGpuNodes > 0 && chartData.length > 0 && (
          <div style={{ marginBottom: '16px' }}>
            <div
              style={{
                marginBottom: '8px',
                fontSize: '14px',
                color: 'var(--mui-palette-text-secondary)',
              }}
            >
              GPU Type Distribution
            </div>
            <PercentageBar data={chartData} total={totalGpuNodes} />
          </div>
        )}
        <NameValueTable
          rows={[
            {
              name: 'Total GPU Nodes',
              value: (
                <StatusLabel status={totalGpuNodes > 0 ? 'success' : 'warning'}>
                  {totalGpuNodes}
                </StatusLabel>
              ),
            },
            { name: 'Ready Nodes', value: String(readyNodeCount) },
            ...(discreteCount > 0
              ? [{ name: 'Discrete GPU Nodes', value: String(discreteCount) }]
              : []),
            ...(integratedCount > 0
              ? [{ name: 'Integrated GPU Nodes', value: String(integratedCount) }]
              : []),
            ...(totalGpuCount > 0
              ? [{ name: 'Total GPU Devices', value: String(totalGpuCount) }]
              : []),
          ]}
        />
      </SectionBox>

      {/* GPU allocation summary */}
      {totalCapacityGpus > 0 && (
        <SectionBox title="GPU Allocation">
          <div style={{ marginBottom: '16px' }}>
            <div
              style={{
                marginBottom: '8px',
                fontSize: '14px',
                color: 'var(--mui-palette-text-secondary)',
              }}
            >
              GPU Utilization ({gpuUtilizationPct}%)
            </div>
            <PercentageBar
              data={[
                { name: 'In Use', value: totalAllocatedGpus, fill: '#0071c5' },
                // A bar segment cannot be negative, even if requests exceed allocatable.
                { name: 'Available', value: Math.max(0, freeGpus), fill: '#e0e0e0' },
              ]}
              total={totalAllocatableGpus}
            />
          </div>
          <NameValueTable
            rows={[
              { name: 'Total Capacity (GPU devices)', value: String(totalCapacityGpus) },
              { name: 'Allocatable', value: String(totalAllocatableGpus) },
              { name: 'In Use', value: String(totalAllocatedGpus) },
              {
                name: 'Free',
                value: (
                  <StatusLabel status={freeGpus > 0 ? 'success' : 'warning'}>
                    {freeGpus}
                  </StatusLabel>
                ),
              },
            ]}
          />
        </SectionBox>
      )}

      {/* GPU workloads summary */}
      <SectionBox title="GPU Workloads">
        <NameValueTable
          rows={[
            { name: 'Total GPU Pods', value: String(gpuPods.length) },
            ...(podPhaseCounts.Running > 0
              ? [
                  {
                    name: 'Running',
                    value: <StatusLabel status="success">{podPhaseCounts.Running}</StatusLabel>,
                  },
                ]
              : []),
            ...(podPhaseCounts.Pending > 0
              ? [
                  {
                    name: 'Pending',
                    value: <StatusLabel status="warning">{podPhaseCounts.Pending}</StatusLabel>,
                  },
                ]
              : []),
            ...(podPhaseCounts.Failed > 0
              ? [
                  {
                    name: 'Failed',
                    value: <StatusLabel status="error">{podPhaseCounts.Failed}</StatusLabel>,
                  },
                ]
              : []),
          ]}
        />
      </SectionBox>

      {/* Active GPU pods list (running only, trimmed to top 10) */}
      {runningPods.length > 0 && (
        <SectionBox title="Active GPU Pods">
          <SimpleTable
            columns={[
              { label: 'Name', getter: (p) => p.metadata.name },
              { label: 'Namespace', getter: (p) => p.metadata.namespace ?? '—' },
              { label: 'Node', getter: (p) => p.spec?.nodeName ?? '—' },
              {
                label: 'GPU Request',
                getter: (p) => {
                  // Render each request as "<resource without prefix>: <quantity>".
                  const parts = Object.entries(getPodGpuRequests(p)).map(
                    ([key, val]) => `${key.replace(INTEL_GPU_RESOURCE_PREFIX, '')}: ${val}`
                  );
                  return parts.join(', ') || '—';
                },
              },
              { label: 'Age', getter: (p) => formatAge(p.metadata.creationTimestamp) },
            ]}
            data={runningPods.slice(0, 10)}
          />
        </SectionBox>
      )}
    </>
  );
}
|
||||
@@ -0,0 +1,118 @@
|
||||
/**
|
||||
* PodDetailSection — injected into Headlamp's native Pod detail page.
|
||||
*
|
||||
* Shows Intel GPU resource requests and limits per container, plus
|
||||
* the pod phase and the name of the node the pod is scheduled on.
|
||||
* Returns null for pods that don't request Intel GPU resources.
|
||||
*/
|
||||
|
||||
import {
|
||||
NameValueTable,
|
||||
SectionBox,
|
||||
StatusLabel,
|
||||
} from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import { formatGpuResourceName, INTEL_GPU_RESOURCE_PREFIX, isGpuRequestingPod } from '../api/k8s';
|
||||
|
||||
/** Props for PodDetailSection. */
interface PodDetailSectionProps {
  /**
   * The pod shown on the detail page. When it carries an object-valued
   * `jsonData` field, that value is used as the raw pod; otherwise the
   * resource itself is treated as the raw pod.
   */
  resource: {
    kind?: string;
    metadata?: { name?: string; namespace?: string };
    jsonData?: unknown;
  };
}

/**
 * Renders the "Intel GPU Resources" section for a single pod.
 *
 * The table shows the pod phase, the scheduled node, the number of containers
 * that request or limit an Intel GPU resource, and one "request" row per
 * container per GPU resource. A separate "limit" row is added only when the
 * limit is set and differs from the request.
 *
 * @param resource - The pod being displayed (see PodDetailSectionProps).
 * @returns The section element, or `null` when the pod does not use Intel GPU
 *   resources.
 */
export default function PodDetailSection({ resource }: PodDetailSectionProps) {
  // Extract raw Kubernetes JSON
  const rawPod =
    resource.jsonData && typeof resource.jsonData === 'object'
      ? resource.jsonData
      : resource;

  // Only render for pods that request Intel GPU resources
  if (!isGpuRequestingPod(rawPod)) return null;

  const pod = rawPod as {
    metadata: { name: string; namespace?: string };
    spec?: {
      nodeName?: string;
      containers?: Array<{
        name: string;
        resources?: {
          requests?: Record<string, string>;
          limits?: Record<string, string>;
        };
      }>;
    };
    status?: { phase?: string };
  };

  const isGpuKey = (key: string): boolean => key.startsWith(INTEL_GPU_RESOURCE_PREFIX);

  // Keep only containers that mention a GPU resource in requests or limits
  const containers = pod.spec?.containers ?? [];
  const gpuContainers = containers.filter(c => {
    const all = { ...c.resources?.requests, ...c.resources?.limits };
    return Object.keys(all).some(isGpuKey);
  });

  if (gpuContainers.length === 0) return null;

  // Build rows: one per container per GPU resource
  const rows: Array<{ name: string; value: React.ReactNode }> = [];

  for (const c of gpuContainers) {
    const requests = c.resources?.requests ?? {};
    const limits = c.resources?.limits ?? {};
    // A Set de-duplicates keys that appear in both requests and limits
    const allGpuKeys = new Set([
      ...Object.keys(requests).filter(isGpuKey),
      ...Object.keys(limits).filter(isGpuKey),
    ]);

    for (const key of allGpuKeys) {
      const req = requests[key];
      const lim = limits[key];
      const resourceName = formatGpuResourceName(key);

      rows.push({
        name: `${c.name} → ${resourceName} request`,
        value: req ?? '—',
      });
      // The limit is only worth a row of its own when it differs from the request
      if (lim && lim !== req) {
        rows.push({
          name: `${c.name} → ${resourceName} limit`,
          value: lim,
        });
      }
    }
  }

  // Only a Failed pod is an error. Pending, "Unknown" and a missing phase are
  // shown as a warning, matching the colouring used by the GPU pods list so
  // the same pod is not red on one page and amber on another.
  const phase = pod.status?.phase;
  let phaseStatus: 'success' | 'warning' | 'error';
  if (phase === 'Running' || phase === 'Succeeded') {
    phaseStatus = 'success';
  } else if (phase === 'Failed') {
    phaseStatus = 'error';
  } else {
    phaseStatus = 'warning';
  }

  return (
    <SectionBox title="Intel GPU Resources">
      <NameValueTable
        rows={[
          {
            name: 'Phase',
            value: (
              <StatusLabel status={phaseStatus}>{phase ?? 'Unknown'}</StatusLabel>
            ),
          },
          {
            name: 'Scheduled Node',
            value: pod.spec?.nodeName ?? '—',
          },
          {
            name: 'GPU Containers',
            value: String(gpuContainers.length),
          },
          ...rows,
        ]}
      />
    </SectionBox>
  );
}
|
||||
@@ -0,0 +1,246 @@
|
||||
/**
|
||||
* PodsPage — lists all pods requesting Intel GPU resources.
|
||||
*
|
||||
* Shows GPU resource requests/limits per container and pod-level status.
|
||||
*/
|
||||
|
||||
import {
|
||||
Loader,
|
||||
NameValueTable,
|
||||
SectionBox,
|
||||
SectionHeader,
|
||||
SimpleTable,
|
||||
StatusLabel,
|
||||
} from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import { useIntelGpuContext } from '../api/IntelGpuDataContext';
|
||||
import {
|
||||
formatAge,
|
||||
formatGpuResourceName,
|
||||
IntelGpuPod,
|
||||
INTEL_GPU_RESOURCE_PREFIX,
|
||||
isPodReady,
|
||||
getPodGpuRequests,
|
||||
getPodRestarts,
|
||||
} from '../api/k8s';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Phase → status mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Maps a pod phase to the StatusLabel severity used to colour it.
 *
 * @param phase - The pod's `status.phase`, possibly undefined.
 * @returns `'success'` for Running and Succeeded, `'error'` for Failed, and
 *   `'warning'` for Pending and for any other or missing phase.
 */
function phaseToStatus(phase: string | undefined): 'success' | 'warning' | 'error' {
  if (phase === 'Running' || phase === 'Succeeded') {
    return 'success';
  }
  if (phase === 'Failed') {
    return 'error';
  }
  // Pending, plus anything unrecognised or absent
  return 'warning';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GPU container list for a pod
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Compact per-container summary of a pod's Intel GPU requests and limits.
 *
 * Renders one line per container that names an Intel GPU resource in its
 * requests or limits. A resource whose request equals its limit is shown as
 * `name: value`; otherwise both are shown as `name: req=… lim=…`. Renders a
 * dash when no container uses an Intel GPU resource.
 *
 * @param pod - The pod whose containers are summarised.
 */
function GpuContainerList({ pod }: { pod: IntelGpuPod }) {
  const isGpuKey = (resourceKey: string) => resourceKey.startsWith(INTEL_GPU_RESOURCE_PREFIX);

  const withGpu = (pod.spec?.containers ?? []).filter(container => {
    const merged = { ...container.resources?.requests, ...container.resources?.limits };
    return Object.keys(merged).some(isGpuKey);
  });

  if (withGpu.length === 0) {
    return <span>—</span>;
  }

  const lines = withGpu.map(container => {
    const requested = container.resources?.requests ?? {};
    const limited = container.resources?.limits ?? {};

    // Request keys first, then limit keys; the Set drops duplicates
    const resourceKeys = new Set(
      [...Object.keys(requested), ...Object.keys(limited)].filter(isGpuKey)
    );

    const summary = Array.from(resourceKeys).flatMap(resourceKey => {
      const label = formatGpuResourceName(resourceKey);
      const reqValue = requested[resourceKey];
      const limValue = limited[resourceKey];

      if (reqValue && limValue && reqValue === limValue) {
        return [`${label}: ${reqValue}`];
      }
      if (reqValue || limValue) {
        return [`${label}: req=${reqValue ?? '—'} lim=${limValue ?? '—'}`];
      }
      return [];
    });

    return (
      <div key={container.name} style={{ marginBottom: '2px', fontSize: '13px' }}>
        <strong>{container.name}</strong>: {summary.join(', ')}
      </div>
    );
  });

  return <>{lines}</>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Page listing the pods exposed by the Intel GPU data context.
 *
 * Shows a loader while the context is loading, then renders:
 *  - a header with a Refresh button wired to the context's `refresh`,
 *  - an error box when the context reports an error,
 *  - an empty-state box when there are no GPU pods,
 *  - a phase summary and a table of all GPU pods,
 *  - an attention table for pods still in the Pending phase.
 */
export default function PodsPage() {
  const { gpuPods, loading, error, refresh } = useIntelGpuContext();

  if (loading) {
    return <Loader title="Loading GPU pod data..." />;
  }

  // Group by phase
  const running = gpuPods.filter(p => p.status?.phase === 'Running');
  const pending = gpuPods.filter(p => p.status?.phase === 'Pending');
  const failed = gpuPods.filter(p => p.status?.phase === 'Failed');

  return (
    <>
      <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: '20px' }}>
        <SectionHeader title="Intel GPU — Pods" />
        <button
          onClick={refresh}
          aria-label="Refresh pod data"
          style={{
            padding: '6px 16px',
            backgroundColor: 'transparent',
            color: 'var(--mui-palette-primary-main, #0071c5)',
            border: '1px solid var(--mui-palette-primary-main, #0071c5)',
            borderRadius: '4px',
            cursor: 'pointer',
            fontSize: '13px',
            fontWeight: 500,
          }}
        >
          Refresh
        </button>
      </div>

      {error && (
        <SectionBox title="Error">
          <NameValueTable
            rows={[{ name: 'Status', value: <StatusLabel status="error">{error}</StatusLabel> }]}
          />
        </SectionBox>
      )}

      {gpuPods.length === 0 && (
        <SectionBox title="No GPU Pods Found">
          <NameValueTable
            rows={[
              {
                name: 'Status',
                value: (
                  <StatusLabel status="warning">
                    No pods requesting Intel GPU resources were found
                  </StatusLabel>
                ),
              },
              {
                name: 'Note',
                value:
                  'Pods appear here when they request resources like gpu.intel.com/i915 or gpu.intel.com/xe.',
              },
            ]}
          />
        </SectionBox>
      )}

      {/* Summary */}
      {gpuPods.length > 0 && (
        <SectionBox title="Summary">
          <NameValueTable
            rows={[
              { name: 'Total GPU Pods', value: String(gpuPods.length) },
              ...(running.length > 0
                ? [{ name: 'Running', value: <StatusLabel status="success">{running.length}</StatusLabel> }]
                : []),
              ...(pending.length > 0
                ? [{ name: 'Pending', value: <StatusLabel status="warning">{pending.length}</StatusLabel> }]
                : []),
              ...(failed.length > 0
                ? [{ name: 'Failed', value: <StatusLabel status="error">{failed.length}</StatusLabel> }]
                : []),
            ]}
          />
        </SectionBox>
      )}

      {/* All pods table */}
      {gpuPods.length > 0 && (
        <SectionBox title="All GPU Pods">
          <SimpleTable
            columns={[
              { label: 'Name', getter: (p) => p.metadata.name },
              { label: 'Namespace', getter: (p) => p.metadata.namespace ?? '—' },
              { label: 'Node', getter: (p) => p.spec?.nodeName ?? '—' },
              {
                label: 'Phase',
                getter: (p) => (
                  <StatusLabel status={phaseToStatus(p.status?.phase)}>
                    {p.status?.phase ?? 'Unknown'}
                  </StatusLabel>
                ),
              },
              {
                label: 'GPU Resources',
                getter: (p) => <GpuContainerList pod={p} />,
              },
              {
                label: 'Restarts',
                getter: (p) => {
                  const restarts = getPodRestarts(p);
                  return restarts > 0 ? (
                    <StatusLabel status="warning">{restarts}</StatusLabel>
                  ) : (
                    String(restarts)
                  );
                },
              },
              { label: 'Age', getter: (p) => formatAge(p.metadata.creationTimestamp) },
            ]}
            data={gpuPods}
          />
        </SectionBox>
      )}

      {/* Pending pods attention box */}
      {pending.length > 0 && (
        <SectionBox title="Attention: Pending GPU Pods">
          <SimpleTable
            columns={[
              { label: 'Name', getter: (p) => p.metadata.name },
              { label: 'Namespace', getter: (p) => p.metadata.namespace ?? '—' },
              {
                label: 'GPU Resources',
                getter: (p) => {
                  const reqs = getPodGpuRequests(p);
                  return Object.entries(reqs)
                    .map(([k, v]) => `${formatGpuResourceName(k)}: ${v}`)
                    .join(', ') || '—';
                },
              },
              {
                label: 'Waiting Reason',
                getter: (p) => {
                  // The stuck container is not necessarily the first one, so
                  // report the first waiting reason found across all of them.
                  for (const containerStatus of p.status?.containerStatuses ?? []) {
                    const reason = containerStatus.state?.waiting?.reason;
                    if (reason) return reason;
                  }
                  return '—';
                },
              },
              { label: 'Age', getter: (p) => formatAge(p.metadata.creationTimestamp) },
            ]}
            data={pending}
          />
        </SectionBox>
      )}
    </>
  );
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
* NodeColumns — adds Intel GPU columns to the native Headlamp Nodes table.
|
||||
*
|
||||
* Injects two columns:
|
||||
* - "GPU Type" — Discrete / Integrated / — for non-GPU nodes
|
||||
* - "GPU Devices" — count of i915/xe devices available on the node
|
||||
*
|
||||
* The processor is registered via registerResourceTableColumnsProcessor
|
||||
* in index.tsx, targeting the 'headlamp-nodes' table ID.
|
||||
*/
|
||||
|
||||
import { StatusLabel } from '@kinvolk/headlamp-plugin/lib/CommonComponents';
|
||||
import React from 'react';
|
||||
import {
|
||||
formatGpuType,
|
||||
getNodeGpuCount,
|
||||
getNodeGpuType,
|
||||
isIntelGpuNode,
|
||||
} from '../../api/k8s';
|
||||
|
||||
/**
 * Builds the Intel GPU columns appended to the native Nodes table.
 *
 * @returns Two column definitions, each with a `label` and a `getter`:
 *   "GPU Type" (a success-coloured label with the formatted GPU type) and
 *   "GPU Devices" (the device count as text). Both getters yield a dash for
 *   rows that are not Intel GPU nodes; the count column also yields a dash
 *   when the count is zero or less.
 */
export function buildNodeGpuColumns() {
  // A table row may be a wrapper exposing the raw object as `jsonData`,
  // or the raw object itself.
  const unwrap = (resource: unknown): unknown => {
    if (resource && typeof resource === 'object' && 'jsonData' in resource) {
      return (resource as { jsonData: unknown }).jsonData;
    }
    return resource;
  };

  return [
    {
      label: 'GPU Type',
      getter: (resource: unknown) => {
        const rawNode = unwrap(resource);
        if (!isIntelGpuNode(rawNode)) {
          return '—';
        }

        const gpuType = getNodeGpuType(rawNode as Parameters<typeof getNodeGpuType>[0]);
        return (
          <StatusLabel status="success">
            {formatGpuType(gpuType)}
          </StatusLabel>
        );
      },
    },
    {
      label: 'GPU Devices',
      getter: (resource: unknown) => {
        const rawNode = unwrap(resource);
        if (!isIntelGpuNode(rawNode)) {
          return '—';
        }

        const deviceCount = getNodeGpuCount(rawNode as Parameters<typeof getNodeGpuCount>[0]);
        if (deviceCount > 0) {
          return String(deviceCount);
        }
        return '—';
      },
    },
  ];
}
|
||||
Reference in New Issue
Block a user