feat: initial release of headlamp-intel-gpu-plugin v0.1.0
Adds a Headlamp plugin for Intel GPU device plugin visibility:

- Dedicated sidebar section: Overview, Device Plugins, GPU Nodes, GPU Pods
- Native Node detail page injection: GPU capacity, allocatable, utilization, active pods
- Native Pod detail page injection: per-container GPU resource requests/limits
- Native Nodes table: GPU Type and GPU Devices columns
- App bar health badge (hidden when plugin not installed)
- GpuDevicePlugin CRD monitoring (deviceplugin.intel.com/v1) with graceful degradation when CRD is not present
- Supports discrete (i915), Xe, and integrated GPU nodes via node labels
- 48 unit tests, TypeScript clean, 28 kB production bundle

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -0,0 +1,230 @@
|
||||
/**
|
||||
* IntelGpuDataContext — shared data provider for Intel GPU device plugin resources.
|
||||
*
|
||||
* Wraps K8s hook calls and ApiProxy requests, providing filtered Intel GPU
|
||||
* resources to all child pages through React context, avoiding prop drilling
|
||||
* and duplicate API calls.
|
||||
*/
|
||||
|
||||
import { ApiProxy, K8s } from '@kinvolk/headlamp-plugin/lib';
|
||||
import React, { createContext, useCallback, useContext, useEffect, useMemo, useState } from 'react';
|
||||
import {
|
||||
filterGpuRequestingPods,
|
||||
filterIntelGpuNodes,
|
||||
filterIntelGpuPluginPods,
|
||||
GpuDevicePlugin,
|
||||
INTEL_DEVICE_PLUGIN_API_GROUP,
|
||||
INTEL_DEVICE_PLUGIN_API_VERSION,
|
||||
IntelGpuNode,
|
||||
IntelGpuPod,
|
||||
isGpuDevicePlugin,
|
||||
isKubeList,
|
||||
} from './k8s';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Context shape
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Shape of the value published by `IntelGpuDataProvider` through React context. */
export interface IntelGpuContextValue {
  /** GpuDevicePlugin CRD instances — one per GPU type/config */
  devicePlugins: GpuDevicePlugin[];
  /**
   * True if at least one GpuDevicePlugin CR was listed or at least one
   * device plugin daemon pod was found.
   */
  pluginInstalled: boolean;

  /** Nodes that have Intel GPU resources or labels */
  gpuNodes: IntelGpuNode[];

  /** Pods requesting Intel GPU resources */
  gpuPods: IntelGpuPod[];

  /** Intel GPU device plugin daemon pods */
  pluginPods: IntelGpuPod[];

  /** True if the GpuDevicePlugin list request succeeded on the cluster */
  crdAvailable: boolean;

  /** True while any of the underlying requests is still in flight */
  loading: boolean;
  /** All error messages joined with '; ', or null when nothing failed */
  error: string | null;

  /** Manual refresh trigger for the ApiProxy-based requests */
  refresh: () => void;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Context
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Context instance; `null` means no provider is mounted above the consumer. */
const IntelGpuContext = createContext<IntelGpuContextValue | null>(null);

/**
 * Returns the shared Intel GPU data published by `IntelGpuDataProvider`.
 *
 * @throws Error when called outside of an `IntelGpuDataProvider` subtree.
 */
export function useIntelGpuContext(): IntelGpuContextValue {
  const ctx = useContext(IntelGpuContext);
  if (ctx === null) {
    throw new Error('useIntelGpuContext must be used within an IntelGpuDataProvider');
  }
  return ctx;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * API paths probed (best effort) to locate Intel GPU device plugin daemon pods.
 * The device plugin is commonly deployed in kube-system but may vary, so the
 * label-selector queries look across all namespaces.
 */
const PLUGIN_POD_LIST_URLS: readonly string[] = [
  // Intel device plugins operator deployment
  `/api/v1/pods?labelSelector=${encodeURIComponent('app=intel-gpu-plugin')}`,
  // Alternative: by component label
  `/api/v1/pods?labelSelector=${encodeURIComponent('app.kubernetes.io/name=intel-gpu-plugin')}`,
  // Intel device plugins from inteldeviceplugins-system namespace
  `/api/v1/namespaces/inteldeviceplugins-system/pods`,
];

/**
 * Headlamp useList() returns KubeObject class instances that store raw
 * Kubernetes JSON under `.jsonData`. Extract jsonData so the plain-object
 * type helpers work correctly; items without `jsonData` pass through as-is.
 */
function extractJsonData(items: readonly unknown[]): unknown[] {
  return items.map(item =>
    item && typeof item === 'object' && 'jsonData' in item
      ? (item as { jsonData: unknown }).jsonData
      : item
  );
}

/**
 * Fetches Intel GPU related cluster data once and shares it with all
 * descendants through `IntelGpuContext`.
 *
 * Nodes and pods come from Headlamp's `useList()` hooks; GpuDevicePlugin CRs
 * and device plugin daemon pods are fetched through `ApiProxy.request` and
 * re-fetched whenever `refresh()` is called.
 */
export function IntelGpuDataProvider({ children }: { children: React.ReactNode }) {
  // K8s resource hooks — headlamp re-fetches on cluster context changes
  const [allNodes, nodeError] = K8s.ResourceClasses.Node.useList();
  const [allPods, podError] = K8s.ResourceClasses.Pod.useList({ namespace: '' });

  // Async state for CRD resources
  const [devicePlugins, setDevicePlugins] = useState<GpuDevicePlugin[]>([]);
  const [pluginPods, setPluginPods] = useState<IntelGpuPod[]>([]);
  const [crdAvailable, setCrdAvailable] = useState(false);
  const [asyncLoading, setAsyncLoading] = useState(true);
  const [asyncError, setAsyncError] = useState<string | null>(null);
  const [refreshKey, setRefreshKey] = useState(0);

  // Bumping the key re-runs the effect below.
  const refresh = useCallback(() => {
    setRefreshKey(k => k + 1);
  }, []);

  useEffect(() => {
    // Set by the cleanup function so a superseded fetch never writes state.
    let cancelled = false;

    async function fetchAsync() {
      setAsyncLoading(true);
      setAsyncError(null);

      try {
        // GpuDevicePlugin CRDs — graceful degradation if CRD not installed
        try {
          const pluginList = await ApiProxy.request(
            `/apis/${INTEL_DEVICE_PLUGIN_API_GROUP}/${INTEL_DEVICE_PLUGIN_API_VERSION}/gpudeviceplugins`
          );
          if (cancelled) return;
          if (isKubeList(pluginList)) {
            setCrdAvailable(true);
            setDevicePlugins(pluginList.items.filter(isGpuDevicePlugin));
          } else {
            // Unexpected payload: reset instead of keeping values from an earlier fetch.
            setCrdAvailable(false);
            setDevicePlugins([]);
          }
        } catch {
          if (cancelled) return;
          setCrdAvailable(false);
          setDevicePlugins([]);
        }

        // Intel GPU plugin DaemonSet pods. The lookups are independent, so run
        // them in parallel; the async wrapper turns a synchronous throw into a
        // rejection so every failure is ignored the same way.
        const responses = await Promise.allSettled(
          PLUGIN_POD_LIST_URLS.map(async url => ApiProxy.request(url))
        );
        if (cancelled) return;

        const foundPluginPods: IntelGpuPod[] = [];
        for (const response of responses) {
          // Rejected lookups are ignored — some selectors may not match
          if (response.status === 'fulfilled' && isKubeList(response.value)) {
            foundPluginPods.push(...filterIntelGpuPluginPods(response.value.items));
          }
        }

        // Deduplicate by pod UID (pods without a UID are dropped)
        const seen = new Set<string>();
        const uniquePluginPods = foundPluginPods.filter(p => {
          const uid = p.metadata.uid;
          if (!uid || seen.has(uid)) return false;
          seen.add(uid);
          return true;
        });

        setPluginPods(uniquePluginPods);
      } catch (err: unknown) {
        if (!cancelled) {
          setAsyncError(err instanceof Error ? err.message : String(err));
        }
      } finally {
        if (!cancelled) setAsyncLoading(false);
      }
    }

    void fetchAsync();
    return () => {
      cancelled = true;
    };
  }, [refreshKey]);

  // Derived / filtered values — memoized to avoid recomputation on every render
  const gpuNodes = useMemo(() => {
    if (!allNodes) return [];
    return filterIntelGpuNodes(extractJsonData(allNodes as unknown[]));
  }, [allNodes]);

  const gpuPods = useMemo(() => {
    if (!allPods) return [];
    return filterGpuRequestingPods(extractJsonData(allPods as unknown[]));
  }, [allPods]);

  // Combined loading / error state. A list that failed is no longer pending;
  // otherwise a list error would leave `loading` stuck at true.
  const nodesPending = !allNodes && !nodeError;
  const podsPending = !allPods && !podError;
  const loading = asyncLoading || nodesPending || podsPending;

  const errors: string[] = [];
  if (nodeError) errors.push(String(nodeError));
  if (podError) errors.push(String(podError));
  if (asyncError) errors.push(asyncError);
  const error = errors.length > 0 ? errors.join('; ') : null;

  const pluginInstalled = devicePlugins.length > 0 || pluginPods.length > 0;

  // Memoized context value so consumers only re-render when something changed
  const value = useMemo<IntelGpuContextValue>(
    () => ({
      devicePlugins,
      pluginInstalled,
      gpuNodes,
      gpuPods,
      pluginPods,
      crdAvailable,
      loading,
      error,
      refresh,
    }),
    [
      devicePlugins,
      pluginInstalled,
      gpuNodes,
      gpuPods,
      pluginPods,
      crdAvailable,
      loading,
      error,
      refresh,
    ]
  );

  return <IntelGpuContext.Provider value={value}>{children}</IntelGpuContext.Provider>;
}
|
||||
@@ -0,0 +1,477 @@
|
||||
/**
|
||||
* Unit tests for Intel GPU k8s helper functions.
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
filterGpuRequestingPods,
|
||||
filterIntelGpuNodes,
|
||||
formatAge,
|
||||
formatGpuResourceName,
|
||||
formatGpuType,
|
||||
getNodeGpuCount,
|
||||
getNodeGpuType,
|
||||
getPodGpuRequests,
|
||||
INTEL_GPU_NODE_LABEL,
|
||||
INTEL_GPU_RESOURCE,
|
||||
INTEL_GPU_XE_RESOURCE,
|
||||
isGpuRequestingPod,
|
||||
isIntelGpuNode,
|
||||
isKubeList,
|
||||
isNodeReady,
|
||||
pluginStatusText,
|
||||
pluginStatusToStatus,
|
||||
type GpuDevicePlugin,
|
||||
type IntelGpuNode,
|
||||
type IntelGpuPod,
|
||||
} from './k8s';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Builds a minimal Node fixture named 'test-node' with an empty status.
 * Top-level fields in `overrides` replace the defaults wholesale (shallow spread).
 */
function makeNode(overrides: Partial<IntelGpuNode> = {}): IntelGpuNode {
  return {
    apiVersion: 'v1',
    kind: 'Node',
    metadata: { name: 'test-node' },
    status: {},
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Builds a Ready Node fixture named 'gpu-node' with two i915 GPUs in
 * capacity/allocatable and a single label chosen by `type`.
 */
function makeGpuNode(type: 'discrete' | 'integrated' | 'generic' = 'discrete'): IntelGpuNode {
  const labels: Record<string, string> = {};
  if (type === 'discrete') labels['node-role.kubernetes.io/gpu'] = 'true';
  if (type === 'integrated') labels['node-role.kubernetes.io/igpu'] = 'true';
  if (type === 'generic') labels[INTEL_GPU_NODE_LABEL] = 'true';

  return {
    apiVersion: 'v1',
    kind: 'Node',
    metadata: { name: 'gpu-node', labels },
    status: {
      capacity: { [INTEL_GPU_RESOURCE]: '2' },
      allocatable: { [INTEL_GPU_RESOURCE]: '2' },
      conditions: [{ type: 'Ready', status: 'True' }],
    },
  };
}
|
||||
|
||||
/**
 * Builds a Running Pod fixture named 'gpu-pod' with one container that
 * requests and limits `amount` of the resource `gpuResourceKey`.
 */
function makeGpuPod(gpuResourceKey: string = INTEL_GPU_RESOURCE, amount = '1'): IntelGpuPod {
  return {
    apiVersion: 'v1',
    kind: 'Pod',
    metadata: { name: 'gpu-pod', namespace: 'default' },
    spec: {
      nodeName: 'gpu-node',
      containers: [
        {
          name: 'workload',
          resources: {
            requests: { [gpuResourceKey]: amount },
            limits: { [gpuResourceKey]: amount },
          },
        },
      ],
    },
    status: { phase: 'Running' },
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// isIntelGpuNode
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Detection by label, by capacity key, and rejection of non-GPU / nullish input.
describe('isIntelGpuNode', () => {
  it('returns true for nodes with discrete GPU label', () => {
    const node = makeGpuNode('discrete');
    expect(isIntelGpuNode(node)).toBe(true);
  });

  it('returns true for nodes with integrated GPU label', () => {
    const node = makeGpuNode('integrated');
    expect(isIntelGpuNode(node)).toBe(true);
  });

  it('returns true for nodes with generic Intel GPU label', () => {
    const node = makeGpuNode('generic');
    expect(isIntelGpuNode(node)).toBe(true);
  });

  it('returns true for nodes with gpu.intel.com/* in capacity', () => {
    const node = makeNode({
      status: { capacity: { 'gpu.intel.com/i915': '1' } },
    });
    expect(isIntelGpuNode(node)).toBe(true);
  });

  it('returns false for nodes with no GPU labels or resources', () => {
    const node = makeNode({
      metadata: { name: 'regular-node', labels: {} },
      status: { capacity: { cpu: '8', memory: '16Gi' } },
    });
    expect(isIntelGpuNode(node)).toBe(false);
  });

  it('returns false for null/undefined', () => {
    expect(isIntelGpuNode(null)).toBe(false);
    expect(isIntelGpuNode(undefined)).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// filterIntelGpuNodes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Only nodes accepted by isIntelGpuNode survive the filter.
describe('filterIntelGpuNodes', () => {
  it('filters out non-GPU nodes', () => {
    const gpuNode = makeGpuNode('discrete');
    const regularNode = makeNode({ metadata: { name: 'regular' } });
    const result = filterIntelGpuNodes([gpuNode, regularNode]);
    expect(result).toHaveLength(1);
    expect(result[0].metadata.name).toBe('gpu-node');
  });

  it('handles empty array', () => {
    expect(filterIntelGpuNodes([])).toHaveLength(0);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getNodeGpuType
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// GPU type is derived from node-role labels; anything else is 'unknown'.
describe('getNodeGpuType', () => {
  it('returns discrete for GPU node role label', () => {
    expect(getNodeGpuType(makeGpuNode('discrete'))).toBe('discrete');
  });

  it('returns integrated for iGPU node role label', () => {
    expect(getNodeGpuType(makeGpuNode('integrated'))).toBe('integrated');
  });

  it('returns unknown for generic Intel GPU label', () => {
    expect(getNodeGpuType(makeGpuNode('generic'))).toBe('unknown');
  });

  it('returns unknown for nodes with no labels', () => {
    const node = makeNode({ status: { capacity: { [INTEL_GPU_RESOURCE]: '1' } } });
    expect(getNodeGpuType(node)).toBe('unknown');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getNodeGpuCount
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// The count is the sum of the i915 and xe capacity entries.
describe('getNodeGpuCount', () => {
  it('returns count from i915 resource', () => {
    const node = makeNode({
      status: { capacity: { [INTEL_GPU_RESOURCE]: '4' } },
    });
    expect(getNodeGpuCount(node)).toBe(4);
  });

  it('returns count from xe resource', () => {
    const node = makeNode({
      status: { capacity: { [INTEL_GPU_XE_RESOURCE]: '2' } },
    });
    expect(getNodeGpuCount(node)).toBe(2);
  });

  it('returns sum of i915 and xe resources', () => {
    const node = makeNode({
      status: {
        capacity: {
          [INTEL_GPU_RESOURCE]: '2',
          [INTEL_GPU_XE_RESOURCE]: '1',
        },
      },
    });
    expect(getNodeGpuCount(node)).toBe(3);
  });

  it('returns 0 for nodes with no GPU capacity', () => {
    const node = makeNode({ status: { capacity: { cpu: '8' } } });
    expect(getNodeGpuCount(node)).toBe(0);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// isNodeReady
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Readiness follows the node's 'Ready' condition; missing conditions mean not ready.
describe('isNodeReady', () => {
  it('returns true when Ready condition is True', () => {
    const node = makeNode({
      status: { conditions: [{ type: 'Ready', status: 'True' }] },
    });
    expect(isNodeReady(node)).toBe(true);
  });

  it('returns false when Ready condition is False', () => {
    const node = makeNode({
      status: { conditions: [{ type: 'Ready', status: 'False' }] },
    });
    expect(isNodeReady(node)).toBe(false);
  });

  it('returns false when no conditions', () => {
    const node = makeNode({ status: {} });
    expect(isNodeReady(node)).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// isGpuRequestingPod
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Any gpu.intel.com/* container resource marks the pod as GPU-requesting.
describe('isGpuRequestingPod', () => {
  it('returns true for pods requesting i915 GPU', () => {
    expect(isGpuRequestingPod(makeGpuPod(INTEL_GPU_RESOURCE))).toBe(true);
  });

  it('returns true for pods requesting xe GPU', () => {
    expect(isGpuRequestingPod(makeGpuPod(INTEL_GPU_XE_RESOURCE))).toBe(true);
  });

  it('returns true for pods requesting millicores', () => {
    expect(isGpuRequestingPod(makeGpuPod('gpu.intel.com/millicores', '500'))).toBe(true);
  });

  it('returns false for pods with no GPU resources', () => {
    const pod: IntelGpuPod = {
      apiVersion: 'v1',
      kind: 'Pod',
      metadata: { name: 'no-gpu-pod' },
      spec: {
        containers: [
          {
            name: 'app',
            resources: {
              requests: { cpu: '1', memory: '1Gi' },
            },
          },
        ],
      },
    };
    expect(isGpuRequestingPod(pod)).toBe(false);
  });

  it('returns false for null', () => {
    expect(isGpuRequestingPod(null)).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// filterGpuRequestingPods
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Pods without GPU resources are dropped from the result.
describe('filterGpuRequestingPods', () => {
  it('filters out non-GPU pods', () => {
    const gpuPod = makeGpuPod();
    const regularPod: IntelGpuPod = {
      apiVersion: 'v1',
      kind: 'Pod',
      metadata: { name: 'regular' },
      spec: { containers: [{ name: 'app' }] },
    };
    const result = filterGpuRequestingPods([gpuPod, regularPod]);
    expect(result).toHaveLength(1);
    expect(result[0].metadata.name).toBe('gpu-pod');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getPodGpuRequests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// GPU requests are collected per resource name and summed across containers.
describe('getPodGpuRequests', () => {
  it('returns GPU resource requests from containers', () => {
    const pod = makeGpuPod(INTEL_GPU_RESOURCE, '2');
    const requests = getPodGpuRequests(pod);
    expect(requests[INTEL_GPU_RESOURCE]).toBe('2');
  });

  it('returns empty object for non-GPU pods', () => {
    const pod: IntelGpuPod = {
      apiVersion: 'v1',
      kind: 'Pod',
      metadata: { name: 'regular' },
      spec: { containers: [{ name: 'app', resources: { requests: { cpu: '1' } } }] },
    };
    expect(getPodGpuRequests(pod)).toEqual({});
  });

  it('sums requests across multiple containers', () => {
    const pod: IntelGpuPod = {
      apiVersion: 'v1',
      kind: 'Pod',
      metadata: { name: 'multi' },
      spec: {
        containers: [
          { name: 'a', resources: { requests: { [INTEL_GPU_RESOURCE]: '1' } } },
          { name: 'b', resources: { requests: { [INTEL_GPU_RESOURCE]: '2' } } },
        ],
      },
    };
    const requests = getPodGpuRequests(pod);
    expect(requests[INTEL_GPU_RESOURCE]).toBe('3');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// isKubeList
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// A list is any object carrying an `items` array.
describe('isKubeList', () => {
  it('returns true for objects with items array', () => {
    expect(isKubeList({ items: [] })).toBe(true);
    expect(isKubeList({ items: [1, 2, 3] })).toBe(true);
  });

  it('returns false for objects without items', () => {
    expect(isKubeList({ data: [] })).toBe(false);
    expect(isKubeList(null)).toBe(false);
    expect(isKubeList('string')).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// formatAge
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Timestamps are built relative to Date.now(), so each case sits well inside its unit.
describe('formatAge', () => {
  it('returns unknown for undefined', () => {
    expect(formatAge(undefined)).toBe('unknown');
  });

  it('formats seconds', () => {
    const ts = new Date(Date.now() - 30 * 1000).toISOString();
    expect(formatAge(ts)).toBe('30s');
  });

  it('formats minutes', () => {
    const ts = new Date(Date.now() - 5 * 60 * 1000).toISOString();
    expect(formatAge(ts)).toBe('5m');
  });

  it('formats hours', () => {
    const ts = new Date(Date.now() - 3 * 60 * 60 * 1000).toISOString();
    expect(formatAge(ts)).toBe('3h');
  });

  it('formats days', () => {
    const ts = new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString();
    expect(formatAge(ts)).toBe('2d');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// formatGpuResourceName
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Known resource names get a friendly label; others fall back to the raw suffix.
describe('formatGpuResourceName', () => {
  it('formats i915 resource', () => {
    expect(formatGpuResourceName('gpu.intel.com/i915')).toBe('GPU (i915)');
  });

  it('formats xe resource', () => {
    expect(formatGpuResourceName('gpu.intel.com/xe')).toBe('GPU (Xe)');
  });

  it('formats millicores resource', () => {
    expect(formatGpuResourceName('gpu.intel.com/millicores')).toBe('GPU Millicores');
  });

  it('returns raw suffix for unknown resources', () => {
    expect(formatGpuResourceName('gpu.intel.com/custom')).toBe('custom');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// formatGpuType
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Each GpuType value maps to its capitalized display label.
describe('formatGpuType', () => {
  it('formats discrete', () => {
    expect(formatGpuType('discrete')).toBe('Discrete');
  });

  it('formats integrated', () => {
    expect(formatGpuType('integrated')).toBe('Integrated');
  });

  it('formats unknown', () => {
    expect(formatGpuType('unknown')).toBe('Unknown');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// pluginStatusToStatus
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Maps desired/ready/unavailable counts to a success | warning | error status.
describe('pluginStatusToStatus', () => {
  /** Builds a GpuDevicePlugin fixture with the given daemonset counters. */
  function makePlugin(desired: number, ready: number, unavailable = 0): GpuDevicePlugin {
    return {
      apiVersion: 'deviceplugin.intel.com/v1',
      kind: 'GpuDevicePlugin',
      metadata: { name: 'test-plugin' },
      spec: {},
      status: {
        desiredNumberScheduled: desired,
        numberReady: ready,
        numberUnavailable: unavailable,
      },
    };
  }

  it('returns success when all nodes ready', () => {
    expect(pluginStatusToStatus(makePlugin(3, 3))).toBe('success');
  });

  it('returns warning when desired is 0', () => {
    expect(pluginStatusToStatus(makePlugin(0, 0))).toBe('warning');
  });

  it('returns warning when some nodes unavailable', () => {
    expect(pluginStatusToStatus(makePlugin(3, 2, 1))).toBe('warning');
  });

  it('returns error when ready < desired with no unavailable', () => {
    expect(pluginStatusToStatus(makePlugin(3, 1))).toBe('error');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// pluginStatusText
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Human-readable summary of the ready/desired counters.
describe('pluginStatusText', () => {
  it('shows ready/desired counts', () => {
    const plugin: GpuDevicePlugin = {
      apiVersion: 'deviceplugin.intel.com/v1',
      kind: 'GpuDevicePlugin',
      metadata: { name: 'p' },
      spec: {},
      status: { desiredNumberScheduled: 3, numberReady: 2 },
    };
    expect(pluginStatusText(plugin)).toBe('2/3 ready');
  });

  it('shows no nodes scheduled when desired is 0', () => {
    const plugin: GpuDevicePlugin = {
      apiVersion: 'deviceplugin.intel.com/v1',
      kind: 'GpuDevicePlugin',
      metadata: { name: 'p' },
      spec: {},
      status: { desiredNumberScheduled: 0, numberReady: 0 },
    };
    expect(pluginStatusText(plugin)).toBe('No nodes scheduled');
  });
});
|
||||
+393
@@ -0,0 +1,393 @@
|
||||
/**
|
||||
* Kubernetes type definitions and helper functions for Intel GPU device plugin resources.
|
||||
*
|
||||
* All K8s resource types are typed at the fields we actually use.
|
||||
* External data from the API is validated at the boundary before use.
|
||||
*/
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Intel GPU device plugin constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** API group for Intel device plugin CRDs */
export const INTEL_DEVICE_PLUGIN_API_GROUP = 'deviceplugin.intel.com';
/** API version of the Intel device plugin CRDs */
export const INTEL_DEVICE_PLUGIN_API_VERSION = 'v1';

/** Kubernetes extended resource names for Intel GPU */
export const INTEL_GPU_RESOURCE = 'gpu.intel.com/i915' as const;
export const INTEL_GPU_XE_RESOURCE = 'gpu.intel.com/xe' as const;
export const INTEL_GPU_MILLICORES_RESOURCE = 'gpu.intel.com/millicores' as const;
export const INTEL_GPU_MEMORY_RESOURCE = 'gpu.intel.com/memory.max' as const;

/** Common prefix of all Intel GPU resource names (used for prefix matching) */
export const INTEL_GPU_RESOURCE_PREFIX = 'gpu.intel.com/';

/**
 * Node labels used to recognise Intel GPU nodes.
 * NOTE(review): the original comment attributes these to Intel Node Feature
 * Discovery; confirm that also holds for the two node-role labels.
 */
export const INTEL_GPU_NODE_LABEL = 'intel.feature.node.kubernetes.io/gpu';
export const INTEL_DISCRETE_GPU_NODE_ROLE = 'node-role.kubernetes.io/gpu';
export const INTEL_INTEGRATED_GPU_NODE_ROLE = 'node-role.kubernetes.io/igpu';

/** Label selector for Intel GPU device plugin DaemonSet pods */
export const INTEL_GPU_PLUGIN_LABEL_SELECTOR = 'app=intel-gpu-plugin';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Generic Kubernetes object base shapes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Subset of Kubernetes ObjectMeta fields that this plugin reads. */
export interface KubeObjectMeta {
  name: string;
  namespace?: string;
  creationTimestamp?: string;
  labels?: Record<string, string>;
  annotations?: Record<string, string>;
  uid?: string;
}

/** Minimal base shape shared by every Kubernetes resource type declared here. */
export interface KubeObject {
  apiVersion?: string;
  kind?: string;
  metadata: KubeObjectMeta;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GpuDevicePlugin CRD (deviceplugin.intel.com/v1)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** `spec` fields of a GpuDevicePlugin custom resource that this plugin reads. */
export interface GpuDevicePluginSpec {
  image?: string;
  sharedDevNum?: number;
  enableMonitoring?: boolean;
  preferredAllocationPolicy?: string;
  nodeSelector?: Record<string, string>;
  resourceManager?: boolean;
  logLevel?: number;
}

/** `status` counters of a GpuDevicePlugin custom resource. */
export interface GpuDevicePluginStatus {
  /** Number of nodes where the plugin daemonset is scheduled */
  desiredNumberScheduled?: number;
  /** Number of nodes where the plugin daemonset is running and ready */
  numberReady?: number;
  /** Number of nodes where the plugin daemonset pod is unavailable */
  numberUnavailable?: number;
  /** Number of nodes where the plugin daemonset is available */
  numberAvailable?: number;
}

/** GpuDevicePlugin custom resource (deviceplugin.intel.com/v1). */
export interface GpuDevicePlugin extends KubeObject {
  spec: GpuDevicePluginSpec;
  status?: GpuDevicePluginStatus;
}
|
||||
|
||||
/**
 * Type guard for GpuDevicePlugin custom resources received from the API.
 *
 * Accepts only objects whose `kind` is 'GpuDevicePlugin' and whose
 * `metadata.name` is a string, so callers can safely read `metadata.name`.
 * Other fields (including `spec`) are not validated.
 */
export function isGpuDevicePlugin(value: unknown): value is GpuDevicePlugin {
  if (!value || typeof value !== 'object') return false;
  const obj = value as Record<string, unknown>;
  if (obj['kind'] !== 'GpuDevicePlugin') return false;

  // `metadata` is required by the GpuDevicePlugin type; reject objects without it.
  const metadata = obj['metadata'];
  if (!metadata || typeof metadata !== 'object') return false;
  return typeof (metadata as Record<string, unknown>)['name'] === 'string';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Node (with GPU resource fields)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Resource-name → quantity map as found in a node's capacity/allocatable. */
export interface NodeResources {
  [key: string]: string | undefined;
}

/** Subset of Node `status` fields that this plugin reads. */
export interface NodeStatus {
  capacity?: NodeResources;
  allocatable?: NodeResources;
  conditions?: Array<{
    type: string;
    status: string;
    lastHeartbeatTime?: string;
    reason?: string;
    message?: string;
  }>;
  nodeInfo?: {
    kernelVersion?: string;
    osImage?: string;
    architecture?: string;
    kubeletVersion?: string;
  };
}

/** Subset of Node `spec` fields that this plugin reads. */
export interface NodeSpec {
  taints?: Array<{ key: string; effect: string; value?: string }>;
  unschedulable?: boolean;
}

/** Kubernetes Node typed at the fields relevant to Intel GPU handling. */
export interface IntelGpuNode extends KubeObject {
  spec?: NodeSpec;
  status?: NodeStatus;
}
|
||||
|
||||
/**
 * Type guard: returns true when `node` looks like an Intel GPU node.
 *
 * A node qualifies if either
 *  - one of the labels INTEL_GPU_NODE_LABEL, INTEL_DISCRETE_GPU_NODE_ROLE or
 *    INTEL_INTEGRATED_GPU_NODE_ROLE is exactly 'true', or
 *  - any key of `status.capacity` starts with INTEL_GPU_RESOURCE_PREFIX.
 *
 * Only those fields are inspected; the rest of the object is not validated.
 */
export function isIntelGpuNode(node: unknown): node is IntelGpuNode {
  if (!node || typeof node !== 'object') return false;

  const obj = node as Record<string, unknown>;
  const meta = obj['metadata'] as Record<string, unknown> | undefined;
  const labels = meta?.['labels'] as Record<string, string> | undefined;
  const status = obj['status'] as Record<string, unknown> | undefined;
  const capacity = status?.['capacity'] as Record<string, string> | undefined;

  // Check node labels
  if (labels) {
    const gpuLabels = [
      INTEL_GPU_NODE_LABEL,
      INTEL_DISCRETE_GPU_NODE_ROLE,
      INTEL_INTEGRATED_GPU_NODE_ROLE,
    ];
    if (gpuLabels.some(label => labels[label] === 'true')) return true;
  }

  // Check node capacity for Intel GPU resources
  if (capacity) {
    return Object.keys(capacity).some(key => key.startsWith(INTEL_GPU_RESOURCE_PREFIX));
  }

  return false;
}
|
||||
|
||||
/**
 * Keep only the items that `isIntelGpuNode` accepts, preserving input order.
 *
 * @param items - Arbitrary values, typically the items of a node list response.
 * @returns A new array containing the Intel GPU nodes.
 */
export function filterIntelGpuNodes(items: unknown[]): IntelGpuNode[] {
  const gpuNodes: IntelGpuNode[] = [];
  for (const item of items) {
    if (isIntelGpuNode(item)) {
      gpuNodes.push(item);
    }
  }
  return gpuNodes;
}
|
||||
|
||||
/**
 * Extract the Intel GPU entries from a node resource map.
 *
 * @param resources - A node's `capacity` or `allocatable` map; may be undefined.
 * @returns A new object holding only the entries whose key starts with the
 *   Intel GPU resource prefix and whose value is defined. Empty when
 *   `resources` is missing.
 */
export function getGpuResources(resources: NodeResources | undefined): Record<string, string> {
  if (!resources) {
    return {};
  }
  const gpuEntries = Object.entries(resources).filter(
    (entry): entry is [string, string] =>
      entry[0].startsWith(INTEL_GPU_RESOURCE_PREFIX) && entry[1] !== undefined
  );
  return Object.fromEntries(gpuEntries);
}
|
||||
|
||||
/**
 * Total number of GPU devices advertised in a node's capacity.
 *
 * Only the two device-count resources (`INTEL_GPU_RESOURCE` and
 * `INTEL_GPU_XE_RESOURCE`) are counted; other Intel GPU resources are ignored.
 * Values that do not parse as a base-10 integer contribute 0.
 *
 * @returns The summed device count, or 0 when the node has no capacity.
 */
export function getNodeGpuCount(node: IntelGpuNode): number {
  const deviceResources = new Set<string>([INTEL_GPU_RESOURCE, INTEL_GPU_XE_RESOURCE]);
  return Object.entries(node.status?.capacity ?? {}).reduce(
    (total, [resourceName, quantity]) =>
      deviceResources.has(resourceName) && quantity
        ? total + (parseInt(quantity, 10) || 0)
        : total,
    0
  );
}
|
||||
|
||||
/** Classification of an Intel GPU node as reported by `getNodeGpuType`. */
export type GpuType = 'discrete' | 'integrated' | 'unknown';

/**
 * Determine the GPU type of a node from its labels.
 *
 * The discrete role label is checked first, then the integrated role label;
 * each must be the string `'true'` to match. Any other node — including one
 * that only carries the generic Intel GPU label, which says nothing about the
 * GPU kind — is reported as `'unknown'`.
 *
 * @param node - Node to classify. A node without `metadata` or labels is
 *   treated as having no labels rather than throwing.
 * @returns `'discrete'`, `'integrated'`, or `'unknown'`.
 */
export function getNodeGpuType(node: IntelGpuNode): GpuType {
  // `isIntelGpuNode` accepts objects on capacity alone, so metadata may be absent.
  const labels = node.metadata?.labels ?? {};
  if (labels[INTEL_DISCRETE_GPU_NODE_ROLE] === 'true') return 'discrete';
  if (labels[INTEL_INTEGRATED_GPU_NODE_ROLE] === 'true') return 'integrated';
  return 'unknown';
}
|
||||
|
||||
/**
 * Human-readable label for a GPU type.
 *
 * @returns `'Discrete'` or `'Integrated'` for the matching types, and
 *   `'Unknown'` for everything else.
 */
export function formatGpuType(type: GpuType): string {
  if (type === 'discrete') {
    return 'Discrete';
  }
  if (type === 'integrated') {
    return 'Integrated';
  }
  return 'Unknown';
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pod (with GPU resource requests)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A container's resource `requests` and `limits`, keyed by resource name. */
export interface ResourceRequirements {
  limits?: { [resourceName: string]: string };
  requests?: { [resourceName: string]: string };
}
|
||||
|
||||
/** The parts of a container definition that this module reads. */
export interface ContainerSpec {
  /** Container name (required). */
  name: string;
  /** Resource requests/limits; inspected for Intel GPU resource names. */
  resources?: ResourceRequirements;
  /** Container image reference, if present. */
  image?: string;
}
|
||||
|
||||
/** Per-container runtime status as found in a pod's `status.containerStatuses`. */
export interface ContainerStatus {
  name: string;
  image?: string;
  ready: boolean;
  /** Restart counter; `getPodRestarts` sums this across containers. */
  restartCount: number;
  /** Current state; the object may carry a `running`, `waiting` or `terminated` entry. */
  state?: {
    running?: {
      startedAt?: string;
    };
    waiting?: {
      reason?: string;
      message?: string;
    };
    terminated?: {
      exitCode?: number;
      reason?: string;
    };
  };
}
|
||||
|
||||
/** The parts of a pod's `spec` that this module reads. */
export interface PodSpec {
  /** Name of the node the pod is assigned to, if any. */
  nodeName?: string;
  /** Init containers of the pod. */
  initContainers?: Array<ContainerSpec>;
  /** Regular (app) containers of the pod. */
  containers?: Array<ContainerSpec>;
}
|
||||
|
||||
/** The parts of a pod's `status` that this module reads. */
export interface PodStatus {
  /** Pod phase string. */
  phase?: string;
  /** Pod conditions; `isPodReady` looks for `type: 'Ready'` with `status: 'True'`. */
  conditions?: {
    type: string;
    status: string;
  }[];
  /** One entry per container; used by `getPodRestarts`. */
  containerStatuses?: Array<ContainerStatus>;
}
|
||||
|
||||
/**
 * A pod object narrowed by `isGpuRequestingPod` or `isIntelGpuPluginPod`.
 * Extends the base `KubeObject` shape with optional `spec` and `status`.
 */
export interface IntelGpuPod extends KubeObject {
  status?: PodStatus;
  spec?: PodSpec;
}
|
||||
|
||||
/**
 * Type guard for pods that ask for Intel GPU resources.
 *
 * Returns true when at least one container or init container names a resource
 * starting with the Intel GPU resource prefix in its `requests` or `limits`.
 * Non-object and null inputs are rejected; a pod without `spec` or containers
 * yields false.
 */
export function isGpuRequestingPod(pod: unknown): pod is IntelGpuPod {
  if (!pod || typeof pod !== 'object') {
    return false;
  }

  const podSpec = (pod as Record<string, unknown>)['spec'] as Record<string, unknown> | undefined;
  const appContainers = (podSpec?.['containers'] ?? []) as ContainerSpec[];
  const initContainers = (podSpec?.['initContainers'] ?? []) as ContainerSpec[];

  // True when any resource name in the given map is an Intel GPU resource.
  const namesGpuResource = (quantities: Record<string, string> | undefined): boolean =>
    Object.keys(quantities ?? {}).some(resourceName =>
      resourceName.startsWith(INTEL_GPU_RESOURCE_PREFIX)
    );

  for (const container of [...appContainers, ...initContainers]) {
    if (
      namesGpuResource(container.resources?.requests) ||
      namesGpuResource(container.resources?.limits)
    ) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Keep only the items that `isGpuRequestingPod` accepts, preserving input order.
 *
 * @param items - Arbitrary values, typically the items of a pod list response.
 * @returns A new array containing the pods that request Intel GPU resources.
 */
export function filterGpuRequestingPods(items: unknown[]): IntelGpuPod[] {
  const gpuPods: IntelGpuPod[] = [];
  for (const item of items) {
    if (isGpuRequestingPod(item)) {
      gpuPods.push(item);
    }
  }
  return gpuPods;
}
|
||||
|
||||
/**
 * Type guard for pods that belong to the Intel GPU device plugin itself.
 *
 * Matching is done purely on pod labels: the pod qualifies when any of the
 * labels `app`, `app.kubernetes.io/name` or `component` equals
 * `'intel-gpu-plugin'`. Containers and resource requests are not inspected.
 * Non-object inputs and pods without labels yield false.
 */
export function isIntelGpuPluginPod(pod: unknown): pod is IntelGpuPod {
  if (!pod || typeof pod !== 'object') {
    return false;
  }

  const metadata = (pod as Record<string, unknown>)['metadata'] as
    | Record<string, unknown>
    | undefined;
  const podLabels = metadata?.['labels'] as Record<string, string> | undefined;
  if (!podLabels) {
    return false;
  }

  const identifyingLabelKeys = ['app', 'app.kubernetes.io/name', 'component'];
  return identifyingLabelKeys.some(labelKey => podLabels[labelKey] === 'intel-gpu-plugin');
}
|
||||
|
||||
/**
 * Keep only the items that `isIntelGpuPluginPod` accepts, preserving input order.
 *
 * @param items - Arbitrary values, typically the items of a pod list response.
 * @returns A new array containing the Intel GPU device plugin pods.
 */
export function filterIntelGpuPluginPods(items: unknown[]): IntelGpuPod[] {
  const pluginPods: IntelGpuPod[] = [];
  for (const item of items) {
    if (isIntelGpuPluginPod(item)) {
      pluginPods.push(item);
    }
  }
  return pluginPods;
}
|
||||
|
||||
/**
 * Compute a pod's effective Intel GPU requests, per resource name.
 *
 * App containers run side by side, so their requests are summed. Init
 * containers run one at a time before the app containers start, so they do not
 * add to that sum; following the Kubernetes effective-request rule the result
 * for each resource is
 *
 *   max(sum over app containers, largest single init-container request)
 *
 * Only `requests` entries whose key starts with the Intel GPU resource prefix
 * are considered. Values are parsed as base-10 integers; unparseable values
 * count as 0.
 *
 * NOTE(review): init containers declared with `restartPolicy: Always`
 * (sidecars) keep running next to the app containers and would need to be
 * summed instead. `ContainerSpec` does not model `restartPolicy`, so they are
 * treated as ordinary init containers here — confirm whether that matters.
 *
 * @param pod - Pod whose container requests are inspected.
 * @returns Map from GPU resource name to the effective amount, as a string.
 */
export function getPodGpuRequests(pod: IntelGpuPod): Record<string, string> {
  // Yields [resourceName, amount] for every Intel GPU request in one container.
  const gpuAmounts = (requests: Record<string, string> | undefined): Array<[string, number]> =>
    Object.entries(requests ?? {})
      .filter(([name, quantity]) => name.startsWith(INTEL_GPU_RESOURCE_PREFIX) && quantity)
      .map(([name, quantity]): [string, number] => [name, parseInt(quantity, 10) || 0]);

  const effective: Record<string, number> = {};

  // App containers run concurrently: requests add up.
  for (const container of pod.spec?.containers ?? []) {
    for (const [name, amount] of gpuAmounts(container.resources?.requests)) {
      effective[name] = (effective[name] ?? 0) + amount;
    }
  }

  // Init containers run sequentially: only the largest one can raise the total.
  // This must happen after the app-container sum is complete.
  for (const container of pod.spec?.initContainers ?? []) {
    for (const [name, amount] of gpuAmounts(container.resources?.requests)) {
      effective[name] = Math.max(effective[name] ?? 0, amount);
    }
  }

  return Object.fromEntries(
    Object.entries(effective).map(([name, amount]) => [name, String(amount)])
  );
}
|
||||
|
||||
/**
 * Whether the pod reports a `Ready` condition with status `'True'`.
 *
 * @returns false when the pod has no status or no conditions.
 */
export function isPodReady(pod: IntelGpuPod): boolean {
  const podConditions = pod.status?.conditions;
  if (podConditions == null) {
    return false;
  }
  return podConditions.some(condition => condition.status === 'True' && condition.type === 'Ready');
}
|
||||
|
||||
/**
 * Total restart count across all container statuses of a pod.
 *
 * @returns The sum of `restartCount`, or 0 when the pod reports no container statuses.
 */
export function getPodRestarts(pod: IntelGpuPod): number {
  let restarts = 0;
  for (const containerStatus of pod.status?.containerStatuses ?? []) {
    restarts += containerStatus.restartCount;
  }
  return restarts;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// K8s API list response envelope
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Envelope of a list response: the `items` array plus optional list metadata.
 *
 * @typeParam T - Type of the individual list items.
 */
export interface KubeList<T> {
  metadata?: {
    resourceVersion?: string;
  };
  items: Array<T>;
}
|
||||
|
||||
/**
 * Type guard for list envelopes: any non-null object whose `items` property is
 * an array. The items themselves are not validated.
 */
export function isKubeList(value: unknown): value is KubeList<unknown> {
  return (
    typeof value === 'object' &&
    value !== null &&
    Array.isArray((value as { items?: unknown }).items)
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Node condition helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Whether the node reports a `Ready` condition with status `'True'`.
 *
 * @returns false when the node has no status or no conditions.
 */
export function isNodeReady(node: IntelGpuNode): boolean {
  const nodeConditions = node.status?.conditions;
  if (nodeConditions == null) {
    return false;
  }
  return nodeConditions.some(condition => condition.status === 'True' && condition.type === 'Ready');
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Utility: human-readable age
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Format the time elapsed since `timestamp` as a short age string.
 *
 * The result uses the largest fitting unit, rounded down: `"42s"`, `"7m"`,
 * `"3h"` or `"12d"`.
 *
 * @param timestamp - A date string understood by `Date` (e.g. an ISO 8601
 *   creation timestamp), or undefined.
 * @returns The age string, or `'unknown'` when the timestamp is missing,
 *   empty, or cannot be parsed. Timestamps in the future (for example due to
 *   clock skew between client and cluster) are reported as `"0s"` rather than
 *   as a negative age.
 */
export function formatAge(timestamp: string | undefined): string {
  if (!timestamp) return 'unknown';

  const createdMs = new Date(timestamp).getTime();
  // An unparseable date yields NaN, which would otherwise surface as "NaNd".
  if (Number.isNaN(createdMs)) return 'unknown';

  // Clamp at zero so a slightly-ahead timestamp does not render as "-3s".
  const secs = Math.max(0, Math.floor((Date.now() - createdMs) / 1000));
  if (secs < 60) return `${secs}s`;

  const mins = Math.floor(secs / 60);
  if (mins < 60) return `${mins}m`;

  const hours = Math.floor(mins / 60);
  if (hours < 24) return `${hours}h`;

  const days = Math.floor(hours / 24);
  return `${days}d`;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Utility: GPU resource display name
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Display name for an Intel GPU resource key.
 *
 * The first occurrence of the Intel GPU resource prefix is removed from
 * `resourceKey`; the remainder is looked up in a small table of known
 * resources. Unknown remainders are returned as-is.
 *
 * @param resourceKey - Full resource name as found in capacity/requests maps.
 * @returns The friendly name, or the prefix-stripped key when no mapping exists.
 */
export function formatGpuResourceName(resourceKey: string): string {
  const displayNames: Record<string, string> = {
    i915: 'GPU (i915)',
    xe: 'GPU (Xe)',
    millicores: 'GPU Millicores',
    'memory.max': 'GPU Memory (max)',
    tiles: 'GPU Tiles',
  };
  const shortName = resourceKey.replace(INTEL_GPU_RESOURCE_PREFIX, '');
  return displayNames[shortName] ?? shortName;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Status helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Map a GpuDevicePlugin's rollout counters to a status level.
 *
 * Missing counters are treated as 0. Rules, in order:
 *  - nothing desired (`desiredNumberScheduled` is 0) → `'warning'`
 *  - any unavailable instance (`numberUnavailable` > 0) → `'warning'`
 *  - `numberReady` equals the desired count → `'success'`
 *  - otherwise → `'error'`
 */
export function pluginStatusToStatus(plugin: GpuDevicePlugin): 'success' | 'warning' | 'error' {
  const pluginStatus = plugin.status;
  const desiredCount = pluginStatus?.desiredNumberScheduled ?? 0;
  const readyCount = pluginStatus?.numberReady ?? 0;
  const unavailableCount = pluginStatus?.numberUnavailable ?? 0;

  if (desiredCount === 0 || unavailableCount > 0) {
    return 'warning';
  }
  return readyCount === desiredCount ? 'success' : 'error';
}
|
||||
|
||||
/**
 * Short textual summary of a GpuDevicePlugin's readiness.
 *
 * @returns `'No nodes scheduled'` when the desired count is 0 or missing,
 *   otherwise `"<ready>/<desired> ready"` (a missing ready count shows as 0).
 */
export function pluginStatusText(plugin: GpuDevicePlugin): string {
  const desiredCount = plugin.status?.desiredNumberScheduled ?? 0;
  if (desiredCount === 0) {
    return 'No nodes scheduled';
  }
  const readyCount = plugin.status?.numberReady ?? 0;
  return String(readyCount) + '/' + String(desiredCount) + ' ready';
}
|
||||
Reference in New Issue
Block a user