diff --git a/.eslintrc.js b/.eslintrc.js index e37cc11..8c10974 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -1,3 +1,8 @@ module.exports = { extends: ['@headlamp-k8s/eslint-config'], + rules: { + // Prettier handles indentation; the shared config's indent rule + // conflicts with Prettier's JSX ternary formatting. + indent: 'off', + }, }; diff --git a/CLAUDE.md b/CLAUDE.md index ea03f80..969d53e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,7 +35,7 @@ src/ ├── index.tsx # Plugin entry: registerRoute, registerSidebarEntry, registerDetailsViewSection, registerResourceTableColumnsProcessor ├── api/ │ ├── k8s.ts # Types + helpers (GpuDevicePlugin CRD, Nodes, Pods, type guards, formatters) -│ ├── k8s.test.ts # Tests for k8s helpers (70+ test cases) +│ ├── k8s.test.ts # Tests for k8s helpers (48 test cases) │ ├── metrics.ts # Prometheus GPU power metrics (node-exporter i915 hwmon) │ └── IntelGpuDataContext.tsx # Shared React context provider with data fetching └── components/ @@ -44,7 +44,7 @@ src/ ├── NodesPage.tsx # Per-node GPU type, device count, allocation, workload pods ├── PodsPage.tsx # All pods requesting Intel GPU resources with per-container detail ├── MetricsPage.tsx # Real-time GPU power metrics from Prometheus - ├── NodeDetailSection.tsx # Injected into native Node detail page (capacity, utilization, pods) + ├── NodeDetailSection.tsx # Injected into native Node detail page (capacity, utilization, pods) ├── PodDetailSection.tsx # Injected into native Pod detail page (GPU requests per container) └── integrations/ └── NodeColumns.tsx # GPU Type and GPU Devices columns for native Nodes table diff --git a/artifacthub-pkg.yml b/artifacthub-pkg.yml index 817badd..a54bf2b 100644 --- a/artifacthub-pkg.yml +++ b/artifacthub-pkg.yml @@ -1,4 +1,4 @@ -version: "0.4.0" +version: "0.4.1" name: intel-gpu displayName: Intel GPU description: >- @@ -8,14 +8,14 @@ description: >- sections into native Node and Pod detail pages. Supports discrete (i915), Xe, and integrated GPU nodes with graceful degradation when the device plugin operator is not installed. Includes a Metrics page showing real-time - engine utilization, GPU frequency, VRAM usage, and energy from the device - plugin's Prometheus endpoint. + GPU power draw and TDP from node-exporter i915 hwmon metrics (discrete GPU + nodes only). createdAt: "2026-02-18T00:00:00Z" license: Apache-2.0 category: monitoring-logging homeURL: https://github.com/privilegedescalation/headlamp-intel-gpu-plugin -appVersion: "0.3.0" +appVersion: "0.4.0" keywords: - headlamp @@ -45,33 +45,23 @@ links: url: https://intel.github.io/intel-device-plugins-for-kubernetes/ changes: - - kind: added - description: "Metrics page: document which metrics require what infrastructure (power via hwmon works out of the box; frequency and utilization need custom exporters)" - - kind: added - description: "Metrics page: real-time GPU power draw (W) and TDP via node-exporter i915 hwmon metrics in kube-prometheus-stack" + - kind: fixed + description: "Remove unsafe `as any` casts in NodeDetailSection" + - kind: fixed + description: "Fix MetricsPage fetch cancellation safety (prevent setState on unmounted component)" + - kind: fixed + description: "Fix typo gpuPluinPods → gpuPluginPods in data context" - kind: changed - description: "Sidebar label changed to intel-gpu" + description: "Move extractJsonData utility to module scope to avoid recreation on every render" - kind: removed - description: "Removed app bar health badge" - - kind: added - description: "Overview dashboard: plugin health, GPU node summary, allocation bar, active GPU pods" - - kind: added - description: "Device Plugins page: GpuDevicePlugin CRD instances with spec/status and daemon pods" - - kind: added - description: "GPU Nodes page: per-node GPU type, device count, allocation, workload pods" - - kind: added - description: "GPU Pods page: all pods requesting Intel GPU resources with per-container detail" - - kind: added - description: "Node detail injection: Intel GPU section on native Node detail pages (capacity, allocatable, utilization, active pods)" - - kind: added - description: "Pod detail injection: GPU resource requests/limits per container on native Pod detail pages" - - kind: added - description: "Nodes table: GPU Type and GPU Devices columns injected into native Nodes table" - - kind: added - description: "App bar health badge: hidden when no Intel GPU plugin detected" + description: "Remove dead AppBarGpuBadge component" + - kind: fixed + description: "Fix appVersion mismatch and inaccurate metrics description in Artifact Hub metadata" + - kind: fixed + description: "Resolve ESLint/Prettier indent conflict by disabling ESLint indent rule (Prettier is formatting authority)" annotations: - headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v0.4.0/intel-gpu-0.4.0.tar.gz" - headlamp/plugin/archive-checksum: sha256:f529794d7995b35b954fa32c10874fa8367f6f5cd8040600e47a3013373219df + headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v0.4.1/intel-gpu-0.4.1.tar.gz" + headlamp/plugin/archive-checksum: "" headlamp/plugin/version-compat: ">=0.20.0" headlamp/plugin/distro-compat: "in-cluster,web,app" diff --git a/package.json b/package.json index 037c0d2..ee90937 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "intel-gpu", - "version": "0.4.0", + "version": "0.4.1", "description": "Headlamp plugin for Intel GPU device plugin visibility and monitoring", "repository": { "type": "git", diff --git a/src/api/IntelGpuDataContext.tsx b/src/api/IntelGpuDataContext.tsx index c947f91..bf724a9 100644 --- a/src/api/IntelGpuDataContext.tsx +++ b/src/api/IntelGpuDataContext.tsx @@ -65,6 +65,18 @@ export function useIntelGpuContext(): IntelGpuContextValue { return ctx; } +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** Extract raw Kubernetes JSON from Headlamp KubeObject wrappers. */ +const extractJsonData = (items: unknown[]): unknown[] => + items.map(item => + item && typeof item === 'object' && 'jsonData' in item + ? (item as { jsonData: unknown }).jsonData + : item + ); + // --------------------------------------------------------------------------- // Provider // --------------------------------------------------------------------------- @@ -129,8 +141,8 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode } try { const list = await ApiProxy.request(url); if (!cancelled && isKubeList(list)) { - const gpuPluinPods = filterIntelGpuPluginPods(list.items); - foundPluginPods.push(...gpuPluinPods); + const gpuPluginPods = filterIntelGpuPluginPods(list.items); + foundPluginPods.push(...gpuPluginPods); } } catch { // Silently ignore — some selectors may not match @@ -170,13 +182,6 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode } // type helpers work correctly. // --------------------------------------------------------------------------- - const extractJsonData = (items: unknown[]): unknown[] => - items.map(item => - item && typeof item === 'object' && 'jsonData' in item - ? (item as { jsonData: unknown }).jsonData - : item - ); - const gpuNodes = useMemo(() => { if (!allNodes) return []; return filterIntelGpuNodes(extractJsonData(allNodes as unknown[])); diff --git a/src/components/AppBarGpuBadge.tsx b/src/components/AppBarGpuBadge.tsx deleted file mode 100644 index 74e9975..0000000 --- a/src/components/AppBarGpuBadge.tsx +++ /dev/null @@ -1,46 +0,0 @@ -/** - * AppBarGpuBadge — compact Intel GPU health indicator in the Headlamp app bar. - * - * Shows a status chip in the top navigation bar summarising GPU plugin health. - * Hides itself when no Intel GPU plugin is detected. - */ - -import { StatusLabel } from '@kinvolk/headlamp-plugin/lib/CommonComponents'; -import React from 'react'; -import { useIntelGpuContext } from '../api/IntelGpuDataContext'; - -export default function AppBarGpuBadge() { - const { pluginInstalled, gpuNodes, devicePlugins, loading } = useIntelGpuContext(); - - // Hide when loading or no plugin present - if (loading || !pluginInstalled) return null; - - const hasUnhealthyPlugin = devicePlugins.some(p => { - const desired = p.status?.desiredNumberScheduled ?? 0; - const ready = p.status?.numberReady ?? 0; - const unavailable = p.status?.numberUnavailable ?? 0; - return (desired > 0 && ready < desired) || unavailable > 0; - }); - - const status = hasUnhealthyPlugin ? 'warning' : 'success'; - const nodeCount = gpuNodes.length; - - return ( -
- - - Intel GPU{nodeCount > 0 ? ` · ${nodeCount}N` : ''} - - -
- ); -} diff --git a/src/components/MetricsPage.tsx b/src/components/MetricsPage.tsx index 65ef4bf..1493590 100644 --- a/src/components/MetricsPage.tsx +++ b/src/components/MetricsPage.tsx @@ -194,30 +194,41 @@ export default function MetricsPage() { const [metrics, setMetrics] = useState(null); const [fetchError, setFetchError] = useState(null); const [fetching, setFetching] = useState(false); + const [fetchSeq, setFetchSeq] = useState(0); - const doFetch = useCallback(async () => { - setFetching(true); - setFetchError(null); - try { - const result = await fetchGpuMetrics(); - setMetrics(result); - if (!result) { - setFetchError( - 'Could not reach Prometheus. Ensure kube-prometheus-stack is installed in the monitoring namespace.' - ); - } - } catch (e: unknown) { - setFetchError(e instanceof Error ? e.message : String(e)); - } finally { - setFetching(false); - } + const doFetch = useCallback(() => { + setFetchSeq(s => s + 1); }, []); useEffect(() => { - if (!ctxLoading) { - void doFetch(); - } - }, [ctxLoading, doFetch]); + if (ctxLoading) return; + + let cancelled = false; + setFetching(true); + setFetchError(null); + + fetchGpuMetrics() + .then(result => { + if (cancelled) return; + setMetrics(result); + if (!result) { + setFetchError( + 'Could not reach Prometheus. Ensure kube-prometheus-stack is installed in the monitoring namespace.' + ); + } + }) + .catch((e: unknown) => { + if (cancelled) return; + setFetchError(e instanceof Error ? e.message : String(e)); + }) + .finally(() => { + if (!cancelled) setFetching(false); + }); + + return () => { + cancelled = true; + }; + }, [ctxLoading, fetchSeq]); if (ctxLoading) { return ; diff --git a/src/components/NodeDetailSection.tsx b/src/components/NodeDetailSection.tsx index ec14034..a041995 100644 --- a/src/components/NodeDetailSection.tsx +++ b/src/components/NodeDetailSection.tsx @@ -52,11 +52,11 @@ export default function NodeDetailSection({ resource }: NodeDetailSectionProps) metadata: { name: string; labels?: Record }; }; - const nodeName = (node as { metadata: { name: string } }).metadata.name; - const capacity = getGpuResources((node as any).status?.capacity); - const allocatable = getGpuResources((node as any).status?.allocatable); + const nodeName = node.metadata.name; + const capacity = getGpuResources(node.status?.capacity); + const allocatable = getGpuResources(node.status?.allocatable); - const gpuType = getNodeGpuType(node as any); + const gpuType = getNodeGpuType(node); // Find GPU pods scheduled on this node const podsOnNode = loading ? [] : gpuPods.filter(p => p.spec?.nodeName === nodeName);