diff --git a/.claude/agents/agent-installer.md b/.claude/agents/agent-installer.md new file mode 100644 index 0000000..2b6d6e6 --- /dev/null +++ b/.claude/agents/agent-installer.md @@ -0,0 +1,44 @@ +--- +name: agent-installer +description: Use this agent when the user wants to discover, browse, or install Claude Code agents from the awesome-claude-code-subagents repository. +tools: Bash, WebFetch, Read, Write, Glob +model: haiku +--- + +You are an agent installer that helps users browse and install Claude Code agents from the awesome-claude-code-subagents repository on GitHub. + +## Your Capabilities + +You can: +1. List all available agent categories +2. List agents within a category +3. Search for agents by name or description +4. Install agents to global (~/.claude/agents/) or local (.claude/agents/) directory +5. Show details about a specific agent before installing +6. Uninstall agents + +## GitHub API Endpoints + +- Categories list: `https://api.github.com/repos/VoltAgent/awesome-claude-code-subagents/contents/categories` +- Agents in category: `https://api.github.com/repos/VoltAgent/awesome-claude-code-subagents/contents/categories/{category-name}` +- Raw agent file: `https://raw.githubusercontent.com/VoltAgent/awesome-claude-code-subagents/main/categories/{category-name}/{agent-name}.md` + +## Workflow + +### When user asks to browse or list agents: +1. Fetch categories from GitHub API using WebFetch or Bash with curl +2. Parse the JSON response to extract directory names +3. Present categories in a numbered list +4. When user selects a category, fetch and list agents in that category + +### When user wants to install an agent: +1. Ask if they want global installation (~/.claude/agents/) or local (.claude/agents/) +2. For local: Check if .claude/ directory exists, create .claude/agents/ if needed +3. Download the agent .md file from GitHub raw URL +4. Save to the appropriate directory +5. Confirm successful installation + +### When user wants to search: +1. 
Fetch the README.md which contains all agent listings +2. Search for the term in agent names and descriptions +3. Present matching results diff --git a/.claude/agents/agent-organizer.md b/.claude/agents/agent-organizer.md new file mode 100644 index 0000000..68c5f86 --- /dev/null +++ b/.claude/agents/agent-organizer.md @@ -0,0 +1,24 @@ +--- +name: agent-organizer +description: Use when assembling and optimizing multi-agent teams to execute complex projects that require careful task decomposition, agent capability matching, and workflow coordination. +tools: Read, Write, Edit, Glob, Grep +model: sonnet +--- + +You are a senior agent organizer with expertise in assembling and coordinating multi-agent teams. Your focus spans task analysis, agent capability mapping, workflow design, and team optimization with emphasis on selecting the right agents for each task and ensuring efficient collaboration. + +When invoked: +1. Query context manager for task requirements and available agents +2. Review agent capabilities, performance history, and current workload +3. Analyze task complexity, dependencies, and optimization opportunities +4. Orchestrate agent teams for maximum efficiency and success + +Agent organization checklist: +- Agent selection accuracy > 95% achieved +- Task completion rate > 99% maintained +- Resource utilization optimal consistently +- Response time < 5s ensured +- Error recovery automated properly +- Cost tracking enabled thoroughly +- Performance monitored continuously +- Team synergy maximized effectively diff --git a/.claude/agents/multi-agent-coordinator.md b/.claude/agents/multi-agent-coordinator.md new file mode 100644 index 0000000..28fb7c8 --- /dev/null +++ b/.claude/agents/multi-agent-coordinator.md @@ -0,0 +1,24 @@ +--- +name: multi-agent-coordinator +description: Use when coordinating multiple concurrent agents that need to communicate, share state, synchronize work, and handle distributed failures across a system. 
+tools: Read, Write, Edit, Glob, Grep +model: opus +--- + +You are a senior multi-agent coordinator with expertise in orchestrating complex distributed workflows. Your focus spans inter-agent communication, task dependency management, parallel execution control, and fault tolerance with emphasis on ensuring efficient, reliable coordination across large agent teams. + +When invoked: +1. Query context manager for workflow requirements and agent states +2. Review communication patterns, dependencies, and resource constraints +3. Analyze coordination bottlenecks, deadlock risks, and optimization opportunities +4. Implement robust multi-agent coordination strategies + +Multi-agent coordination checklist: +- Coordination overhead < 5% maintained +- Deadlock prevention 100% ensured +- Message delivery guaranteed thoroughly +- Scalability to 100+ agents verified +- Fault tolerance built-in properly +- Monitoring comprehensive continuously +- Recovery automated effectively +- Performance optimal consistently diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..374b014 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,15 @@ +{ + "permissions": { + "allow": [ + "Bash(done)", + "Bash(npm install:*)", + "Bash(git add:*)" + ] + }, + "enabledMcpjsonServers": [ + "github", + "kubernetes", + "flux", + "playwright" + ] +} diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 0000000..e37cc11 --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,3 @@ +module.exports = { + extends: ['@headlamp-k8s/eslint-config'], +}; diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..cfc2606 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,41 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_call: + +jobs: + ci: + runs-on: local-ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: 
Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + - name: Build plugin + run: npx @kinvolk/headlamp-plugin build + + - name: Lint + run: npm run lint + + - name: Type-check + run: npm run tsc + + - name: Format check + run: npm run format:check + + - name: Run tests + run: npm test diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..4a050a8 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,104 @@ +name: Release + +on: + workflow_dispatch: + inputs: + version: + description: 'Release version (e.g. 1.0.0)' + required: true + type: string + +permissions: + contents: write + +concurrency: + group: release + cancel-in-progress: false + +jobs: + ci: + uses: ./.github/workflows/ci.yaml + + release: + needs: ci + runs-on: local-ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Validate version format + run: | + if [[ ! "${{ inputs.version }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "Error: Version must be in X.Y.Z format" + exit 1 + fi + + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Configure Git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Update version in package.json + run: npm version ${{ inputs.version }} --no-git-tag-version --allow-same-version + + - name: Update artifacthub-pkg.yml + run: | + VERSION="${{ inputs.version }}" + PKG_NAME=$(jq -r .name package.json) + RELEASE_URL="https://github.com/${{ github.repository }}/releases/download/v${VERSION}/${PKG_NAME}-${VERSION}.tar.gz" + sed -i "s/^version:.*/version: \"${VERSION}\"/" artifacthub-pkg.yml + sed -i "s|headlamp/plugin/archive-url:.*|headlamp/plugin/archive-url: \"${RELEASE_URL}\"|" artifacthub-pkg.yml + + - name: Install dependencies + 
run: npm ci + + - name: Build plugin + run: npx @kinvolk/headlamp-plugin build + + - name: Package plugin + run: npx @kinvolk/headlamp-plugin package + + - name: Prepare release tarball + run: | + VERSION="${{ inputs.version }}" + PKG_NAME=$(jq -r .name package.json) + TARBALL="${PKG_NAME}-${VERSION}.tar.gz" + echo "TARBALL=$TARBALL" >> $GITHUB_ENV + echo "PKG_NAME=$PKG_NAME" >> $GITHUB_ENV + + - name: Validate tarball + run: | + echo "Tarball: ${{ env.TARBALL }}" + ls -lh "${{ env.TARBALL }}" + tar -tzf "${{ env.TARBALL }}" | head -20 + tar -tzf "${{ env.TARBALL }}" | grep -q "main.js" || { echo "Error: main.js not found in tarball"; exit 1; } + + - name: Compute checksum + run: | + CHECKSUM=$(sha256sum "${{ env.TARBALL }}" | awk '{print $1}') + echo "CHECKSUM=$CHECKSUM" >> $GITHUB_ENV + sed -i "s|headlamp/plugin/archive-checksum:.*|headlamp/plugin/archive-checksum: sha256:${CHECKSUM}|" artifacthub-pkg.yml + + - name: Commit and tag + run: | + VERSION="${{ inputs.version }}" + git add package.json package-lock.json artifacthub-pkg.yml + git commit -m "release: v${VERSION}" + git tag "v${VERSION}" + git push origin main --tags + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + tag_name: v${{ inputs.version }} + name: v${{ inputs.version }} + generate_release_notes: true + files: ${{ env.TARBALL }} diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..a818c7f --- /dev/null +++ b/.mcp.json @@ -0,0 +1,12 @@ +{ + "mcpServers": { + "github": { + "type": "http", + "url": "https://api.githubcopilot.com/mcp/", + "headers": { "Authorization": "Bearer ${GITHUB_TOKEN}" } + }, + "kubernetes": { "type": "sse", "url": "http://localhost:8080/sse" }, + "flux": { "type": "sse", "url": "http://localhost:8081/sse" }, + "playwright": { "type": "sse", "url": "http://localhost:8086/sse" } + } +} diff --git a/.pluginrc b/.pluginrc new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.pluginrc @@ -0,0 +1 @@ +{} \ No newline at 
end of file diff --git a/.prettierrc.js b/.prettierrc.js new file mode 100644 index 0000000..fcb4dbd --- /dev/null +++ b/.prettierrc.js @@ -0,0 +1 @@ +module.exports = require('@headlamp-k8s/eslint-config/prettier-config'); diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..ea03f80 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,95 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project + +Headlamp plugin for Intel GPU device plugin visibility and monitoring. Read-only — monitors GpuDevicePlugin CRDs, GPU-capable nodes, pods requesting Intel GPU resources, and real-time power metrics via Prometheus. No cluster write operations. + +- **Plugin name**: `intel-gpu` +- **Target**: Headlamp >= v0.20.0 +- **Data sources**: GpuDevicePlugin CRDs (`deviceplugin.intel.com/v1`), Nodes, Pods (all namespaces), Prometheus (node-exporter i915 hwmon) +- **Reference plugin**: `../headlamp-kube-vip-plugin` + +## Commands + +```bash +npm start # dev server with hot reload +npm run build # production build +npm run package # package for headlamp +npm run tsc # TypeScript type check (no emit) +npm run lint # ESLint +npm run lint:fix # ESLint with auto-fix +npm run format # Prettier write +npm run format:check # Prettier check +npm test # vitest run +npm run test:watch # vitest watch mode +``` + +All tests and `tsc` must pass before committing. 
+ +## Architecture + +``` +src/ +├── index.tsx # Plugin entry: registerRoute, registerSidebarEntry, registerDetailsViewSection, registerResourceTableColumnsProcessor +├── api/ +│ ├── k8s.ts # Types + helpers (GpuDevicePlugin CRD, Nodes, Pods, type guards, formatters) +│ ├── k8s.test.ts # Tests for k8s helpers (70+ test cases) +│ ├── metrics.ts # Prometheus GPU power metrics (node-exporter i915 hwmon) +│ └── IntelGpuDataContext.tsx # Shared React context provider with data fetching +└── components/ + ├── OverviewPage.tsx # Dashboard: plugin health, GPU node summary, allocation, active pods + ├── DevicePluginsPage.tsx # GpuDevicePlugin CRD instances with spec/status and daemon pods + ├── NodesPage.tsx # Per-node GPU type, device count, allocation, workload pods + ├── PodsPage.tsx # All pods requesting Intel GPU resources with per-container detail + ├── MetricsPage.tsx # Real-time GPU power metrics from Prometheus + ├── NodeDetailSection.tsx # Injected into native Node detail page (capacity, utilization, pods) + ├── PodDetailSection.tsx # Injected into native Pod detail page (GPU requests per container) + └── integrations/ + └── NodeColumns.tsx # GPU Type and GPU Devices columns for native Nodes table +``` + +## Data flow + +`IntelGpuDataContext.tsx` uses **two fetching strategies**: + +1. **Headlamp hooks** (`K8s.ResourceClasses.*.useList()`) — for Nodes and Pods. +2. **`ApiProxy.request()`** — for GpuDevicePlugin CRDs and plugin daemon pods (with label selector fallback). + +The plugin gracefully degrades when the GpuDevicePlugin CRD is not installed — GPU nodes and pods are still shown based on resource labels and capacity. 
+ +## Key constants (src/api/k8s.ts) + +- API group: `deviceplugin.intel.com` +- API version: `v1` +- GPU resources: `gpu.intel.com/i915`, `gpu.intel.com/xe`, `gpu.intel.com/millicores`, `gpu.intel.com/memory.max` +- Resource prefix: `gpu.intel.com/` +- Node labels: `intel.feature.node.kubernetes.io/gpu`, `node-role.kubernetes.io/gpu`, `node-role.kubernetes.io/igpu` +- Pod selector: `app=intel-gpu-plugin` +- Prometheus services: `kube-prometheus-stack-prometheus`, `prometheus-operated`, `prometheus` (monitoring namespace, port 9090) + +## Code conventions + +- Functional React components only — no class components +- All imports from `@kinvolk/headlamp-plugin/lib` and `@kinvolk/headlamp-plugin/lib/CommonComponents` +- No additional UI libraries (no MUI direct imports, no Ant Design, etc.) +- TypeScript strict mode — no `any`, use `unknown` + type guards at API boundaries +- Context provider (`IntelGpuDataProvider`) wraps each route component in `index.tsx` +- Tests: vitest + @testing-library/react, mock with `vi.mock('@kinvolk/headlamp-plugin/lib', ...)` +- `vitest.setup.ts` provides a spec-compliant `localStorage` shim for Node 22+ compatibility + +## Testing + +Mock pattern for headlamp APIs: +```typescript +vi.mock('@kinvolk/headlamp-plugin/lib', () => ({ + ApiProxy: { request: vi.fn().mockResolvedValue({ items: [] }) }, + K8s: { + ResourceClasses: { + Node: { useList: vi.fn(() => [[], null]) }, + Pod: { useList: vi.fn(() => [[], null]) }, + }, + }, +})); +``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..73a77e2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,36 @@ +# Contributing + +Contributions are welcome! Please follow these guidelines. 
+ +## Development Setup + +```bash +git clone https://github.com/privilegedescalation/headlamp-intel-gpu-plugin.git +cd headlamp-intel-gpu-plugin +npm install +npm start +``` + +## Before Submitting a PR + +```bash +npm run tsc # TypeScript type check +npm run lint # ESLint +npm run format:check # Prettier +npm test # All tests must pass +``` + +## Code Style + +- TypeScript strict mode (no `any`) +- Functional React components only +- All UI from `@kinvolk/headlamp-plugin/lib/CommonComponents` +- Tests with vitest + @testing-library/react + +## Commit Messages + +Use conventional commit format: +- `feat:` new features +- `fix:` bug fixes +- `chore:` maintenance +- `docs:` documentation diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c73f62f --- /dev/null +++ b/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2025 privilegedescalation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e5b5b02 --- /dev/null +++ b/README.md @@ -0,0 +1,110 @@ +# headlamp-intel-gpu-plugin + +[![CI](https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/actions/workflows/ci.yaml/badge.svg)](https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/actions/workflows/ci.yaml) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + +A [Headlamp](https://headlamp.dev/) plugin providing visibility into [Intel GPU device plugin](https://intel.github.io/intel-device-plugins-for-kubernetes/) deployments on Kubernetes. + +## Features + +- **Overview Dashboard** — Plugin health, GPU node summary, allocation bar, active GPU pods +- **Device Plugins** — GpuDevicePlugin CRD instances with spec/status and daemon pod health +- **GPU Nodes** — Per-node GPU type (discrete/integrated), device count, allocation, workload pods +- **GPU Pods** — All pods requesting Intel GPU resources with per-container detail +- **Metrics** — Real-time GPU power draw (W) and TDP via Prometheus node-exporter i915 hwmon +- **Node Detail Integration** — Intel GPU section injected into native Headlamp Node detail views +- **Pod Detail Integration** — GPU resource requests/limits injected into native Pod detail views +- **Nodes Table Columns** — GPU Type and GPU Devices columns added to native Nodes table + +## Installation + +### Plugin Manager (Headlamp UI) + +Search for `intel-gpu` in the Headlamp Plugin Manager. 
+ +### Manual + +```bash +# Download a release tarball (replace 0.3.0 with the desired version) +curl -LO https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v0.3.0/intel-gpu-0.3.0.tar.gz + +# Extract to Headlamp plugins directory +mkdir -p ~/.config/Headlamp/plugins +tar -xzf intel-gpu-*.tar.gz -C ~/.config/Headlamp/plugins/ +``` + +### From Source + +```bash +git clone https://github.com/privilegedescalation/headlamp-intel-gpu-plugin.git +cd headlamp-intel-gpu-plugin +npm install +npm run build +``` + +## Requirements + +- Headlamp >= v0.20.0 +- Intel GPU device plugin deployed (optional — plugin gracefully degrades without it) +- Optional: Node Feature Discovery with Intel GPU labels +- Optional: kube-prometheus-stack with node-exporter for GPU power metrics + +## RBAC + +This plugin is **read-only** and requires the following permissions: + +| Resource | API Group | Verbs | +|----------|-----------|-------| +| nodes | core (`""`) | list, get, watch | +| pods | core (`""`) | list, get, watch | +| gpudeviceplugins | `deviceplugin.intel.com` | list, get | + +For metrics, Prometheus must be accessible via the Headlamp API proxy in the `monitoring` namespace.
+ +## Architecture + +``` +src/ +├── index.tsx # Plugin entry point +├── api/ +│ ├── k8s.ts # Types and helper functions +│ ├── metrics.ts # Prometheus GPU metrics +│ └── IntelGpuDataContext.tsx # React context provider +└── components/ + ├── OverviewPage.tsx # Dashboard + ├── DevicePluginsPage.tsx # Device plugin CRDs + ├── NodesPage.tsx # GPU nodes + ├── PodsPage.tsx # GPU pods + ├── MetricsPage.tsx # Power metrics + ├── NodeDetailSection.tsx # Injected into Node detail view + ├── PodDetailSection.tsx # Injected into Pod detail view + └── integrations/ + └── NodeColumns.tsx # Nodes table columns +``` + +## Development + +```bash +npm install +npm start # dev server +npm test # run tests +npm run tsc # type check +npm run lint # ESLint +``` + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| No GPU nodes shown | No Intel GPU labels or resources on nodes | Install Intel Node Feature Discovery or Intel GPU device plugin | +| CRD not available warning | GpuDevicePlugin CRD not installed | Install Intel device plugins operator — plugin still works without it | +| No metrics data | Prometheus not found | Deploy kube-prometheus-stack in the `monitoring` namespace | +| Metrics show only discrete GPUs | Integrated GPUs lack hwmon | Expected — iGPU driver doesn't expose hwmon power data | + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines. + +## License + +Apache License 2.0. See [LICENSE](LICENSE) for details. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..982a347 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,22 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +|---------|-----------| +| latest | Yes | + +## Plugin Scope + +This plugin is **read-only**. It does not perform any write operations against the Kubernetes cluster. 
It reads: + +- Nodes +- Pods (all namespaces) +- GpuDevicePlugin CRDs (`deviceplugin.intel.com/v1`) +- Prometheus metrics (via API proxy in `monitoring` namespace) + +All data is fetched through Headlamp's built-in API proxy, which respects the user's existing RBAC permissions. + +## Reporting a Vulnerability + +Please report security vulnerabilities privately, either through GitHub's private vulnerability reporting (a repository security advisory) or by emailing the maintainers directly. Please do not open a public issue. diff --git a/artifacthub-pkg.yml b/artifacthub-pkg.yml index 88bc7d6..1c2a677 100644 --- a/artifacthub-pkg.yml +++ b/artifacthub-pkg.yml @@ -1,5 +1,5 @@ version: "0.3.0" -name: headlamp-intel-gpu-plugin +name: intel-gpu displayName: Intel GPU description: >- Headlamp plugin for Intel GPU device plugin visibility and monitoring. @@ -71,7 +71,7 @@ changes: description: "App bar health badge: hidden when no Intel GPU plugin detected" annotations: - headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v0.3.0/headlamp-intel-gpu-plugin-0.3.0.tar.gz" + headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v0.3.0/intel-gpu-0.3.0.tar.gz" headlamp/plugin/archive-checksum: "sha256:fdc53099ee3123680f24fe4a319b753ca3d030aac31abd4e3f383221085c9c2d" headlamp/plugin/version-compat: ">=0.20.0" headlamp/plugin/distro-compat: "in-cluster,web,app" diff --git a/package-lock.json b/package-lock.json index d94bc1d..c206d75 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { - "name": "headlamp-intel-gpu-plugin", - "version": "0.1.0", + "name": "intel-gpu", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "headlamp-intel-gpu-plugin", - "version": "0.1.0", + "name": "intel-gpu", + "version": "0.3.0", "license": "Apache-2.0", "devDependencies": { "@kinvolk/headlamp-plugin": "^0.13.0" diff --git a/package.json b/package.json index 61da9b2..21ee123 100644 --- a/package.json +++ b/package.json @@
-1,12 +1,16 @@ { - "name": "headlamp-intel-gpu-plugin", + "name": "intel-gpu", "version": "0.3.0", "description": "Headlamp plugin for Intel GPU device plugin visibility and monitoring", "repository": { "type": "git", - "url": "https://github.com/cpfarhood/headlamp-intel-gpu-plugin.git" + "url": "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin.git" }, - "author": "cpfarhood", + "bugs": { + "url": "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/issues" + }, + "homepage": "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin#readme", + "author": "privilegedescalation", "license": "Apache-2.0", "scripts": { "start": "headlamp-plugin start", diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..22a9943 --- /dev/null +++ b/renovate.json @@ -0,0 +1,4 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": ["config:recommended"] +}