From 034e0b9db85ae8c8a9fde434317d7d22649da5dd Mon Sep 17 00:00:00 2001 From: DevContainer User Date: Wed, 4 Mar 2026 11:46:16 +0000 Subject: [PATCH] chore: align repo structure with plugin conventions Add missing config files (.eslintrc.js, .prettierrc.js, .pluginrc, .mcp.json, renovate.json), documentation (CLAUDE.md, CONTRIBUTING.md, README.md, SECURITY.md, LICENSE), CI/CD workflows (ci.yaml, release.yaml), and Claude agent definitions. Rename package from headlamp-intel-gpu-plugin to intel-gpu to match the short-name convention used by all other plugins. Co-Authored-By: Claude Opus 4.6 --- .claude/agents/agent-installer.md | 44 +++++ .claude/agents/agent-organizer.md | 24 +++ .claude/agents/multi-agent-coordinator.md | 24 +++ .claude/settings.local.json | 15 ++ .eslintrc.js | 3 + .github/workflows/ci.yaml | 41 +++++ .github/workflows/release.yaml | 104 ++++++++++++ .mcp.json | 12 ++ .pluginrc | 1 + .prettierrc.js | 1 + CLAUDE.md | 95 +++++++++++ CONTRIBUTING.md | 36 ++++ LICENSE | 190 ++++++++++++++++++++++ README.md | 110 +++++++++++++ SECURITY.md | 22 +++ artifacthub-pkg.yml | 4 +- package-lock.json | 8 +- package.json | 10 +- renovate.json | 4 + 19 files changed, 739 insertions(+), 9 deletions(-) create mode 100644 .claude/agents/agent-installer.md create mode 100644 .claude/agents/agent-organizer.md create mode 100644 .claude/agents/multi-agent-coordinator.md create mode 100644 .claude/settings.local.json create mode 100644 .eslintrc.js create mode 100644 .github/workflows/ci.yaml create mode 100644 .github/workflows/release.yaml create mode 100644 .mcp.json create mode 100644 .pluginrc create mode 100644 .prettierrc.js create mode 100644 CLAUDE.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 SECURITY.md create mode 100644 renovate.json diff --git a/.claude/agents/agent-installer.md b/.claude/agents/agent-installer.md new file mode 100644 index 0000000..2b6d6e6 --- /dev/null +++ 
b/.claude/agents/agent-installer.md @@ -0,0 +1,44 @@ +--- +name: agent-installer +description: Use this agent when the user wants to discover, browse, or install Claude Code agents from the awesome-claude-code-subagents repository. +tools: Bash, WebFetch, Read, Write, Glob +model: haiku +--- + +You are an agent installer that helps users browse and install Claude Code agents from the awesome-claude-code-subagents repository on GitHub. + +## Your Capabilities + +You can: +1. List all available agent categories +2. List agents within a category +3. Search for agents by name or description +4. Install agents to global (~/.claude/agents/) or local (.claude/agents/) directory +5. Show details about a specific agent before installing +6. Uninstall agents + +## GitHub API Endpoints + +- Categories list: `https://api.github.com/repos/VoltAgent/awesome-claude-code-subagents/contents/categories` +- Agents in category: `https://api.github.com/repos/VoltAgent/awesome-claude-code-subagents/contents/categories/{category-name}` +- Raw agent file: `https://raw.githubusercontent.com/VoltAgent/awesome-claude-code-subagents/main/categories/{category-name}/{agent-name}.md` + +## Workflow + +### When user asks to browse or list agents: +1. Fetch categories from GitHub API using WebFetch or Bash with curl +2. Parse the JSON response to extract directory names +3. Present categories in a numbered list +4. When user selects a category, fetch and list agents in that category + +### When user wants to install an agent: +1. Ask if they want global installation (~/.claude/agents/) or local (.claude/agents/) +2. For local: Check if .claude/ directory exists, create .claude/agents/ if needed +3. Download the agent .md file from GitHub raw URL +4. Save to the appropriate directory +5. Confirm successful installation + +### When user wants to search: +1. Fetch the README.md which contains all agent listings +2. Search for the term in agent names and descriptions +3. 
Present matching results diff --git a/.claude/agents/agent-organizer.md b/.claude/agents/agent-organizer.md new file mode 100644 index 0000000..68c5f86 --- /dev/null +++ b/.claude/agents/agent-organizer.md @@ -0,0 +1,24 @@ +--- +name: agent-organizer +description: Use when assembling and optimizing multi-agent teams to execute complex projects that require careful task decomposition, agent capability matching, and workflow coordination. +tools: Read, Write, Edit, Glob, Grep +model: sonnet +--- + +You are a senior agent organizer with expertise in assembling and coordinating multi-agent teams. Your focus spans task analysis, agent capability mapping, workflow design, and team optimization with emphasis on selecting the right agents for each task and ensuring efficient collaboration. + +When invoked: +1. Query context manager for task requirements and available agents +2. Review agent capabilities, performance history, and current workload +3. Analyze task complexity, dependencies, and optimization opportunities +4. Orchestrate agent teams for maximum efficiency and success + +Agent organization checklist: +- Agent selection accuracy > 95% achieved +- Task completion rate > 99% maintained +- Resource utilization optimal consistently +- Response time < 5s ensured +- Error recovery automated properly +- Cost tracking enabled thoroughly +- Performance monitored continuously +- Team synergy maximized effectively diff --git a/.claude/agents/multi-agent-coordinator.md b/.claude/agents/multi-agent-coordinator.md new file mode 100644 index 0000000..28fb7c8 --- /dev/null +++ b/.claude/agents/multi-agent-coordinator.md @@ -0,0 +1,24 @@ +--- +name: multi-agent-coordinator +description: Use when coordinating multiple concurrent agents that need to communicate, share state, synchronize work, and handle distributed failures across a system. 
+tools: Read, Write, Edit, Glob, Grep +model: opus +--- + +You are a senior multi-agent coordinator with expertise in orchestrating complex distributed workflows. Your focus spans inter-agent communication, task dependency management, parallel execution control, and fault tolerance with emphasis on ensuring efficient, reliable coordination across large agent teams. + +When invoked: +1. Query context manager for workflow requirements and agent states +2. Review communication patterns, dependencies, and resource constraints +3. Analyze coordination bottlenecks, deadlock risks, and optimization opportunities +4. Implement robust multi-agent coordination strategies + +Multi-agent coordination checklist: +- Coordination overhead < 5% maintained +- Deadlock prevention 100% ensured +- Message delivery guaranteed thoroughly +- Scalability to 100+ agents verified +- Fault tolerance built-in properly +- Monitoring comprehensive continuously +- Recovery automated effectively +- Performance optimal consistently diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..374b014 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,15 @@ +{ + "permissions": { + "allow": [ + "Bash(done)", + "Bash(npm install:*)", + "Bash(git add:*)" + ] + }, + "enabledMcpjsonServers": [ + "github", + "kubernetes", + "flux", + "playwright" + ] +} diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 0000000..e37cc11 --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,3 @@ +module.exports = { + extends: ['@headlamp-k8s/eslint-config'], +}; diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..cfc2606 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,41 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_call: + +jobs: + ci: + runs-on: local-ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: 
Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + - name: Build plugin + run: npx @kinvolk/headlamp-plugin build + + - name: Lint + run: npm run lint + + - name: Type-check + run: npm run tsc + + - name: Format check + run: npm run format:check + + - name: Run tests + run: npm test diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..4a050a8 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,104 @@ +name: Release + +on: + workflow_dispatch: + inputs: + version: + description: 'Release version (e.g. 1.0.0)' + required: true + type: string + +permissions: + contents: write + +concurrency: + group: release + cancel-in-progress: false + +jobs: + ci: + uses: ./.github/workflows/ci.yaml + + release: + needs: ci + runs-on: local-ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Validate version format + run: | + if [[ ! "${{ inputs.version }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "Error: Version must be in X.Y.Z format" + exit 1 + fi + + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + cache: 'npm' + + - name: Configure Git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Update version in package.json + run: npm version ${{ inputs.version }} --no-git-tag-version --allow-same-version + + - name: Update artifacthub-pkg.yml + run: | + VERSION="${{ inputs.version }}" + PKG_NAME=$(jq -r .name package.json) + RELEASE_URL="https://github.com/${{ github.repository }}/releases/download/v${VERSION}/${PKG_NAME}-${VERSION}.tar.gz" + sed -i "s/^version:.*/version: \"${VERSION}\"/" artifacthub-pkg.yml + sed -i "s|headlamp/plugin/archive-url:.*|headlamp/plugin/archive-url: \"${RELEASE_URL}\"|" artifacthub-pkg.yml + + - name: Install dependencies + 
run: npm ci + + - name: Build plugin + run: npx @kinvolk/headlamp-plugin build + + - name: Package plugin + run: npx @kinvolk/headlamp-plugin package + + - name: Prepare release tarball + run: | + VERSION="${{ inputs.version }}" + PKG_NAME=$(jq -r .name package.json) + TARBALL="${PKG_NAME}-${VERSION}.tar.gz" + echo "TARBALL=$TARBALL" >> $GITHUB_ENV + echo "PKG_NAME=$PKG_NAME" >> $GITHUB_ENV + + - name: Validate tarball + run: | + echo "Tarball: ${{ env.TARBALL }}" + ls -lh "${{ env.TARBALL }}" + tar -tzf "${{ env.TARBALL }}" | head -20 + tar -tzf "${{ env.TARBALL }}" | grep -q "main.js" || { echo "Error: main.js not found in tarball"; exit 1; } + + - name: Compute checksum + run: | + CHECKSUM=$(sha256sum "${{ env.TARBALL }}" | awk '{print $1}') + echo "CHECKSUM=$CHECKSUM" >> $GITHUB_ENV + sed -i "s|headlamp/plugin/archive-checksum:.*|headlamp/plugin/archive-checksum: sha256:${CHECKSUM}|" artifacthub-pkg.yml + + - name: Commit and tag + run: | + VERSION="${{ inputs.version }}" + git add package.json package-lock.json artifacthub-pkg.yml + git commit -m "release: v${VERSION}" + git tag "v${VERSION}" + git push origin main --tags + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + tag_name: v${{ inputs.version }} + name: v${{ inputs.version }} + generate_release_notes: true + files: ${{ env.TARBALL }} diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..a818c7f --- /dev/null +++ b/.mcp.json @@ -0,0 +1,12 @@ +{ + "mcpServers": { + "github": { + "type": "http", + "url": "https://api.githubcopilot.com/mcp/", + "headers": { "Authorization": "Bearer ${GITHUB_TOKEN}" } + }, + "kubernetes": { "type": "sse", "url": "http://localhost:8080/sse" }, + "flux": { "type": "sse", "url": "http://localhost:8081/sse" }, + "playwright": { "type": "sse", "url": "http://localhost:8086/sse" } + } +} diff --git a/.pluginrc b/.pluginrc new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.pluginrc @@ -0,0 +1 @@ +{} \ No newline at 
end of file diff --git a/.prettierrc.js b/.prettierrc.js new file mode 100644 index 0000000..fcb4dbd --- /dev/null +++ b/.prettierrc.js @@ -0,0 +1 @@ +module.exports = require('@headlamp-k8s/eslint-config/prettier-config'); diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..ea03f80 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,95 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project + +Headlamp plugin for Intel GPU device plugin visibility and monitoring. Read-only — monitors GpuDevicePlugin CRDs, GPU-capable nodes, pods requesting Intel GPU resources, and real-time power metrics via Prometheus. No cluster write operations. + +- **Plugin name**: `intel-gpu` +- **Target**: Headlamp >= v0.20.0 +- **Data sources**: GpuDevicePlugin CRDs (`deviceplugin.intel.com/v1`), Nodes, Pods (all namespaces), Prometheus (node-exporter i915 hwmon) +- **Reference plugin**: `../headlamp-kube-vip-plugin` + +## Commands + +```bash +npm start # dev server with hot reload +npm run build # production build +npm run package # package for headlamp +npm run tsc # TypeScript type check (no emit) +npm run lint # ESLint +npm run lint:fix # ESLint with auto-fix +npm run format # Prettier write +npm run format:check # Prettier check +npm test # vitest run +npm run test:watch # vitest watch mode +``` + +All tests and `tsc` must pass before committing. 
+ +## Architecture + +``` +src/ +├── index.tsx # Plugin entry: registerRoute, registerSidebarEntry, registerDetailsViewSection, registerResourceTableColumnsProcessor +├── api/ +│ ├── k8s.ts # Types + helpers (GpuDevicePlugin CRD, Nodes, Pods, type guards, formatters) +│ ├── k8s.test.ts # Tests for k8s helpers (70+ test cases) +│ ├── metrics.ts # Prometheus GPU power metrics (node-exporter i915 hwmon) +│ └── IntelGpuDataContext.tsx # Shared React context provider with data fetching +└── components/ + ├── OverviewPage.tsx # Dashboard: plugin health, GPU node summary, allocation, active pods + ├── DevicePluginsPage.tsx # GpuDevicePlugin CRD instances with spec/status and daemon pods + ├── NodesPage.tsx # Per-node GPU type, device count, allocation, workload pods + ├── PodsPage.tsx # All pods requesting Intel GPU resources with per-container detail + ├── MetricsPage.tsx # Real-time GPU power metrics from Prometheus + ├── NodeDetailSection.tsx # Injected into native Node detail page (capacity, utilization, pods) + ├── PodDetailSection.tsx # Injected into native Pod detail page (GPU requests per container) + └── integrations/ + └── NodeColumns.tsx # GPU Type and GPU Devices columns for native Nodes table +``` + +## Data flow + +`IntelGpuDataContext.tsx` uses **two fetching strategies**: + +1. **Headlamp hooks** (`K8s.ResourceClasses.*.useList()`) — for Nodes and Pods. +2. **`ApiProxy.request()`** — for GpuDevicePlugin CRDs and plugin daemon pods (with label selector fallback). + +The plugin gracefully degrades when the GpuDevicePlugin CRD is not installed — GPU nodes and pods are still shown based on resource labels and capacity. 
+ +## Key constants (src/api/k8s.ts) + +- API group: `deviceplugin.intel.com` +- API version: `v1` +- GPU resources: `gpu.intel.com/i915`, `gpu.intel.com/xe`, `gpu.intel.com/millicores`, `gpu.intel.com/memory.max` +- Resource prefix: `gpu.intel.com/` +- Node labels: `intel.feature.node.kubernetes.io/gpu`, `node-role.kubernetes.io/gpu`, `node-role.kubernetes.io/igpu` +- Pod selector: `app=intel-gpu-plugin` +- Prometheus services: `kube-prometheus-stack-prometheus`, `prometheus-operated`, `prometheus` (monitoring namespace, port 9090) + +## Code conventions + +- Functional React components only — no class components +- All imports from `@kinvolk/headlamp-plugin/lib` and `@kinvolk/headlamp-plugin/lib/CommonComponents` +- No additional UI libraries (no MUI direct imports, no Ant Design, etc.) +- TypeScript strict mode — no `any`, use `unknown` + type guards at API boundaries +- Context provider (`IntelGpuDataProvider`) wraps each route component in `index.tsx` +- Tests: vitest + @testing-library/react, mock with `vi.mock('@kinvolk/headlamp-plugin/lib', ...)` +- `vitest.setup.ts` provides a spec-compliant `localStorage` shim for Node 22+ compatibility + +## Testing + +Mock pattern for headlamp APIs: +```typescript +vi.mock('@kinvolk/headlamp-plugin/lib', () => ({ + ApiProxy: { request: vi.fn().mockResolvedValue({ items: [] }) }, + K8s: { + ResourceClasses: { + Node: { useList: vi.fn(() => [[], null]) }, + Pod: { useList: vi.fn(() => [[], null]) }, + }, + }, +})); +``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..73a77e2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,36 @@ +# Contributing + +Contributions are welcome! Please follow these guidelines. 
+ +## Development Setup + +```bash +git clone https://github.com/privilegedescalation/headlamp-intel-gpu-plugin.git +cd headlamp-intel-gpu-plugin +npm install +npm start +``` + +## Before Submitting a PR + +```bash +npm run tsc # TypeScript type check +npm run lint # ESLint +npm run format:check # Prettier +npm test # All tests must pass +``` + +## Code Style + +- TypeScript strict mode (no `any`) +- Functional React components only +- All UI from `@kinvolk/headlamp-plugin/lib/CommonComponents` +- Tests with vitest + @testing-library/react + +## Commit Messages + +Use conventional commit format: +- `feat:` new features +- `fix:` bug fixes +- `chore:` maintenance +- `docs:` documentation diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c73f62f --- /dev/null +++ b/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2025 privilegedescalation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e5b5b02 --- /dev/null +++ b/README.md @@ -0,0 +1,110 @@ +# headlamp-intel-gpu-plugin + +[![CI](https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/actions/workflows/ci.yaml/badge.svg)](https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/actions/workflows/ci.yaml) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + +A [Headlamp](https://headlamp.dev/) plugin providing visibility into [Intel GPU device plugin](https://intel.github.io/intel-device-plugins-for-kubernetes/) deployments on Kubernetes. + +## Features + +- **Overview Dashboard** — Plugin health, GPU node summary, allocation bar, active GPU pods +- **Device Plugins** — GpuDevicePlugin CRD instances with spec/status and daemon pod health +- **GPU Nodes** — Per-node GPU type (discrete/integrated), device count, allocation, workload pods +- **GPU Pods** — All pods requesting Intel GPU resources with per-container detail +- **Metrics** — Real-time GPU power draw (W) and TDP via Prometheus node-exporter i915 hwmon +- **Node Detail Integration** — Intel GPU section injected into native Headlamp Node detail views +- **Pod Detail Integration** — GPU resource requests/limits injected into native Pod detail views +- **Nodes Table Columns** — GPU Type and GPU Devices columns added to native Nodes table + +## Installation + +### Plugin Manager (Headlamp UI) + +Search for `intel-gpu` in the Headlamp Plugin Manager. 
+ +### Manual + +```bash +# Download a release tarball (curl does not expand "*" in URLs, so set VERSION to the release you want) +VERSION=0.3.0 && curl -LO "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v${VERSION}/intel-gpu-${VERSION}.tar.gz" + +# Extract to Headlamp plugins directory +mkdir -p ~/.config/Headlamp/plugins +tar -xzf intel-gpu-*.tar.gz -C ~/.config/Headlamp/plugins/ +``` + +### From Source + +```bash +git clone https://github.com/privilegedescalation/headlamp-intel-gpu-plugin.git +cd headlamp-intel-gpu-plugin +npm install +npm run build +``` + +## Requirements + +- Headlamp >= v0.20.0 +- Intel GPU device plugin deployed (optional — plugin gracefully degrades without it) +- Optional: Node Feature Discovery with Intel GPU labels +- Optional: kube-prometheus-stack with node-exporter for GPU power metrics + +## RBAC + +This plugin is **read-only** and requires the following permissions: + +| Resource | API Version | Verbs | +|----------|-------------|-------| +| nodes | v1 | list, get, watch | +| pods | v1 | list, get, watch | +| gpudeviceplugins | deviceplugin.intel.com/v1 | list, get | + +For metrics, Prometheus must be accessible via the Headlamp API proxy in the `monitoring` namespace. 
+ +## Architecture + +``` +src/ +├── index.tsx # Plugin entry point +├── api/ +│ ├── k8s.ts # Types and helper functions +│ ├── metrics.ts # Prometheus GPU metrics +│ └── IntelGpuDataContext.tsx # React context provider +└── components/ + ├── OverviewPage.tsx # Dashboard + ├── DevicePluginsPage.tsx # Device plugin CRDs + ├── NodesPage.tsx # GPU nodes + ├── PodsPage.tsx # GPU pods + ├── MetricsPage.tsx # Power metrics + ├── NodeDetailSection.tsx # Injected into Node detail view + ├── PodDetailSection.tsx # Injected into Pod detail view + └── integrations/ + └── NodeColumns.tsx # Nodes table columns +``` + +## Development + +```bash +npm install +npm start # dev server +npm test # run tests +npm run tsc # type check +npm run lint # ESLint +``` + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| No GPU nodes shown | No Intel GPU labels or resources on nodes | Install Intel Node Feature Discovery or Intel GPU device plugin | +| CRD not available warning | GpuDevicePlugin CRD not installed | Install Intel device plugins operator — plugin still works without it | +| No metrics data | Prometheus not found | Deploy kube-prometheus-stack in the `monitoring` namespace | +| Metrics show only discrete GPUs | Integrated GPUs lack hwmon | Expected — iGPU driver doesn't expose hwmon power data | + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines. + +## License + +Apache License 2.0. See [LICENSE](LICENSE) for details. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..982a347 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,22 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +|---------|-----------| +| latest | Yes | + +## Plugin Scope + +This plugin is **read-only**. It does not perform any write operations against the Kubernetes cluster. 
It reads: + +- Nodes +- Pods (all namespaces) +- GpuDevicePlugin CRDs (`deviceplugin.intel.com/v1`) +- Prometheus metrics (via API proxy in `monitoring` namespace) + +All data is fetched through Headlamp's built-in API proxy, which respects the user's existing RBAC permissions. + +## Reporting a Vulnerability + +Please report security vulnerabilities privately via GitHub's "Report a vulnerability" form (repository Security tab) or by emailing the maintainers directly; do not open a public issue. diff --git a/artifacthub-pkg.yml b/artifacthub-pkg.yml index 88bc7d6..1c2a677 100644 --- a/artifacthub-pkg.yml +++ b/artifacthub-pkg.yml @@ -1,5 +1,5 @@ version: "0.3.0" -name: headlamp-intel-gpu-plugin +name: intel-gpu displayName: Intel GPU description: >- Headlamp plugin for Intel GPU device plugin visibility and monitoring. @@ -71,7 +71,7 @@ changes: description: "App bar health badge: hidden when no Intel GPU plugin detected" annotations: - headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v0.3.0/headlamp-intel-gpu-plugin-0.3.0.tar.gz" + headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v0.3.0/intel-gpu-0.3.0.tar.gz" headlamp/plugin/archive-checksum: "sha256:fdc53099ee3123680f24fe4a319b753ca3d030aac31abd4e3f383221085c9c2d" headlamp/plugin/version-compat: ">=0.20.0" headlamp/plugin/distro-compat: "in-cluster,web,app" diff --git a/package-lock.json b/package-lock.json index d94bc1d..c206d75 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { - "name": "headlamp-intel-gpu-plugin", - "version": "0.1.0", + "name": "intel-gpu", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "headlamp-intel-gpu-plugin", - "version": "0.1.0", + "name": "intel-gpu", + "version": "0.3.0", "license": "Apache-2.0", "devDependencies": { "@kinvolk/headlamp-plugin": "^0.13.0" diff --git a/package.json b/package.json index 61da9b2..21ee123 100644 --- a/package.json +++ b/package.json @@ 
-1,12 +1,16 @@ { - "name": "headlamp-intel-gpu-plugin", + "name": "intel-gpu", "version": "0.3.0", "description": "Headlamp plugin for Intel GPU device plugin visibility and monitoring", "repository": { "type": "git", - "url": "https://github.com/cpfarhood/headlamp-intel-gpu-plugin.git" + "url": "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin.git" }, - "author": "cpfarhood", + "bugs": { + "url": "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/issues" + }, + "homepage": "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin#readme", + "author": "privilegedescalation", "license": "Apache-2.0", "scripts": { "start": "headlamp-plugin start", diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..22a9943 --- /dev/null +++ b/renovate.json @@ -0,0 +1,4 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": ["config:recommended"] +}