Compare commits

..

3 Commits

Author SHA1 Message Date
privilegedescalation-engineer 43b812da7b test: add unit test for withTimeout in IntelGpuDataContext
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 06:31:00 +00:00
Gandalf the Greybeard 5670c008e1 fix: add request timeout wrapper to prevent E2E test hang
Add withTimeout() helper that wraps ApiProxy.request calls with a 2s timeout.
This prevents the plugin from hanging indefinitely when CRD requests fail
or network issues occur in the E2E environment.

Root cause: ApiProxy.request to non-existent CRDs would hang forever,
causing the Loading Intel GPU data... progressbar to never resolve.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-25 05:57:15 +00:00
privilegedescalation-engineer f9325772bd fix(e2e): use specific regex for nodes page heading
The /node/i regex was too broad and matched both the page heading
'Intel GPU — Nodes' and the empty state 'No GPU Nodes Found',
causing a strict mode violation in Playwright.

Use /intel gpu.*nodes/i to match only the actual page heading,
which contains 'Intel GPU' before 'Nodes'.
2026-03-25 01:55:02 +00:00
18 changed files with 588 additions and 267 deletions
+4 -202
View File
@@ -2,210 +2,12 @@ name: CI
on:
push:
branches: ['**']
branches: [main]
pull_request:
branches: [main, dev, uat]
branches: [main]
workflow_dispatch:
permissions:
contents: read
workflow_call:
jobs:
ci:
runs-on: ubuntu-latest
timeout-minutes: 10
container: node:22-slim
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Install Python
run: apt-get update && apt-get install -y --no-install-recommends python3 python3-yaml
- name: Validate artifacthub-pkg.yml
run: |
python3 - <<'EOF'
import sys, re
try:
import yaml
except ImportError:
print("::warning::PyYAML not available, skipping artifacthub-pkg.yml validation")
sys.exit(0)
try:
with open("artifacthub-pkg.yml") as f:
pkg = yaml.safe_load(f)
except FileNotFoundError:
print("::error::artifacthub-pkg.yml not found")
sys.exit(1)
except yaml.YAMLError as e:
print(f"::error::artifacthub-pkg.yml is invalid YAML: {e}")
sys.exit(1)
errors = []
for field in ["version", "name", "description", "homeURL"]:
if not pkg.get(field):
errors.append(f"Missing required field: {field}")
version = pkg.get("version", "")
if version and not re.match(r'^\d+\.\d+\.\d+$', str(version)):
errors.append(f"version '{version}' is not SemVer (expected X.Y.Z)")
annotations = pkg.get("annotations", {}) or {}
archive_url = annotations.get("headlamp/plugin/archive-url", "")
archive_checksum = annotations.get("headlamp/plugin/archive-checksum", "")
if not archive_url:
errors.append("Missing annotation: headlamp/plugin/archive-url")
if not archive_checksum:
errors.append("Missing annotation: headlamp/plugin/archive-checksum")
elif not re.match(r'^sha256:[0-9a-f]{64}$', str(archive_checksum)):
errors.append(f"archive-checksum has unexpected format: '{archive_checksum}' (expected sha256:<64 hex chars>)")
if errors:
for e in errors:
print(f"::error::{e}")
sys.exit(1)
print(f"artifacthub-pkg.yml valid: name={pkg['name']} version={pkg['version']}")
EOF
- name: Detect package manager
id: pkg-manager
run: |
if [ -f "pnpm-lock.yaml" ]; then
echo "manager=pnpm" >> $GITHUB_OUTPUT
PM=$(python3 -c "import json,sys; d=json.load(open('package.json')); print('true' if d.get('packageManager','').startswith('pnpm@') else 'false')" 2>/dev/null || echo "false")
echo "has_package_manager=$PM" >> $GITHUB_OUTPUT
else
echo "manager=npm" >> $GITHUB_OUTPUT
echo "has_package_manager=false" >> $GITHUB_OUTPUT
fi
- name: Setup Node
uses: actions/setup-node@v6
with:
node-version: '22'
cache: ${{ steps.pkg-manager.outputs.manager == 'npm' && 'npm' || '' }}
- name: Setup pnpm (via Corepack, reads version from packageManager field)
if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'true'
run: |
npm install -g corepack
corepack enable pnpm
corepack install
- name: Setup pnpm (version latest)
if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'false'
uses: pnpm/action-setup@v5
with:
run_install: false
version: latest
- name: Get pnpm store directory
id: pnpm-store
if: steps.pkg-manager.outputs.manager == 'pnpm'
run: echo "dir=$(pnpm store path --silent)" >> $GITHUB_OUTPUT
- name: Cache pnpm store
if: steps.pkg-manager.outputs.manager == 'pnpm'
uses: actions/cache@v5
with:
path: ${{ steps.pnpm-store.outputs.dir }}
key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-pnpm-
- name: Validate pnpm lockfile freshness
if: steps.pkg-manager.outputs.manager == 'pnpm'
run: |
if [ ! -f "pnpm-lock.yaml" ]; then
echo "No pnpm-lock.yaml found, skipping lockfile freshness check"
exit 0
fi
if ! grep -q 'overrides:' pnpm-lock.yaml 2>/dev/null; then
echo "No overrides section in pnpm-lock.yaml, skipping lockfile freshness check"
exit 0
fi
echo "Detected pnpm-lock.yaml with overrides section. Checking lockfile freshness..."
ERR_FILE=$(mktemp)
if pnpm install --frozen-lockfile 2>&1 | tee "$ERR_FILE"; then
echo "Lockfile is fresh."
else
if grep -q "CONFIG_MISMATCH\|EBADLOCKFILE\|ERR_PNPM_LOCKFILE" "$ERR_FILE"; then
echo ""
echo "::error::pnpm-lock.yaml is out of sync with package.json overrides."
echo "::error::Run 'pnpm install' to regenerate the lockfile and commit the updated pnpm-lock.yaml."
rm -f "$ERR_FILE"
exit 1
fi
rm -f "$ERR_FILE"
echo "::warning::Install failed with a different error. Will retry in the Install dependencies step."
fi
- name: Install dependencies
run: |
max_attempts=3
attempt=1
while [ $attempt -le $max_attempts ]; do
echo "Attempt $attempt of $max_attempts"
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
pnpm install --frozen-lockfile && break
else
npm ci && break
fi
if [ $attempt -lt $max_attempts ]; then
echo "::warning::Install step failed on attempt $attempt. Retrying in 5 seconds..."
sleep 5
fi
attempt=$((attempt + 1))
done
if [ $attempt -gt $max_attempts ]; then
echo "::error::Install step failed after $max_attempts attempts."
exit 1
fi
- name: Build plugin
run: npx @kinvolk/headlamp-plugin build
- name: Lint
run: |
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
pnpm run lint
else
npm run lint
fi
- name: Type-check
run: |
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
pnpm run tsc
else
npm run tsc
fi
- name: Format check
run: |
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
pnpm run format:check
else
npm run format:check
fi
- name: Run tests
run: |
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
pnpm test
else
npm test
fi
- name: Security audit
run: |
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
npx audit-ci --pnpm --audit-level=high --config ./audit-ci.jsonc
else
npx audit-ci --npm --audit-level=high --config ./audit-ci.jsonc
fi
uses: privilegedescalation/.github/.github/workflows/plugin-ci.yaml@main
+7 -10
View File
@@ -1,21 +1,18 @@
name: Promotion Gate
name: Dual Approval (CTO + QA)
# Calls the shared promotion gate workflow.
# dev PRs: no gate (engineer self-merges).
# uat PRs: QA approval required.
# main PRs: UAT approval required (uat→main promotions).
# Calls the shared dual-approval-check workflow.
# Passes when both privilegedescalation-cto and privilegedescalation-qa
# have approved the PR. Add "Dual Approval (CTO + QA)" to required_status_checks
# in branch protection to enforce this gate.
on:
pull_request_review:
types: [submitted, dismissed]
pull_request:
branches: [uat, main]
branches: [main]
types: [opened, reopened, synchronize]
jobs:
promotion-gate:
dual-approval:
uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
secrets: inherit
with:
pr_number: ${{ github.event.pull_request.number }}
+103
View File
@@ -0,0 +1,103 @@
name: E2E Tests
on:
push:
branches: [main]
pull_request:
branches: [main]
workflow_dispatch:
permissions:
contents: read
# Only one E2E run at a time: the shared E2E_RELEASE (headlamp-e2e) in
# privilegedescalation-dev cannot be shared across concurrent runs.
# cancel-in-progress: false (queue, don't cancel) — cancelling in-flight
# runs may skip the if: always() teardown, leaving dangling cluster resources.
concurrency:
group: e2e-${{ github.repository }}
cancel-in-progress: false
env:
E2E_NAMESPACE: privilegedescalation-dev
E2E_RELEASE: headlamp-e2e
# Pin to a known-good Headlamp version. Using :latest is risky because
# the tag can change between CI runs, causing flaky failures when a newer
# image is pulled on some nodes but not others (IfNotPresent pull policy).
# Update this when Headlamp is upgraded in production (kube-system).
HEADLAMP_VERSION: v0.40.1
jobs:
e2e:
runs-on: runners-privilegedescalation
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: '22'
cache: 'npm'
- name: Setup kubectl
uses: azure/setup-kubectl@v4
- name: Install dependencies
run: npm ci
- name: Build plugin
run: npx @kinvolk/headlamp-plugin build
- name: Deploy E2E Headlamp instance
run: scripts/deploy-e2e-headlamp.sh
- name: Load E2E environment
run: |
if [ -f .env.e2e ]; then
cat .env.e2e >> "$GITHUB_ENV"
else
echo "::error::deploy-e2e-headlamp.sh did not produce .env.e2e"
exit 1
fi
- name: Install Playwright browsers
run: npx playwright install --with-deps chromium
- name: Run E2E tests
run: npm run e2e
env:
HEADLAMP_URL: ${{ env.HEADLAMP_URL }}
HEADLAMP_TOKEN: ${{ env.HEADLAMP_TOKEN }}
- name: Collect deployment diagnostics on failure
if: failure()
run: |
echo "=== Pod state ==="
kubectl get pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
echo "=== Pod describe ==="
kubectl describe pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
echo "=== Recent namespace events ==="
kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
- name: Teardown E2E instance
if: always()
run: scripts/teardown-e2e-headlamp.sh
- name: Upload Playwright report
uses: actions/upload-artifact@v7
if: failure()
with:
name: playwright-report
path: playwright-report/
retention-days: 7
- name: Upload test results
uses: actions/upload-artifact@v7
if: failure()
with:
name: test-results
path: test-results/
retention-days: 7
+5
View File
@@ -1,3 +1,8 @@
node_modules/
dist/
*.tar.gz
.playwright-mcp/
e2e/.auth/state.json
.env.e2e
test-results/
playwright-report/
+3 -3
View File
@@ -1,4 +1,4 @@
version: "1.1.0"
version: "1.0.0"
name: headlamp-intel-gpu
displayName: Intel GPU
description: >-
@@ -99,7 +99,7 @@ screenshots:
url: https://raw.githubusercontent.com/privilegedescalation/headlamp-intel-gpu-plugin/main/docs/screenshots/03-metrics.svg
annotations:
headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v1.1.0/intel-gpu-1.1.0.tar.gz"
headlamp/plugin/archive-checksum: sha256:e212381f38c331383604b06f6552997fcba5c8b42a3bd828e3b43ed3e5028448
headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-intel-gpu-plugin/releases/download/v1.0.0/intel-gpu-1.0.0.tar.gz"
headlamp/plugin/archive-checksum: sha256:93d6c531e7c12440c9625138f0645fc0c3521b574d0089492759699b324943f0
headlamp/plugin/version-compat: ">=0.20.0"
headlamp/plugin/distro-compat: "in-cluster,web,app"
-20
View File
@@ -1,20 +0,0 @@
{
// Allowlist for inherited dev-dependency CVEs from @kinvolk/headlamp-plugin
// CTO decision (PRI-854): these high-severity vulns are dev/build-time only,
// trace to @kinvolk/headlamp-plugin transitive deps (Picomatch, Vite, lodash),
// and do NOT ship in production plugin artifacts.
"allowlist": [
{
"id": "GHSA-hhpm-516h-p3p6",
"reason": "Picomatch ReDoS: devDependency only, does not ship in production plugin bundle"
},
{
"id": "GHSA-36xf-7xpp-53w5",
"reason": "Vite arbitrary file read: devDependency only, does not ship in production plugin bundle"
},
{
"id": "GHSA-jf8v-p3pp-93qh",
"reason": "lodash code injection via _.template: devDependency only, does not ship in production plugin bundle"
}
]
}
View File
+83
View File
@@ -0,0 +1,83 @@
import { test as setup, expect, Page } from '@playwright/test';
const AUTH_STATE_PATH = 'e2e/.auth/state.json';
async function authenticateWithOIDC(page: Page, username: string, password: string): Promise<void> {
// Navigate to login — Headlamp redirects / to /c/main/login
await page.goto('/');
await page.waitForURL('**/login');
// Click "Sign In" and capture the Authentik popup
const popupPromise = page.waitForEvent('popup');
await page.getByRole('button', { name: /sign in/i }).click();
const popup = await popupPromise;
// Wait for the Authentik popup to fully load before interacting
await popup.waitForLoadState('domcontentloaded');
await popup.waitForLoadState('networkidle');
// Authentik step 1: fill username — wait for the form to render
const usernameField = popup.getByRole('textbox', { name: /email or username/i });
await usernameField.waitFor({ state: 'visible', timeout: 15_000 });
await usernameField.fill(username);
await popup.getByRole('button', { name: /log in/i }).click();
// Authentik step 2: fill password — wait for the next step to load
await popup.waitForLoadState('networkidle');
const passwordField = popup.getByRole('textbox', { name: /password/i });
await passwordField.waitFor({ state: 'visible', timeout: 15_000 });
await passwordField.fill(password);
await popup.getByRole('button', { name: /continue|log in/i }).click();
// Wait for the popup to close (Authentik redirects back, Headlamp processes callback)
await popup.waitForEvent('close', { timeout: 15_000 });
// Original page should now be authenticated — wait for sidebar
await expect(page.getByRole('navigation', { name: 'Navigation' })).toBeVisible({
timeout: 15_000,
});
}
async function authenticateWithToken(page: Page, token: string): Promise<void> {
await page.goto('/');
// Headlamp goes to /token directly when no OIDC is configured,
// or through /login when OIDC is configured
await page.waitForURL(/\/(login|token)$/);
if (page.url().includes('/login')) {
// OIDC login page — click "use a token" to reach token auth.
// Wait explicitly before clicking so failures surface at 15 s
// with a clear message rather than silently timing out at 60 s.
const useTokenBtn = page.getByRole('button', { name: /use a token/i });
await useTokenBtn.waitFor({ state: 'visible', timeout: 15_000 });
await useTokenBtn.click();
await page.waitForURL('**/token');
}
// Fill the "ID token" field and submit
await page.getByRole('textbox', { name: /id token/i }).fill(token);
await page.getByRole('button', { name: /authenticate/i }).click();
// Wait for the main UI to load
await expect(page.getByRole('navigation', { name: 'Navigation' })).toBeVisible({
timeout: 15_000,
});
}
setup('authenticate with Headlamp', async ({ page }) => {
const username = process.env.AUTHENTIK_USERNAME;
const password = process.env.AUTHENTIK_PASSWORD;
const token = process.env.HEADLAMP_TOKEN;
if (username && password) {
await authenticateWithOIDC(page, username, password);
} else if (token) {
await authenticateWithToken(page, token);
} else {
throw new Error(
'Set AUTHENTIK_USERNAME + AUTHENTIK_PASSWORD for OIDC auth, or HEADLAMP_TOKEN for token auth'
);
}
await page.context().storageState({ path: AUTH_STATE_PATH });
});
+85
View File
@@ -0,0 +1,85 @@
import { test, expect } from '@playwright/test';
test.describe('Intel GPU plugin smoke tests', () => {
test('sidebar contains intel-gpu entry', async ({ page }) => {
await page.goto('/');
const sidebar = page.getByRole('navigation', { name: 'Navigation' });
await expect(sidebar).toBeVisible({ timeout: 15_000 });
await expect(sidebar.getByRole('button', { name: 'intel-gpu' })).toBeVisible();
});
test('sidebar intel-gpu entry is clickable and navigates to overview', async ({ page }) => {
await page.goto('/');
const sidebar = page.getByRole('navigation', { name: 'Navigation' });
await expect(sidebar).toBeVisible({ timeout: 15_000 });
const gpuEntry = sidebar.getByRole('button', { name: 'intel-gpu' });
await expect(gpuEntry).toBeVisible();
await gpuEntry.click();
// Should navigate to the overview route
await expect(page).toHaveURL(/\/intel-gpu$/);
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible();
});
test('overview page renders GPU device list or empty state', async ({ page }) => {
await page.goto('/c/main/intel-gpu');
// Overview heading should be present
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible({
timeout: 15_000,
});
// Either a populated table/list or an empty-state indicator must be visible
const hasTable = await page.locator('table').first().isVisible().catch(() => false);
const hasEmptyState = await page
.locator('text=/no.*gpu|no.*device|0 node|empty/i')
.first()
.isVisible()
.catch(() => false);
expect(hasTable || hasEmptyState).toBe(true);
});
test('device plugins page renders or shows empty state', async ({ page }) => {
await page.goto('/c/main/intel-gpu/device-plugins');
await expect(page.getByRole('heading', { name: /device plugin/i })).toBeVisible({
timeout: 15_000,
});
const hasTable = await page.locator('table').first().isVisible().catch(() => false);
const hasEmptyState = await page
.locator('text=/no.*plugin|no.*device|empty/i')
.first()
.isVisible()
.catch(() => false);
expect(hasTable || hasEmptyState).toBe(true);
});
test('navigation between plugin views works', async ({ page }) => {
// Headlamp sidebar child links only appear when already on a child route,
// not after clicking the parent entry from the overview. Test route
// accessibility via direct navigation — each route must render its heading.
await page.goto('/c/main/intel-gpu');
await expect(page.getByRole('heading', { name: /intel.gpu/i })).toBeVisible({
timeout: 15_000,
});
await page.goto('/c/main/intel-gpu/nodes');
await expect(page.getByRole('heading', { name: /intel gpu.*nodes/i })).toBeVisible({ timeout: 15_000 });
await page.goto('/c/main/intel-gpu/pods');
await expect(page.getByRole('heading', { name: /pod/i })).toBeVisible({ timeout: 15_000 });
await page.goto('/c/main/intel-gpu/metrics');
await expect(page.getByRole('heading', { name: /metric/i })).toBeVisible({ timeout: 15_000 });
});
test('plugin settings page shows intel-gpu plugin entry', async ({ page }) => {
await page.goto('/settings/plugins');
// Wait for plugin list to load — plugin scripts load asynchronously
const pluginEntry = page.locator('text=intel-gpu').first();
await expect(pluginEntry).toBeVisible({ timeout: 30_000 });
});
});
+5 -5
View File
@@ -1,12 +1,12 @@
{
"name": "intel-gpu",
"version": "1.1.0",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "intel-gpu",
"version": "1.1.0",
"version": "1.0.0",
"license": "Apache-2.0",
"devDependencies": {
"@kinvolk/headlamp-plugin": "^0.13.0",
@@ -11600,9 +11600,9 @@
}
},
"node_modules/lodash": {
"version": "4.18.1",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz",
"integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==",
"version": "4.17.23",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz",
"integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==",
"dev": true,
"license": "MIT"
},
+6 -5
View File
@@ -1,6 +1,6 @@
{
"name": "intel-gpu",
"version": "1.1.0",
"version": "1.0.0",
"description": "Headlamp plugin for Intel GPU device plugin visibility and monitoring",
"repository": {
"type": "git",
@@ -22,7 +22,9 @@
"format": "prettier --write src/",
"format:check": "prettier --check src/",
"test": "vitest run",
"test:watch": "vitest"
"test:watch": "vitest",
"e2e": "playwright test",
"e2e:headed": "playwright test --headed"
},
"peerDependencies": {
"react": "^18.0.0",
@@ -30,6 +32,7 @@
},
"devDependencies": {
"@kinvolk/headlamp-plugin": "^0.13.0",
"@playwright/test": "^1.58.2",
"@testing-library/jest-dom": "^6.4.8",
"@testing-library/react": "^16.0.0",
"@testing-library/user-event": "^14.5.2",
@@ -41,8 +44,6 @@
},
"overrides": {
"tar": "^7.5.11",
"undici": "^7.24.3",
"lodash": ">=4.18.0",
"elliptic": ">=6.6.1"
"undici": "^7.24.3"
}
}
+27
View File
@@ -0,0 +1,27 @@
import { defineConfig, devices } from '@playwright/test';
export default defineConfig({
testDir: './e2e',
timeout: 30_000,
expect: { timeout: 10_000 },
fullyParallel: false,
forbidOnly: !!process.env.CI,
retries: process.env.CI ? 1 : 0,
reporter: 'list',
use: {
baseURL: process.env.HEADLAMP_URL || (() => { throw new Error('HEADLAMP_URL is required — run scripts/deploy-e2e-headlamp.sh first'); })(),
trace: 'on-first-retry',
screenshot: 'only-on-failure',
},
projects: [
{ name: 'setup', testMatch: /auth\.setup\.ts/, timeout: 60_000 },
{
name: 'chromium',
use: {
...devices['Desktop Chrome'],
storageState: 'e2e/.auth/state.json',
},
dependencies: ['setup'],
},
],
});
+204
View File
@@ -0,0 +1,204 @@
#!/usr/bin/env bash
# deploy-e2e-headlamp.sh
#
# Deploys a stock Headlamp instance with the intel-gpu plugin loaded via
# a ConfigMap volume mount. No custom Docker images — the plugin is built
# in CI and injected as a ConfigMap.
#
# E2E resources are deployed to the `privilegedescalation-dev` namespace. Nothing
# persists beyond the test run — teardown cleans up all created resources.
#
# Prerequisites:
# - Plugin built (dist/ exists with plugin-main.js + package.json)
# - kubectl configured with cluster access
# - RBAC applied: kubectl apply -f deployment/e2e-ci-runner-rbac.yaml
#
# Environment:
# E2E_NAMESPACE — namespace for E2E Headlamp (default: privilegedescalation-dev)
# E2E_RELEASE — release/resource name prefix (default: headlamp-e2e)
# HEADLAMP_VERSION — Headlamp image tag (default: latest)
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
DIST_DIR="$REPO_ROOT/dist"
E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}"
E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
HEADLAMP_VERSION="${HEADLAMP_VERSION:-latest}"
if [ ! -d "$DIST_DIR" ]; then
echo "ERROR: dist/ not found. Run 'npm run build' first." >&2
exit 1
fi
# --- Preflight: verify RBAC before touching the cluster ---
echo "Checking RBAC permissions in namespace '${E2E_NAMESPACE}'..."
if ! kubectl auth can-i delete configmaps -n "$E2E_NAMESPACE" --quiet 2>/dev/null; then
echo "ERROR: Missing RBAC — cannot delete configmaps in namespace '${E2E_NAMESPACE}'." >&2
echo " Apply RBAC first: kubectl apply -f deployment/e2e-ci-runner-rbac.yaml" >&2
exit 1
fi
echo "=== E2E Headlamp Deployment ==="
echo " Image: ghcr.io/headlamp-k8s/headlamp:${HEADLAMP_VERSION}"
echo " Namespace: $E2E_NAMESPACE"
echo " Release: $E2E_RELEASE"
# --- Create ConfigMap from built plugin ---
echo ""
echo "Creating ConfigMap with plugin files..."
# Delete existing ConfigMap if present (idempotent redeploy)
kubectl delete configmap headlamp-intel-gpu-plugin \
-n "$E2E_NAMESPACE" --ignore-not-found
# Create ConfigMap from dist/ contents and package.json
kubectl create configmap headlamp-intel-gpu-plugin \
-n "$E2E_NAMESPACE" \
--from-file="$DIST_DIR" \
--from-file=package.json="$REPO_ROOT/package.json"
# --- Tear down any existing E2E deployment for a clean start ---
echo ""
echo "Removing any existing E2E deployment (clean-start)..."
kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
# --- Deploy Headlamp via kubectl apply ---
echo ""
echo "Deploying Headlamp E2E instance..."
kubectl apply -f - <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
name: ${E2E_RELEASE}
namespace: ${E2E_NAMESPACE}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ${E2E_RELEASE}
namespace: ${E2E_NAMESPACE}
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
template:
metadata:
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
spec:
serviceAccountName: ${E2E_RELEASE}
automountServiceAccountToken: true
securityContext: {}
containers:
- name: headlamp
image: ghcr.io/headlamp-k8s/headlamp:${HEADLAMP_VERSION}
imagePullPolicy: IfNotPresent
securityContext:
runAsNonRoot: true
privileged: false
runAsUser: 100
runAsGroup: 101
args:
- "-in-cluster"
- "-in-cluster-context-name=main"
- "-plugins-dir=/headlamp/plugins"
ports:
- name: http
containerPort: 4466
protocol: TCP
readinessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 6
livenessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 10
periodSeconds: 10
volumeMounts:
- name: intel-gpu-plugin
mountPath: /headlamp/plugins/headlamp-intel-gpu
readOnly: true
volumes:
- name: intel-gpu-plugin
configMap:
name: headlamp-intel-gpu-plugin
---
apiVersion: v1
kind: Service
metadata:
name: ${E2E_RELEASE}
namespace: ${E2E_NAMESPACE}
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
spec:
type: ClusterIP
selector:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
EOF
echo "Waiting for rollout..."
kubectl rollout status "deployment/${E2E_RELEASE}" \
-n "$E2E_NAMESPACE" --timeout=120s
# --- Generate a service URL for tests ---
SVC_URL="http://${E2E_RELEASE}.${E2E_NAMESPACE}.svc.cluster.local"
# --- Wait for DNS and HTTP reachability ---
echo ""
echo "Waiting for ${SVC_URL} to be reachable..."
ATTEMPTS=0
MAX_ATTEMPTS=24 # 24 × 5s = 120s max
until curl -sf --max-time 5 "${SVC_URL}" -o /dev/null 2>/dev/null; do
ATTEMPTS=$((ATTEMPTS + 1))
if [ "$ATTEMPTS" -ge "$MAX_ATTEMPTS" ]; then
echo "ERROR: ${SVC_URL} not reachable after $((MAX_ATTEMPTS * 5))s" >&2
exit 1
fi
echo " [${ATTEMPTS}/${MAX_ATTEMPTS}] not yet reachable, retrying in 5s..."
sleep 5
done
echo ""
echo "E2E Headlamp is ready at: ${SVC_URL}"
echo " export HEADLAMP_URL=${SVC_URL}"
# --- Generate a token for test auth ---
echo ""
echo "Creating service account token for E2E auth..."
kubectl create serviceaccount headlamp-e2e-test \
-n "$E2E_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
TOKEN=$(kubectl create token headlamp-e2e-test -n "$E2E_NAMESPACE" --duration=1h 2>/dev/null || echo "")
if [ -n "$TOKEN" ]; then
echo " export HEADLAMP_TOKEN=<generated>"
echo ""
echo "HEADLAMP_URL=${SVC_URL}" > "$REPO_ROOT/.env.e2e"
echo "HEADLAMP_TOKEN=${TOKEN}" >> "$REPO_ROOT/.env.e2e"
echo "Wrote .env.e2e with HEADLAMP_URL and HEADLAMP_TOKEN"
else
echo " WARNING: Could not generate token. Set HEADLAMP_TOKEN manually or use OIDC."
fi
echo ""
echo "E2E deployment complete."
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# teardown-e2e-headlamp.sh
#
# Tears down the dedicated E2E Headlamp instance deployed by deploy-e2e-headlamp.sh.
#
# Environment:
# E2E_NAMESPACE — namespace to clean up (default: privilegedescalation-dev)
# E2E_RELEASE — release/resource name prefix (default: headlamp-e2e)
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}"
E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
echo "=== E2E Headlamp Teardown ==="
echo " Namespace: $E2E_NAMESPACE"
echo " Release: $E2E_RELEASE"
echo "Removing Headlamp Deployment, Service, and ServiceAccount..."
kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found
kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found
kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found
echo "Cleaning up ConfigMap..."
kubectl delete configmap headlamp-intel-gpu-plugin -n "$E2E_NAMESPACE" --ignore-not-found
echo "Cleaning up test service account..."
kubectl delete serviceaccount headlamp-e2e-test -n "$E2E_NAMESPACE" --ignore-not-found
# Clean up .env.e2e if present
if [ -f "$REPO_ROOT/.env.e2e" ]; then
rm "$REPO_ROOT/.env.e2e"
echo "Removed .env.e2e"
fi
echo ""
echo "E2E teardown complete."
+7 -13
View File
@@ -154,24 +154,18 @@ describe('IntelGpuDataProvider', () => {
it('treats a hanging CRD request as unavailable after 2s timeout', async () => {
vi.useFakeTimers();
const nodeWrapper = { jsonData: {} };
vi.mocked(K8s.ResourceClasses.Node.useList).mockReturnValue([[nodeWrapper], null] as any);
vi.mocked(K8s.ResourceClasses.Pod.useList).mockReturnValue([[nodeWrapper], null] as any);
vi.mocked(ApiProxy.request)
.mockReturnValueOnce(new Promise(() => {}))
.mockResolvedValueOnce({ items: [] })
.mockResolvedValueOnce({ items: [] })
.mockResolvedValueOnce({ items: [] });
vi.mocked(K8s.ResourceClasses.Node.useList).mockReturnValue([[], null] as any);
vi.mocked(K8s.ResourceClasses.Pod.useList).mockReturnValue([[], null] as any);
vi.mocked(ApiProxy.request).mockReturnValue(new Promise(() => {}));
const { result } = renderHook(() => useIntelGpuContext(), { wrapper: Wrapper });
expect(result.current.loading).toBe(true);
await act(async () => {
await vi.runAllTimersAsync();
});
vi.advanceTimersByTime(2000);
await act(async () => {});
expect(result.current.crdAvailable).toBe(false);
expect(result.current.loading).toBe(false);
expect(result.current.crdAvailable).toBe(false);
vi.useRealTimers();
});
});
+4 -1
View File
@@ -154,7 +154,10 @@ export function IntelGpuDataProvider({ children }: { children: React.ReactNode }
for (const url of pluginPodSelectors) {
try {
const list = await withTimeout(ApiProxy.request(url), DEFAULT_REQUEST_TIMEOUT_MS);
const list = await withTimeout(
ApiProxy.request(url),
DEFAULT_REQUEST_TIMEOUT_MS
);
if (!cancelled && isKubeList(list)) {
const gpuPluginPods = filterIntelGpuPluginPods(list.items);
foundPluginPods.push(...gpuPluginPods);
+1 -3
View File
@@ -106,13 +106,11 @@ describe('MetricsPage', () => {
vi.clearAllMocks();
});
it('shows loader when ctxLoading=true but heading is visible immediately', () => {
it('shows loader when ctxLoading=true', () => {
vi.mocked(useIntelGpuContext).mockReturnValue(makeContext({ loading: true }));
// fetchGpuMetrics should never be called in loading state
vi.mocked(fetchGpuMetrics).mockResolvedValue(null);
render(<MetricsPage />);
// Heading renders immediately, loader appears below it while waiting for context
expect(screen.getByText('Intel GPU — Metrics')).toBeInTheDocument();
expect(screen.getByTestId('loader')).toHaveTextContent('Loading Intel GPU data...');
});
+6 -5
View File
@@ -230,6 +230,10 @@ export default function MetricsPage() {
};
}, [ctxLoading, fetchSeq]);
if (ctxLoading) {
return <Loader title="Loading Intel GPU data..." />;
}
return (
<>
<div
@@ -243,7 +247,7 @@ export default function MetricsPage() {
<SectionHeader title="Intel GPU — Metrics" />
<button
onClick={() => void doFetch()}
disabled={fetching || ctxLoading}
disabled={fetching}
aria-label="Refresh metrics"
style={{
padding: '6px 16px',
@@ -251,18 +255,15 @@ export default function MetricsPage() {
color: 'var(--mui-palette-primary-main, #0071c5)',
border: '1px solid var(--mui-palette-primary-main, #0071c5)',
borderRadius: '4px',
cursor: fetching || ctxLoading ? 'not-allowed' : 'pointer',
cursor: 'pointer',
fontSize: '13px',
fontWeight: 500,
opacity: fetching || ctxLoading ? 0.6 : 1,
}}
>
{fetching ? 'Refreshing…' : 'Refresh'}
</button>
</div>
{ctxLoading && <Loader title="Loading Intel GPU data..." />}
<MetricRequirements />
{fetching && !metrics && <Loader title="Querying Prometheus for GPU metrics..." />}