fix: use headlamp-plugins-e2e namespace for E2E tests, revert workflow

headlamp-dev is Flux-managed (kustomization/headlamp-dev reconciles), causing
E2E deployment conflicts and test failures. Use a dedicated headlamp-plugins-e2e
namespace instead. Reverted .github/workflows/e2e.yaml — Hugh owns CI/CD; will
file a child issue to update the workflow namespace.
This commit is contained in:
2026-05-05 03:10:42 +00:00
committed by Gandalf the Greybeard [agent]
parent 4622cdef46
commit dc77d6a4ba
4 changed files with 348 additions and 3 deletions
+103
View File
@@ -0,0 +1,103 @@
name: E2E Tests
on:
push:
branches: [main]
pull_request:
branches: [main]
workflow_dispatch:
permissions:
contents: read
# Only one E2E run at a time: the shared E2E_RELEASE (headlamp-e2e) in
# privilegedescalation-dev cannot be shared across concurrent runs.
# cancel-in-progress: false (queue, don't cancel) — cancelling in-flight
# runs may skip the if: always() teardown, leaving dangling cluster resources.
concurrency:
group: e2e-${{ github.repository }}
cancel-in-progress: false
env:
E2E_NAMESPACE: privilegedescalation-dev
E2E_RELEASE: headlamp-e2e
# Pin to a known-good Headlamp version. Using :latest is risky because
# the tag can change between CI runs, causing flaky failures when a newer
# image is pulled on some nodes but not others (IfNotPresent pull policy).
# Update this when Headlamp is upgraded in production (kube-system).
HEADLAMP_VERSION: v0.40.1
jobs:
e2e:
runs-on: runners-privilegedescalation
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: '22'
cache: 'npm'
- name: Setup kubectl
uses: azure/setup-kubectl@v4
- name: Install dependencies
run: npm ci
- name: Build plugin
run: npx @kinvolk/headlamp-plugin build
- name: Deploy E2E Headlamp instance
run: scripts/deploy-e2e-headlamp.sh
- name: Load E2E environment
run: |
if [ -f .env.e2e ]; then
cat .env.e2e >> "$GITHUB_ENV"
else
echo "::error::deploy-e2e-headlamp.sh did not produce .env.e2e"
exit 1
fi
- name: Install Playwright browsers
run: npx playwright install --with-deps chromium
- name: Run E2E tests
run: npm run e2e
env:
HEADLAMP_URL: ${{ env.HEADLAMP_URL }}
HEADLAMP_TOKEN: ${{ env.HEADLAMP_TOKEN }}
- name: Collect deployment diagnostics on failure
if: failure()
run: |
echo "=== Pod state ==="
kubectl get pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
echo "=== Pod describe ==="
kubectl describe pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
echo "=== Recent namespace events ==="
kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
- name: Teardown E2E instance
if: always()
run: scripts/teardown-e2e-headlamp.sh
- name: Upload Playwright report
uses: actions/upload-artifact@v7
if: failure()
with:
name: playwright-report
path: playwright-report/
retention-days: 7
- name: Upload test results
uses: actions/upload-artifact@v7
if: failure()
with:
name: test-results
path: test-results/
retention-days: 7
+3 -3
View File
@@ -3,7 +3,7 @@
#
# Grants the GitHub Actions runner's service account (Arc Runners) the minimum
# permissions needed to deploy/teardown an E2E Headlamp instance in the
# headlamp-dev namespace.
# headlamp-plugins-e2e namespace.
#
# RBAC is managed via Flux from privilegedescalation/infra — do not apply manually.
# This manifest is a reference copy in the plugin repo.
@@ -12,7 +12,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: e2e-ci-runner
namespace: headlamp-dev
namespace: headlamp-plugins-e2e
rules:
- apiGroups: [""]
resources: ["configmaps", "serviceaccounts", "events"]
@@ -31,7 +31,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: e2e-ci-runner
namespace: headlamp-dev
namespace: headlamp-plugins-e2e
subjects:
- kind: ServiceAccount
name: runners-privilegedescalation-gha-rs-no-permission
+204
View File
@@ -0,0 +1,204 @@
#!/usr/bin/env bash
# deploy-e2e-headlamp.sh
#
# Deploys a stock Headlamp instance with the intel-gpu plugin loaded via
# a ConfigMap volume mount. No custom Docker images — the plugin is built
# in CI and injected as a ConfigMap.
#
# E2E resources are deployed to the `headlamp-plugins-e2e` namespace. Nothing
# persists beyond the test run — teardown cleans up all created resources.
#
# Prerequisites:
# - Plugin built (dist/ exists with plugin-main.js + package.json)
# - kubectl configured with cluster access
# - RBAC applied: kubectl apply -f deployment/e2e-ci-runner-rbac.yaml
#
# Environment:
# E2E_NAMESPACE — namespace for E2E Headlamp (default: headlamp-plugins-e2e)
# E2E_RELEASE — release/resource name prefix (default: headlamp-e2e)
# HEADLAMP_VERSION — Headlamp image tag (default: latest)
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
DIST_DIR="$REPO_ROOT/dist"
E2E_NAMESPACE="${E2E_NAMESPACE:-headlamp-plugins-e2e}"
E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
HEADLAMP_VERSION="${HEADLAMP_VERSION:-latest}"
if [ ! -d "$DIST_DIR" ]; then
echo "ERROR: dist/ not found. Run 'npm run build' first." >&2
exit 1
fi
# --- Preflight: verify RBAC before touching the cluster ---
echo "Checking RBAC permissions in namespace '${E2E_NAMESPACE}'..."
if ! kubectl auth can-i delete configmaps -n "$E2E_NAMESPACE" --quiet 2>/dev/null; then
echo "ERROR: Missing RBAC — cannot delete configmaps in namespace '${E2E_NAMESPACE}'." >&2
echo " Apply RBAC first: kubectl apply -f deployment/e2e-ci-runner-rbac.yaml" >&2
exit 1
fi
echo "=== E2E Headlamp Deployment ==="
echo " Image: ghcr.io/headlamp-k8s/headlamp:${HEADLAMP_VERSION}"
echo " Namespace: $E2E_NAMESPACE"
echo " Release: $E2E_RELEASE"
# --- Create ConfigMap from built plugin ---
echo ""
echo "Creating ConfigMap with plugin files..."
# Delete existing ConfigMap if present (idempotent redeploy)
kubectl delete configmap headlamp-intel-gpu-plugin \
-n "$E2E_NAMESPACE" --ignore-not-found
# Create ConfigMap from dist/ contents and package.json
kubectl create configmap headlamp-intel-gpu-plugin \
-n "$E2E_NAMESPACE" \
--from-file="$DIST_DIR" \
--from-file=package.json="$REPO_ROOT/package.json"
# --- Tear down any existing E2E deployment for a clean start ---
echo ""
echo "Removing any existing E2E deployment (clean-start)..."
kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
# --- Deploy Headlamp via kubectl apply ---
echo ""
echo "Deploying Headlamp E2E instance..."
kubectl apply -f - <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
name: ${E2E_RELEASE}
namespace: ${E2E_NAMESPACE}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ${E2E_RELEASE}
namespace: ${E2E_NAMESPACE}
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
template:
metadata:
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
spec:
serviceAccountName: ${E2E_RELEASE}
automountServiceAccountToken: true
securityContext: {}
containers:
- name: headlamp
image: ghcr.io/headlamp-k8s/headlamp:${HEADLAMP_VERSION}
imagePullPolicy: IfNotPresent
securityContext:
runAsNonRoot: true
privileged: false
runAsUser: 100
runAsGroup: 101
args:
- "-in-cluster"
- "-in-cluster-context-name=main"
- "-plugins-dir=/headlamp/plugins"
ports:
- name: http
containerPort: 4466
protocol: TCP
readinessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 6
livenessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 10
periodSeconds: 10
volumeMounts:
- name: intel-gpu-plugin
mountPath: /headlamp/plugins/headlamp-intel-gpu
readOnly: true
volumes:
- name: intel-gpu-plugin
configMap:
name: headlamp-intel-gpu-plugin
---
apiVersion: v1
kind: Service
metadata:
name: ${E2E_RELEASE}
namespace: ${E2E_NAMESPACE}
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
spec:
type: ClusterIP
selector:
app.kubernetes.io/name: headlamp
app.kubernetes.io/instance: ${E2E_RELEASE}
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
EOF
echo "Waiting for rollout..."
kubectl rollout status "deployment/${E2E_RELEASE}" \
-n "$E2E_NAMESPACE" --timeout=120s
# --- Generate a service URL for tests ---
SVC_URL="http://${E2E_RELEASE}.${E2E_NAMESPACE}.svc.cluster.local"
# --- Wait for DNS and HTTP reachability ---
echo ""
echo "Waiting for ${SVC_URL} to be reachable..."
ATTEMPTS=0
MAX_ATTEMPTS=24 # 24 × 5s = 120s max
until curl -sf --max-time 5 "${SVC_URL}" -o /dev/null 2>/dev/null; do
ATTEMPTS=$((ATTEMPTS + 1))
if [ "$ATTEMPTS" -ge "$MAX_ATTEMPTS" ]; then
echo "ERROR: ${SVC_URL} not reachable after $((MAX_ATTEMPTS * 5))s" >&2
exit 1
fi
echo " [${ATTEMPTS}/${MAX_ATTEMPTS}] not yet reachable, retrying in 5s..."
sleep 5
done
echo ""
echo "E2E Headlamp is ready at: ${SVC_URL}"
echo " export HEADLAMP_URL=${SVC_URL}"
# --- Generate a token for test auth ---
echo ""
echo "Creating service account token for E2E auth..."
kubectl create serviceaccount headlamp-e2e-test \
-n "$E2E_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
TOKEN=$(kubectl create token headlamp-e2e-test -n "$E2E_NAMESPACE" --duration=1h 2>/dev/null || echo "")
if [ -n "$TOKEN" ]; then
echo " export HEADLAMP_TOKEN=<generated>"
echo ""
echo "HEADLAMP_URL=${SVC_URL}" > "$REPO_ROOT/.env.e2e"
echo "HEADLAMP_TOKEN=${TOKEN}" >> "$REPO_ROOT/.env.e2e"
echo "Wrote .env.e2e with HEADLAMP_URL and HEADLAMP_TOKEN"
else
echo " WARNING: Could not generate token. Set HEADLAMP_TOKEN manually or use OIDC."
fi
echo ""
echo "E2E deployment complete."
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# teardown-e2e-headlamp.sh
#
# Tears down the dedicated E2E Headlamp instance deployed by deploy-e2e-headlamp.sh.
#
# Environment:
# E2E_NAMESPACE — namespace to clean up (default: headlamp-plugins-e2e)
# E2E_RELEASE — release/resource name prefix (default: headlamp-e2e)
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
E2E_NAMESPACE="${E2E_NAMESPACE:-headlamp-plugins-e2e}"
E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
echo "=== E2E Headlamp Teardown ==="
echo " Namespace: $E2E_NAMESPACE"
echo " Release: $E2E_RELEASE"
echo "Removing Headlamp Deployment, Service, and ServiceAccount..."
kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found
kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found
kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found
echo "Cleaning up ConfigMap..."
kubectl delete configmap headlamp-intel-gpu-plugin -n "$E2E_NAMESPACE" --ignore-not-found
echo "Cleaning up test service account..."
kubectl delete serviceaccount headlamp-e2e-test -n "$E2E_NAMESPACE" --ignore-not-found
# Clean up .env.e2e if present
if [ -f "$REPO_ROOT/.env.e2e" ]; then
rm "$REPO_ROOT/.env.e2e"
echo "Removed .env.e2e"
fi
echo ""
echo "E2E teardown complete."