From e80673dc1c6d3b0a4bc21879186002b978d540a8 Mon Sep 17 00:00:00 2001 From: Chris Farhood Date: Tue, 5 May 2026 03:10:30 +0000 Subject: [PATCH 1/2] fix(e2e): remove Service delete to fix Endpoints UID race causing ERR_NAME_NOT_RESOLVED Deleting the Service between test runs causes a FailedToUpdateEndpoint error (UID precondition failure) when the old Endpoints are garbage collected and the new Service tries to create fresh Endpoints. This leaves the service unreachable by DNS (ERR_NAME_NOT_RESOLVED). Fix: stop deleting the Service. kubectl apply upserts it in-place, so the existing Endpoints object persists. The new pod IP is added automatically when the fresh Deployment pod becomes ready. Closes PRI-609 Co-Authored-By: Paperclip --- scripts/deploy-e2e-headlamp.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/deploy-e2e-headlamp.sh b/scripts/deploy-e2e-headlamp.sh index ed55a84..48ceff2 100755 --- a/scripts/deploy-e2e-headlamp.sh +++ b/scripts/deploy-e2e-headlamp.sh @@ -59,10 +59,15 @@ kubectl create configmap headlamp-intel-gpu-plugin \ --from-file=package.json="$REPO_ROOT/package.json" # --- Tear down any existing E2E deployment for a clean start --- +# Deleting the Deployment forces a fresh pod (new ReplicaSet) regardless of +# whether the pod spec changed. The ServiceAccount is also deleted for a clean +# token state. The Service is NOT deleted — leaving it in place avoids an +# Endpoints UID race (FailedToUpdateEndpoint) that causes DNS resolution +# failures. kubectl apply below upserts the Service in-place, and the new +# pod's IP is added to the existing Endpoints automatically. echo "" echo "Removing any existing E2E deployment (clean-start)..." kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait -kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait # --- Deploy Headlamp via kubectl apply --- -- 2.52.0 From f1500ed326c400beeaa25bdda91c998d70750fb8 Mon Sep 17 00:00:00 2001 From: Chris Farhood Date: Tue, 5 May 2026 03:50:32 +0000 Subject: [PATCH 2/2] fix(e2e): update namespace to headlamp-dev where RBAC is configured The Service delete fix on this branch is correct, but the E2E workflow and scripts still defaulted to privilegedescalation-dev where the arc-runners service account has no permissions. This caused RBAC Forbidden errors instead of the original ERR_NAME_NOT_RESOLVED. Changed all defaults to headlamp-dev, matching the successful gandalf/rename-ns-headlamp-dev branch. --- .github/workflows/e2e.yaml | 4 ++-- scripts/deploy-e2e-headlamp.sh | 6 +++--- scripts/teardown-e2e-headlamp.sh | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 37f33a9..49ab07f 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -11,7 +11,7 @@ permissions: contents: read # Only one E2E run at a time: the shared E2E_RELEASE (headlamp-e2e) in -# privilegedescalation-dev cannot be shared across concurrent runs. +# headlamp-dev cannot be shared across concurrent runs. # cancel-in-progress: false (queue, don't cancel) — cancelling in-flight # runs may skip the if: always() teardown, leaving dangling cluster resources. concurrency: @@ -19,7 +19,7 @@ concurrency: cancel-in-progress: false env: - E2E_NAMESPACE: privilegedescalation-dev + E2E_NAMESPACE: headlamp-dev E2E_RELEASE: headlamp-e2e # Pin to a known-good Headlamp version. Using :latest is risky because # the tag can change between CI runs, causing flaky failures when a newer diff --git a/scripts/deploy-e2e-headlamp.sh b/scripts/deploy-e2e-headlamp.sh index 48ceff2..783c758 100755 --- a/scripts/deploy-e2e-headlamp.sh +++ b/scripts/deploy-e2e-headlamp.sh @@ -5,7 +5,7 @@ # a ConfigMap volume mount. No custom Docker images — the plugin is built # in CI and injected as a ConfigMap. # -# E2E resources are deployed to the `privilegedescalation-dev` namespace. Nothing +# E2E resources are deployed to the `headlamp-dev` namespace. Nothing # persists beyond the test run — teardown cleans up all created resources. # # Prerequisites: @@ -14,7 +14,7 @@ # - RBAC applied: kubectl apply -f deployment/e2e-ci-runner-rbac.yaml # # Environment: -# E2E_NAMESPACE — namespace for E2E Headlamp (default: privilegedescalation-dev) +# E2E_NAMESPACE — namespace for E2E Headlamp (default: headlamp-dev) # E2E_RELEASE — release/resource name prefix (default: headlamp-e2e) # HEADLAMP_VERSION — Headlamp image tag (default: latest) set -euo pipefail @@ -22,7 +22,7 @@ set -euo pipefail REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" DIST_DIR="$REPO_ROOT/dist" -E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}" +E2E_NAMESPACE="${E2E_NAMESPACE:-headlamp-dev}" E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}" HEADLAMP_VERSION="${HEADLAMP_VERSION:-latest}" diff --git a/scripts/teardown-e2e-headlamp.sh b/scripts/teardown-e2e-headlamp.sh index 0afe16f..d9bdf43 100755 --- a/scripts/teardown-e2e-headlamp.sh +++ b/scripts/teardown-e2e-headlamp.sh @@ -4,13 +4,13 @@ # Tears down the dedicated E2E Headlamp instance deployed by deploy-e2e-headlamp.sh. # # Environment: -# E2E_NAMESPACE — namespace to clean up (default: privilegedescalation-dev) +# E2E_NAMESPACE — namespace to clean up (default: headlamp-dev) # E2E_RELEASE — release/resource name prefix (default: headlamp-e2e) set -euo pipefail REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" -E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}" +E2E_NAMESPACE="${E2E_NAMESPACE:-headlamp-dev}" E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}" echo "=== E2E Headlamp Teardown ===" -- 2.52.0