From 6c6cfc88f40fea3eeb8ebe56dd7196ad6be33bb8 Mon Sep 17 00:00:00 2001
From: Chris Farhood
Date: Wed, 6 May 2026 18:30:43 +0000
Subject: [PATCH] fix(e2e): add cluster diagnostics to deploy step for faster triage

Add pre-deployment node/namespace/resource diagnostics and wrap kubectl
apply in explicit error handling with cluster state dump on failure.
This gives us actionable output in the GitHub Actions logs when the
Deploy E2E step fails, instead of a silent exit code.

PRI-956

Co-Authored-By: Paperclip
---
 scripts/deploy-e2e-headlamp.sh | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/scripts/deploy-e2e-headlamp.sh b/scripts/deploy-e2e-headlamp.sh
index 30edb91..df4ab68 100755
--- a/scripts/deploy-e2e-headlamp.sh
+++ b/scripts/deploy-e2e-headlamp.sh
@@ -35,6 +35,17 @@ if ! kubectl auth can-i delete configmaps -n "$E2E_NAMESPACE" --quiet 2>/dev/nul
 exit 1
 fi
+echo ""
+echo "=== Pre-deployment cluster diagnostics ==="
+echo "Nodes:"
+kubectl get nodes -o wide 2>&1 || true
+echo ""
+echo "headlamp-dev namespace state:"
+kubectl get ns headlamp-dev -o yaml 2>&1 || true
+echo ""
+echo "Existing E2E resources in namespace:"
+kubectl get all -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
 echo "=== E2E Headlamp Deployment ==="
 echo " Image: ghcr.io/headlamp-k8s/headlamp:${HEADLAMP_VERSION}"
 echo " Namespace: $E2E_NAMESPACE"
@@ -60,7 +71,7 @@ kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-
 echo ""
 echo "Deploying Headlamp E2E instance..."
-kubectl apply -f - <&2
+ kubectl get all -n "$E2E_NAMESPACE" 2>&1 || true
+ kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -30 || true
+ exit 1
+fi
 echo "Waiting for rollout..."
 kubectl rollout status "deployment/${E2E_RELEASE}" \