From ba1433764589b6085a620d431411883959031491 Mon Sep 17 00:00:00 2001
From: Chris Farhood
Date: Tue, 5 May 2026 15:56:29 +0000
Subject: [PATCH] fix e2e: add comprehensive RBAC checks and deployment diagnostics

- Check all required permissions (not just delete configmaps) before deploy
- Pass verb and resource to `kubectl auth can-i` as separate arguments and
  check subresources (pods/exec, serviceaccounts/token) via --subresource
- Report every missing permission in a single run instead of only the first
- Dump pod state, events, and logs on rollout or service unreachability failures
- Send the diagnostic dumps to stderr together with their section headers
- Improves debuggability of E2E deployment failures in CI

Co-Authored-By: Paperclip
---
Review notes (ignored by `git am`): the hunks below were edited by hand, so
the post-image blob id on the `index` line was not regenerated and the
indentation of context lines was reconstructed. If the patch does not apply
cleanly, retry with `git apply --ignore-whitespace`.

 scripts/deploy-e2e-headlamp.sh | 73 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 69 insertions(+), 4 deletions(-)

diff --git a/scripts/deploy-e2e-headlamp.sh b/scripts/deploy-e2e-headlamp.sh
index 6172852..3d29088 100644
--- a/scripts/deploy-e2e-headlamp.sh
+++ b/scripts/deploy-e2e-headlamp.sh
@@ -14,10 +14,49 @@ if [ ! -d "$DIST_DIR" ]; then
 fi
 
 echo "Checking RBAC permissions in namespace '${E2E_NAMESPACE}'..."
-if ! kubectl auth can-i delete configmaps -n "$E2E_NAMESPACE" --quiet 2>/dev/null; then
-  echo "ERROR: Missing RBAC — cannot delete configmaps in namespace '${E2E_NAMESPACE}'." >&2
+# Each entry is "<verb> <resource>" or "<verb> <resource>/<subresource>".
+REQUIRED_PERMS=(
+  "create configmaps"
+  "delete configmaps"
+  "get configmaps"
+  "create serviceaccounts"
+  "delete serviceaccounts"
+  "get serviceaccounts"
+  "create deployments"
+  "get deployments"
+  "list pods"
+  "get pods"
+  "create services"
+  "get services"
+  "delete services"
+  "create pods/exec"
+  # "token" alone is not a resource; token requests are a serviceaccounts subresource.
+  "create serviceaccounts/token"
+)
+MISSING_PERMS=()
+for perm in "${REQUIRED_PERMS[@]}"; do
+  # kubectl expects VERB and TYPE as separate arguments, so split the entry.
+  verb="${perm%% *}"
+  target="${perm#* }"
+  can_i_args=("$verb" "${target%%/*}")
+  # can-i reads TYPE/NAME as a resource *name*, so a subresource such as
+  # pods/exec has to be passed through --subresource instead.
+  case "$target" in
+    */*) can_i_args+=("--subresource=${target#*/}") ;;
+  esac
+  # With --quiet the exit code is the verdict; kubectl's stderr is suppressed.
+  if ! kubectl auth can-i "${can_i_args[@]}" -n "$E2E_NAMESPACE" --quiet 2>/dev/null; then
+    echo "ERROR: Missing RBAC — ${perm} in namespace '${E2E_NAMESPACE}'." >&2
+    MISSING_PERMS+=("$perm")
+  fi
+done
+# Fail once, after all checks, so every missing grant is listed in one run.
+if [ "${#MISSING_PERMS[@]}" -gt 0 ]; then
+  echo "ERROR: Missing ${#MISSING_PERMS[@]} required RBAC permission(s), listed above." >&2
+  echo "Hub operator needs to grant these permissions to the workflow's service account in namespace ${E2E_NAMESPACE}." >&2
   exit 1
 fi
+echo "RBAC check passed."
 
 echo "=== E2E Headlamp Deployment ==="
 echo " Image: ghcr.io/headlamp-k8s/headlamp:${HEADLAMP_VERSION}"
@@ -134,8 +173,22 @@ spec:
 EOF
 
 echo "Waiting for rollout..."
-kubectl rollout status "deployment/${E2E_RELEASE}" \
-  -n "$E2E_NAMESPACE" --timeout=120s
+if ! kubectl rollout status "deployment/${E2E_RELEASE}" \
+  -n "$E2E_NAMESPACE" --timeout=120s 2>&1; then
+  # Best-effort diagnostics: each command is guarded with `|| true` so a
+  # failing dump cannot replace the rollout failure as the exit reason.
+  # The group is redirected as a whole so headers and output share stderr.
+  {
+    echo "=== Rollout failed. Dumping diagnostics ==="
+    echo "=== Pods ==="
+    kubectl get pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=${E2E_RELEASE}" 2>&1 || true
+    echo "=== Pod events ==="
+    kubectl describe pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=${E2E_RELEASE}" 2>&1 | tail -30 || true
+    echo "=== Namespace events ==="
+    kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
+  } >&2
+  exit 1
+fi
 
 SVC_URL="http://${E2E_RELEASE}.${E2E_NAMESPACE}.svc.cluster.local"
 
@@ -146,6 +199,18 @@ MAX_ATTEMPTS=24
 until curl -sf --max-time 5 "${SVC_URL}" -o /dev/null 2>/dev/null; do
   ATTEMPTS=$((ATTEMPTS + 1))
   if [ "$ATTEMPTS" -ge "$MAX_ATTEMPTS" ]; then
+    echo ""
+    # Best-effort diagnostics, guarded with `|| true` and sent to stderr as
+    # one group so the headers and the kubectl output share a stream.
+    {
+      echo "=== Service unreachable after $((MAX_ATTEMPTS * 5))s. Dumping diagnostics ==="
+      echo "=== Pod state ==="
+      kubectl get pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=${E2E_RELEASE}" 2>&1 || true
+      echo "=== Pod logs ==="
+      kubectl logs -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=${E2E_RELEASE}" --tail=50 2>&1 || true
+      echo "=== Namespace events ==="
+      kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
+    } >&2
     echo "ERROR: ${SVC_URL} not reachable after $((MAX_ATTEMPTS * 5))s" >&2
     exit 1
   fi