From acd53c297bfed23aeb8138a06e205fe2ff43cc40 Mon Sep 17 00:00:00 2001
From: Hugh Hackman
Date: Sun, 22 Mar 2026 04:51:30 +0000
Subject: [PATCH] fix: wait for HTTP reachability after rollout in deploy-e2e-headlamp.sh

kubectl rollout status confirms the pod is ready per readinessProbe, but
Kubernetes Service DNS propagation to the runner pod may lag behind. This
caused intermittent E2E failures with ERR_NAME_NOT_RESOLVED.

Add a poll loop (120s wall-clock budget, overridable via E2E_WAIT_TIMEOUT)
after rollout status that verifies the service URL is reachable via HTTP
before writing .env.e2e. This eliminates the race condition between DNS
propagation and Playwright launch.

Fixes: PRI-687 (intermittent E2E DNS failure)
---
Review note: the hunk below was revised during review (wall-clock deadline,
last curl error reported on timeout). The post-image blob id on the "index"
line predates that revision.

 scripts/deploy-e2e-headlamp.sh | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/scripts/deploy-e2e-headlamp.sh b/scripts/deploy-e2e-headlamp.sh
index 412dd89..6186d5a 100755
--- a/scripts/deploy-e2e-headlamp.sh
+++ b/scripts/deploy-e2e-headlamp.sh
@@ -157,6 +157,33 @@ kubectl rollout status "deployment/${E2E_RELEASE}" \
 
 # --- Generate a service URL for tests ---
 SVC_URL="http://${E2E_RELEASE}.${E2E_NAMESPACE}.svc.cluster.local"
+
+# --- Wait for DNS and HTTP reachability ---
+# rollout status only confirms the pod is ready per readinessProbe.
+# Kubernetes Service DNS may still be propagating to the runner pod.
+# Poll until the service is reachable over HTTP before handing off.
+#
+# The budget is enforced against the wall clock: a failed probe can take
+# anywhere from ~0s (DNS failure) to the full curl timeout, so counting
+# attempts alone would not bound the total wait.
+WAIT_TIMEOUT="${E2E_WAIT_TIMEOUT:-120}" # total budget, seconds
+WAIT_INTERVAL=5                         # per-probe timeout and sleep, seconds
+WAIT_DEADLINE=$(($(date +%s) + WAIT_TIMEOUT))
+ATTEMPTS=0
+echo ""
+echo "Waiting for ${SVC_URL} to be reachable..."
+# -S keeps curl's error text (captured, not printed) so a timeout can report
+# whether the last failure was DNS, connection, or an HTTP error status.
+until CURL_ERR=$(curl -sSf --max-time "$WAIT_INTERVAL" -o /dev/null "${SVC_URL}" 2>&1); do
+  ATTEMPTS=$((ATTEMPTS + 1))
+  if [ "$(date +%s)" -ge "$WAIT_DEADLINE" ]; then
+    echo "ERROR: ${SVC_URL} not reachable after ${WAIT_TIMEOUT}s (${ATTEMPTS} attempts)" >&2
+    echo "       last error: ${CURL_ERR}" >&2
+    exit 1
+  fi
+  echo "  [${ATTEMPTS}] not yet reachable, retrying in ${WAIT_INTERVAL}s..."
+  sleep "$WAIT_INTERVAL"
+done
 echo ""
 echo "E2E Headlamp is ready at: ${SVC_URL}"
 echo " export HEADLAMP_URL=${SVC_URL}"