Compare commits

..

1 Commits

Author SHA1 Message Date
privilegedescalation-engineer[bot] 2a156c4e63 fix(e2e): serialize concurrent E2E runs to prevent environment conflicts
Concurrent E2E runs share the same headlamp-e2e release name in
privilegedescalation-dev. When two runs overlap, one run's teardown
(if: always()) deletes the shared Deployment/Service/ConfigMap while
the other is still using it, causing Playwright auth setup to time out
waiting for the Headlamp UI.

Adds a repo-wide concurrency group so only one E2E run executes at a
time. cancel-in-progress: false queues incoming runs rather than
cancelling in-flight ones to avoid leaving dangling cluster resources
when teardown is interrupted.

Fixes: PRI-815

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-24 16:31:14 +00:00
6 changed files with 27 additions and 69 deletions
-2
View File
@@ -16,5 +16,3 @@ jobs:
dual-approval:
uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
secrets: inherit
with:
pr_number: ${{ github.event.pull_request.number }}
+6 -19
View File
@@ -10,10 +10,12 @@ on:
permissions:
contents: read
# Only one E2E run at a time: the shared E2E_RELEASE (headlamp-e2e) in
# privilegedescalation-dev cannot be shared across concurrent runs.
# cancel-in-progress: false (queue, don't cancel) — cancelling in-flight
# runs may skip the if: always() teardown, leaving dangling cluster resources.
# Serialize E2E runs repo-wide. All concurrent runs share the same
# E2E_RELEASE name (headlamp-e2e) in a single namespace. Without
# serialization, one run's teardown (if: always()) deletes the
# deployment while a concurrent run is still using it, causing auth
# setup timeouts. cancel-in-progress: false queues rather than kills
# to avoid leaving dangling cluster resources.
concurrency:
group: e2e-${{ github.repository }}
cancel-in-progress: false
@@ -21,11 +23,6 @@ concurrency:
env:
E2E_NAMESPACE: privilegedescalation-dev
E2E_RELEASE: headlamp-e2e
# Pin to a known-good Headlamp version. Using :latest is risky because
# the tag can change between CI runs, causing flaky failures when a newer
# image is pulled on some nodes but not others (IfNotPresent pull policy).
# Update this when Headlamp is upgraded in production (kube-system).
HEADLAMP_VERSION: v0.40.1
jobs:
e2e:
@@ -72,16 +69,6 @@ jobs:
HEADLAMP_URL: ${{ env.HEADLAMP_URL }}
HEADLAMP_TOKEN: ${{ env.HEADLAMP_TOKEN }}
- name: Collect deployment diagnostics on failure
if: failure()
run: |
echo "=== Pod state ==="
kubectl get pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
echo "=== Pod describe ==="
kubectl describe pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
echo "=== Recent namespace events ==="
kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
- name: Teardown E2E instance
if: always()
run: scripts/teardown-e2e-headlamp.sh
-29
View File
@@ -30,35 +30,6 @@ rules:
- apiGroups: [""]
resources: ["serviceaccounts/token"]
verbs: ["create"]
# RBAC pre-flight check: verify polaris namespace has proxy-reader Role + RoleBinding
# before running E2E tests. Required by the "RBAC pre-flight check" workflow step.
- apiGroups: ["rbac.authorization.k8s.io"]
resources: ["roles", "rolebindings"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: e2e-ci-runner-polaris-reader
namespace: polaris
rules:
- apiGroups: ["rbac.authorization.k8s.io"]
resources: ["roles", "rolebindings"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: e2e-ci-runner-polaris-reader-binding
namespace: polaris
subjects:
- kind: ServiceAccount
name: runners-privilegedescalation-gha-rs-no-permission
namespace: arc-runners
roleRef:
kind: Role
name: e2e-ci-runner-polaris-reader
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
+2 -6
View File
@@ -45,12 +45,8 @@ async function authenticateWithToken(page: Page, token: string): Promise<void> {
await page.waitForURL(/\/(login|token)$/);
if (page.url().includes('/login')) {
// OIDC login page — click "use a token" to reach token auth.
// Wait explicitly before clicking so failures surface at 15 s
// with a clear message rather than silently timing out at 60 s.
const useTokenBtn = page.getByRole('button', { name: /use a token/i });
await useTokenBtn.waitFor({ state: 'visible', timeout: 15_000 });
await useTokenBtn.click();
// OIDC login page — click "use a token" to reach token auth
await page.getByRole('button', { name: /use a token/i }).click();
await page.waitForURL('**/token');
}
+17 -1
View File
@@ -1,5 +1,21 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": ["github>privilegedescalation/.github:renovate-config"]
"extends": ["config:recommended"],
"baseBranches": ["main"],
"schedule": ["every weekend"],
"prConcurrentLimit": 10,
"pinDigests": true,
"packageRules": [
{
"matchManagers": ["npm"],
"matchUpdateTypes": ["minor", "patch"],
"groupName": "npm minor and patch"
},
{
"matchManagers": ["github-actions"],
"matchUpdateTypes": ["minor", "patch"],
"groupName": "github-actions minor and patch"
}
]
}
+2 -12
View File
@@ -16,7 +16,7 @@
# Environment:
# E2E_NAMESPACE — namespace for E2E Headlamp (default: privilegedescalation-dev)
# E2E_RELEASE — release/resource name prefix (default: headlamp-e2e)
# HEADLAMP_VERSION — Headlamp image tag (default: v0.40.1, pinned to match production)
# HEADLAMP_VERSION — Headlamp image tag (default: latest)
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
@@ -24,7 +24,7 @@ DIST_DIR="$REPO_ROOT/dist"
E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}"
E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
HEADLAMP_VERSION="${HEADLAMP_VERSION:-v0.40.1}"
HEADLAMP_VERSION="${HEADLAMP_VERSION:-latest}"
if [ ! -d "$DIST_DIR" ]; then
echo "ERROR: dist/ not found. Run 'npm run build' first." >&2
@@ -58,16 +58,6 @@ kubectl create configmap headlamp-polaris-plugin \
--from-file="$DIST_DIR" \
--from-file=package.json="$REPO_ROOT/package.json"
# --- Tear down any existing E2E deployment for a clean start ---
# kubectl apply without prior deletion only patches in-place: if the pod spec is
# unchanged between runs, no new rollout is triggered and a degraded pod keeps
# serving. Delete first to guarantee a fresh pod regardless of prior state.
echo ""
echo "Removing any existing E2E deployment (clean-start)..."
kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
# --- Deploy Headlamp via kubectl apply ---
echo ""
echo "Deploying Headlamp E2E instance..."