fix(e2e): grant CI runner read access to polaris namespace for RBAC pre-flight check

The RBAC pre-flight check workflow step (commit 46350c5) verifies that polaris-dashboard-proxy-reader Role and RoleBinding exist in the polaris namespace before running E2E tests. However, the CI runner's RBAC (e2e-ci-runner-role in privilegedescalation-dev) did not include permission to read roles/rolebindings in the polaris namespace, causing the pre-flight check to fail with a generic kubectl error on all branches. Fix: add rules to e2e-ci-runner-role allowing get on roles/rolebindings in privilegedescalation-dev (for the pre-flight check itself), plus a new Role + RoleBinding in the polaris namespace granting the runner read access to rbac resources there. Without this fix, the pre-flight check exits 1 on every branch until someone SSHs into the runner pod and manually applies the polaris RBAC manifest — which they shouldn't need to do. Co-Authored-By: Paperclip <noreply@paperclip.ing>
fix: pass pr_number to dual-approval-check workflow (#119)
2026-05-03 15:13:03 +00:00 · 2026-04-15 03:33:19 +00:00 · 2026-03-24 22:26:32 +00:00 · 2026-03-24 21:57:58 +00:00 · 2026-03-24 21:42:51 +00:00 · 2026-03-24 21:28:38 +00:00
7 changed files with 78 additions and 26 deletions
@@ -16,3 +16,5 @@ jobs:
  dual-approval:
    uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
    secrets: inherit
+    with:
+      pr_number: ${{ github.event.pull_request.number }}
@@ -10,9 +10,22 @@ on:
 permissions:
  contents: read

+# Only one E2E run at a time: the shared E2E_RELEASE (headlamp-e2e) in
+# privilegedescalation-dev cannot be shared across concurrent runs.
+# cancel-in-progress: false (queue, don't cancel) — cancelling in-flight
+# runs may skip the if: always() teardown, leaving dangling cluster resources.
+concurrency:
+  group: e2e-${{ github.repository }}
+  cancel-in-progress: false
+
 env:
  E2E_NAMESPACE: privilegedescalation-dev
  E2E_RELEASE: headlamp-e2e
+  # Pin to a known-good Headlamp version. Using :latest is risky because
+  # the tag can change between CI runs, causing flaky failures when a newer
+  # image is pulled on some nodes but not others (IfNotPresent pull policy).
+  # Update this when Headlamp is upgraded in production (kube-system).
+  HEADLAMP_VERSION: v0.40.1

 jobs:
  e2e:
@@ -24,7 +37,7 @@ jobs:
        uses: actions/checkout@v6

      - name: Setup Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: '22'
          cache: 'npm'
@@ -59,12 +72,22 @@ jobs:
          HEADLAMP_URL: ${{ env.HEADLAMP_URL }}
          HEADLAMP_TOKEN: ${{ env.HEADLAMP_TOKEN }}

+      - name: Collect deployment diagnostics on failure
+        if: failure()
+        run: |
+          echo "=== Pod state ==="
+          kubectl get pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
+          echo "=== Pod describe ==="
+          kubectl describe pods -n "$E2E_NAMESPACE" -l "app.kubernetes.io/instance=$E2E_RELEASE" 2>&1 || true
+          echo "=== Recent namespace events ==="
+          kubectl get events -n "$E2E_NAMESPACE" --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
+
      - name: Teardown E2E instance
        if: always()
        run: scripts/teardown-e2e-headlamp.sh

      - name: Upload Playwright report
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        if: failure()
        with:
          name: playwright-report
@@ -72,7 +95,7 @@ jobs:
          retention-days: 7

      - name: Upload test results
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        if: failure()
        with:
          name: test-results
@@ -11,7 +11,7 @@ description: >-
  `polaris-dashboard` service in the `polaris` namespace.
 license: Apache-2.0
 homeURL: "https://github.com/privilegedescalation/headlamp-polaris-plugin"
-appVersion: "5.0"
+appVersion: "10.1.6"
 category: security
 keywords:
  - polaris
@@ -72,5 +72,5 @@ maintainers:
 annotations:
  headlamp/plugin/archive-url: "https://github.com/privilegedescalation/headlamp-polaris-plugin/releases/download/v1.0.0/headlamp-polaris-1.0.0.tar.gz"
  headlamp/plugin/version-compat: ">=0.26"
-  headlamp/plugin/archive-checksum: sha256:ce75449a05d3d3dd3c546db36a2257fae3e4601e466108182e64310a1a4f6d71
+  headlamp/plugin/archive-checksum: sha256:a165e871b40f11a44950aa9f10eb7f7883276f749026ae7a4f886278ecd9bd7d
  headlamp/plugin/distro-compat: "in-cluster,web,desktop"
@@ -30,6 +30,35 @@ rules:
  - apiGroups: [""]
    resources: ["serviceaccounts/token"]
    verbs: ["create"]
+  # RBAC pre-flight check: verify polaris namespace has proxy-reader Role + RoleBinding
+  # before running E2E tests. Required by the "RBAC pre-flight check" workflow step.
+  - apiGroups: ["rbac.authorization.k8s.io"]
+    resources: ["roles", "rolebindings"]
+    verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: e2e-ci-runner-polaris-reader
+  namespace: polaris
+rules:
+  - apiGroups: ["rbac.authorization.k8s.io"]
+    resources: ["roles", "rolebindings"]
+    verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: e2e-ci-runner-polaris-reader-binding
+  namespace: polaris
+subjects:
+  - kind: ServiceAccount
+    name: runners-privilegedescalation-gha-rs-no-permission
+    namespace: arc-runners
+roleRef:
+  kind: Role
+  name: e2e-ci-runner-polaris-reader
+  apiGroup: rbac.authorization.k8s.io
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
@@ -45,8 +45,12 @@ async function authenticateWithToken(page: Page, token: string): Promise<void> {
  await page.waitForURL(/\/(login|token)$/);

  if (page.url().includes('/login')) {
-    // OIDC login page — click "use a token" to reach token auth
-    await page.getByRole('button', { name: /use a token/i }).click();
+    // OIDC login page — click "use a token" to reach token auth.
+    // Wait explicitly before clicking so failures surface at 15 s
+    // with a clear message rather than silently timing out at 60 s.
+    const useTokenBtn = page.getByRole('button', { name: /use a token/i });
+    await useTokenBtn.waitFor({ state: 'visible', timeout: 15_000 });
+    await useTokenBtn.click();
    await page.waitForURL('**/token');
  }

@@ -1,21 +1,5 @@
 {
  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
-  "extends": ["config:recommended"],
-  "baseBranches": ["main"],
-  "schedule": ["every weekend"],
-  "prConcurrentLimit": 10,
-  "pinDigests": true,
-  "packageRules": [
-    {
-      "matchManagers": ["npm"],
-      "matchUpdateTypes": ["minor", "patch"],
-      "groupName": "npm minor and patch"
-    },
-    {
-      "matchManagers": ["github-actions"],
-      "matchUpdateTypes": ["minor", "patch"],
-      "groupName": "github-actions minor and patch"
-    }
-  ]
+  "extends": ["github>privilegedescalation/.github:renovate-config"]
 }

@@ -16,7 +16,7 @@
 # Environment:
 #   E2E_NAMESPACE     — namespace for E2E Headlamp (default: privilegedescalation-dev)
 #   E2E_RELEASE       — release/resource name prefix (default: headlamp-e2e)
-#   HEADLAMP_VERSION  — Headlamp image tag (default: latest)
+#   HEADLAMP_VERSION  — Headlamp image tag (default: v0.40.1, pinned to match production)
 set -euo pipefail

 REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
@@ -24,7 +24,7 @@ DIST_DIR="$REPO_ROOT/dist"

 E2E_NAMESPACE="${E2E_NAMESPACE:-privilegedescalation-dev}"
 E2E_RELEASE="${E2E_RELEASE:-headlamp-e2e}"
-HEADLAMP_VERSION="${HEADLAMP_VERSION:-latest}"
+HEADLAMP_VERSION="${HEADLAMP_VERSION:-v0.40.1}"

 if [ ! -d "$DIST_DIR" ]; then
  echo "ERROR: dist/ not found. Run 'npm run build' first." >&2
@@ -58,6 +58,16 @@ kubectl create configmap headlamp-polaris-plugin \
  --from-file="$DIST_DIR" \
  --from-file=package.json="$REPO_ROOT/package.json"

+# --- Tear down any existing E2E deployment for a clean start ---
+# kubectl apply without prior deletion only patches in-place: if the pod spec is
+# unchanged between runs, no new rollout is triggered and a degraded pod keeps
+# serving. Delete first to guarantee a fresh pod regardless of prior state.
+echo ""
+echo "Removing any existing E2E deployment (clean-start)..."
+kubectl delete deployment "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
+kubectl delete service "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
+kubectl delete serviceaccount "${E2E_RELEASE}" -n "$E2E_NAMESPACE" --ignore-not-found --wait
+
 # --- Deploy Headlamp via kubectl apply ---
 echo ""
 echo "Deploying Headlamp E2E instance..."
Author	SHA1	Message	Date
Chris Farhood	a65743dea3	fix(e2e): grant CI runner read access to polaris namespace for RBAC pre-flight check The RBAC pre-flight check workflow step (commit `46350c5`) verifies that polaris-dashboard-proxy-reader Role and RoleBinding exist in the polaris namespace before running E2E tests. However, the CI runner's RBAC (e2e-ci-runner-role in privilegedescalation-dev) did not include permission to read roles/rolebindings in the polaris namespace, causing the pre-flight check to fail with a generic kubectl error on all branches. Fix: add rules to e2e-ci-runner-role allowing get on roles/rolebindings in privilegedescalation-dev (for the pre-flight check itself), plus a new Role + RoleBinding in the polaris namespace granting the runner read access to rbac resources there. Without this fix, the pre-flight check exits 1 on every branch until someone SSHs into the runner pod and manually applies the polaris RBAC manifest — which they shouldn't need to do. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-05-03 15:13:03 +00:00
privilegedescalation-engineer[bot]	dff1265435	fix: pass pr_number to dual-approval-check workflow (#119) Companion PR to privilegedescalation/.github#81 Co-authored-by: Hugh Hackman <hugh@paperclip.ing> Co-authored-by: Paperclip <noreply@paperclip.ing>	2026-04-15 03:33:19 +00:00
privilegedescalation-ceo[bot]	7c58826668	Merge pull request #117 from privilegedescalation/ci/e2e-deploy-diagnostics ci(e2e): add deployment diagnostics step on failure	2026-03-24 22:26:32 +00:00
privilegedescalation-engineer[bot]	4edc829b3f	ci(e2e): add deployment diagnostics step on failure When the E2E deploy step fails (rollout timeout, pod not ready, etc.), previously required manual cluster investigation to diagnose the root cause. This heartbeat had to grep CI logs and query kubectl separately to determine a :latest image drift issue. The new step captures pod state, pod describe output, and recent namespace events immediately when a failure occurs — surfacing the root cause directly in the CI run log. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-03-24 21:57:58 +00:00
privilegedescalation-ceo[bot]	8f10be39bd	Merge pull request #116 from privilegedescalation/fix/pin-headlamp-version-e2e fix(e2e): pin Headlamp image to v0.40.1 instead of :latest	2026-03-24 21:42:51 +00:00
privilegedescalation-engineer[bot]	27212a91e1	fix(e2e): pin Headlamp image to v0.40.1 instead of :latest The :latest tag caused E2E flakiness when a newer Headlamp image was pulled on some cluster nodes (IfNotPresent policy) but not others. Concurrent E2E runs on main saw different image versions, and the newest :latest (sha256:89c6c65) failed to pass the readiness probe within 120s. Pin to v0.40.1 — the same version running in production (kube-system) — so all nodes use the same cached digest and CI is deterministic. Update this pin when Headlamp is upgraded in production. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-03-24 21:28:38 +00:00
privilegedescalation-ceo[bot]	7b72306133	Merge pull request #109 from privilegedescalation/feat/renovate-extend-org-config feat: extend Renovate config from org-level preset	2026-03-24 18:45:58 +00:00
privilegedescalation-ceo[bot]	e16e6255d0	Merge pull request #110 from privilegedescalation/ci/e2e-concurrency-guard ci: add concurrency guard to E2E workflow	2026-03-24 18:45:55 +00:00
privilegedescalation-ceo[bot]	4beb0c4d0e	Merge pull request #113 from privilegedescalation/fix/e2e-clean-deploy fix(e2e): clean-delete existing deployment before redeploy for guaranteed fresh pod	2026-03-24 18:45:52 +00:00
Gandalf the Greybeard	175d3ec6a2	fix(e2e): clean-delete existing deployment before redeploy for guaranteed fresh pod kubectl apply without prior deletion patches in place: if the pod spec is unchanged between runs, no rollout is triggered and a potentially degraded pod from a prior run keeps serving. This caused the auth.setup.ts timeout (waiting for the "use a token" button) even when no concurrent runs were present — the headlamp-e2e pod was in an inconsistent state from a previous run that didn't tear down cleanly. Changes: - deploy-e2e-headlamp.sh: delete Deployment, Service, and ServiceAccount (with --wait) before applying, guaranteeing a fresh pod each run - auth.setup.ts: add explicit waitFor({ state: 'visible', timeout: 15_000 }) before the "use a token" button click, so failures surface at 15 s with a clear locator error rather than silently timing out at 60 s Fixes the pre-existing infra issue blocking PR#110. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-24 16:40:30 +00:00
privilegedescalation-engineer[bot]	e63cd03267	fix(e2e): use cancel-in-progress: false to prevent dangling cluster resources cancel-in-progress: true would cancel in-flight E2E runs when a new one arrives. GitHub Actions does not guarantee that if: always() steps run on cancelled jobs, so teardown-e2e-headlamp.sh may be skipped — leaving the headlamp-e2e Deployment/Service/ConfigMap dangling in privilegedescalation-dev. Switching to false (queue) ensures the running job always completes its teardown before the next run starts. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-03-24 16:34:36 +00:00
privilegedescalation-engineer[bot]	4d878c8737	ci: add concurrency guard to E2E workflow Prevents parallel E2E runs from conflicting over the shared headlamp-e2e Helm release in privilegedescalation-dev. With cancel-in-progress: true, a new push cancels any in-progress run on the same repo — only one E2E suite runs at a time. Observed failure: PR#109 and PR#108 ran concurrently and the auth setup in PR#109 timed out, likely due to resource contention on the shared headlamp-e2e instance. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-03-24 16:27:52 +00:00
privilegedescalation-ceo[bot]	5f817ec4f6	Merge pull request #108 from privilegedescalation/fix/node24-action-versions ci: upgrade e2e.yaml actions to Node.js 24-compatible versions	2026-03-24 16:25:26 +00:00
Hugh Hackman	490807cef6	feat: extend Renovate config from org-level preset Replaces the duplicated Renovate config with a simple extend from the org-level preset (privilegedescalation/.github:renovate-config). All rules (schedule, pinDigests, npm/github-actions minor+patch+major groups) are now inherited from the org config, which was updated in PR #66 to add major-version update rules for GitHub Actions. This eliminates config drift between repos and reduces maintenance toil — future rule changes only need to be made in one place. Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-03-24 16:16:15 +00:00
Hugh Hackman	06d7dfb212	ci: upgrade e2e.yaml actions to Node.js 24-compatible versions Update action versions ahead of GitHub's June 2, 2026 Node.js 20 deprecation: - actions/setup-node@v4 → @v6 - actions/upload-artifact@v4 → @v7 Co-Authored-By: Paperclip <noreply@paperclip.ing>	2026-03-24 16:11:05 +00:00
privilegedescalation-engineer[bot]	ba508b8fc4	release: v1.0.0 (#107) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2026-03-24 15:27:35 +00:00