# headlamp-polaris-plugin/.github/workflows/e2e.yaml

# End-to-end test workflow: runs for pushes and pull requests that target
# main, and can also be started manually.
name: E2E Tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  # Manual trigger; no inputs are defined.
  workflow_dispatch:

# Limit the workflow token to read-only access to repository contents.
permissions:
  contents: read

# Serialize E2E runs: every run uses the same E2E_RELEASE (headlamp-e2e) in
# headlamp-dev, so that release cannot be shared by concurrent runs.
# cancel-in-progress stays false (queue, don't cancel) because cancelling an
# in-flight run may skip its if:always() teardown step and leave dangling
# cluster resources behind.
concurrency:
  group: 'e2e-${{ github.repository }}'
  cancel-in-progress: false

env:
  # Namespace and release name of the Headlamp instance under test.
  E2E_NAMESPACE: 'headlamp-dev'
  E2E_RELEASE: 'headlamp-e2e'
  # Headlamp version, pinned to a known-good release rather than :latest.
  # A moving tag can change between CI runs, and with the IfNotPresent pull
  # policy some nodes may pull a newer image than others, which causes flaky
  # failures. Bump this when production Headlamp (kube-system) is upgraded.
  HEADLAMP_VERSION: 'v0.40.1'

jobs:
  # Single job: builds the plugin, deploys an E2E Headlamp instance, runs the
  # E2E test suite, and tears the instance down again.
  e2e:
    # Runner label. NOTE(review): the steps treat this as an ARC runner that
    # can reach the cluster with kubectl — confirm against the runner setup.
    runs-on: runners-privilegedescalation
    # Hard limit for the whole job, in minutes.
    timeout-minutes: 15

    steps:
      # Fetch the repository so the scripts/ and deployment/ files used by
      # later steps are present in the workspace.
      - name: Checkout
        uses: actions/checkout@v6

      # Node.js 22 with the npm cache enabled.
      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "22"
          cache: npm

      # Provides the kubectl CLI used by the cluster-facing steps.
      - name: Setup kubectl
        uses: azure/setup-kubectl@v4

      # Finds a kubeconfig file on the runner, makes it the active KUBECONFIG
      # for this step and (through GITHUB_ENV) for every later step, then
      # verifies that the cluster answers.
      - name: Locate kubeconfig for ARC runner
        run: |
          set -euo pipefail
          for dir in /runner /home/runner/.kube /home/github/.kube; do
            if [ -f "${dir}/config" ]; then
              echo "Found kubeconfig at ${dir}/config"
              # GITHUB_ENV only takes effect in subsequent steps, so also
              # export the variable for the check and kubectl call below.
              export KUBECONFIG="${dir}/config"
              echo "KUBECONFIG=${dir}/config" >> "$GITHUB_ENV"
              break
            fi
          done
          # Still empty here means no candidate file exists and the runner
          # environment did not provide KUBECONFIG on its own.
          if [ -z "${KUBECONFIG:-}" ]; then
            echo "::error::No kubeconfig found for ARC runner. Checked: /runner/config, /home/runner/.kube/config, /home/github/.kube/config"
            exit 1
          fi
          echo "Using kubeconfig: ${KUBECONFIG}"
          # Fail fast if the API server is unreachable with this config.
          kubectl cluster-info --request-timeout=5s

      # Applies the CI runner RBAC manifest, then confirms that the runner
      # service account may create roles in the E2E namespace.
      - name: Apply RBAC for E2E pipeline
        run: |
          set -euo pipefail
          set -x
          # Server-side dry run is informational only; a failure here must
          # not stop the step.
          kubectl apply -f deployment/e2e-ci-runner-rbac.yaml --dry-run=server 2>&1 || true
          # A failed apply aborts the step right here (errexit), so there is
          # no exit status worth printing afterwards.
          kubectl apply -f deployment/e2e-ci-runner-rbac.yaml 2>&1
          echo "Waiting for RBAC propagation..."
          sleep 5
          echo "Verifying CI runner permissions..."
          # Use the workflow-level namespace variable, like the other steps.
          kubectl auth can-i create roles -n "$E2E_NAMESPACE" \
            --as="system:serviceaccount:arc-runners:runners-privilegedescalation-gha-rs-no-permission" 2>&1 \
            || { echo "::error::CI runner still lacks roles permission after propagation wait"; exit 1; }
          set +x

      # Apply the RBAC manifest for the Polaris dashboard.
      - name: Apply Polaris dashboard RBAC
        run: |
          kubectl apply -f deployment/polaris-rbac.yaml

      # Verifies the RBAC objects and permissions the pipeline relies on.
      # Every check is evaluated and each failure is reported individually,
      # so the log shows exactly what is missing before the step fails.
      - name: RBAC pre-flight check
        run: |
          echo "Checking RBAC resources..."
          MISSING=0
          if ! kubectl get role polaris-dashboard-proxy-reader -n polaris -o name >/dev/null 2>&1; then
            echo "::error::Role polaris-dashboard-proxy-reader not found or not readable in namespace polaris"
            MISSING=1
          fi
          if ! kubectl get rolebinding polaris-dashboard-proxy-reader -n polaris -o name >/dev/null 2>&1; then
            echo "::error::RoleBinding polaris-dashboard-proxy-reader not found or not readable in namespace polaris"
            MISSING=1
          fi
          # can-i prints yes/no on stdout and exits non-zero for "no".
          if ! kubectl auth can-i delete configmaps -n "$E2E_NAMESPACE" 2>/dev/null; then
            echo "::error::Permission check failed: delete configmaps in namespace $E2E_NAMESPACE"
            MISSING=1
          fi
          if [ "$MISSING" -eq 0 ]; then
            echo "RBAC pre-flight check passed."
          else
            echo "::error::RBAC pre-flight check failed. Missing required permissions."
            exit 1
          fi

      # Clean install of the npm dependencies.
      - name: Install dependencies
        run: |
          npm ci

      # Build the plugin with the Headlamp plugin CLI.
      - name: Build plugin
        run: |
          npx @kinvolk/headlamp-plugin build

      # Deploy the Headlamp instance for the E2E run; the following step
      # expects this script to have written .env.e2e.
      - name: Deploy E2E Headlamp instance
        run: |
          scripts/deploy-e2e-headlamp.sh

      # Exports the variables from .env.e2e to all later steps, registering
      # the Headlamp token as a masked value first so it is redacted in logs.
      - name: Load E2E environment
        run: |
          if [ ! -f .env.e2e ]; then
            echo "::error::deploy-e2e-headlamp.sh did not produce .env.e2e"
            exit 1
          fi
          # Values loaded through GITHUB_ENV are shown in clear text in the
          # env listing of later steps unless they were masked beforehand.
          # Assumes a single-line HEADLAMP_TOKEN=<value> entry — TODO confirm
          # against deploy-e2e-headlamp.sh.
          token="$(sed -n 's/^HEADLAMP_TOKEN=//p' .env.e2e | head -n 1)"
          if [ -n "$token" ]; then
            echo "::add-mask::$token"
          fi
          cat .env.e2e >> "$GITHUB_ENV"

      # Install Chromium for Playwright together with its system dependencies.
      - name: Install Playwright browsers
        run: |
          npx playwright install --with-deps chromium

      # Run the E2E suite; URL and token are taken from the workflow
      # environment and passed to the test process.
      - name: Run E2E tests
        env:
          HEADLAMP_URL: "${{ env.HEADLAMP_URL }}"
          HEADLAMP_TOKEN: "${{ env.HEADLAMP_TOKEN }}"
        run: |
          npm run e2e

      # Only after a failed step: dump pod status, pod descriptions and the
      # latest namespace events for the E2E release. Every command is
      # best-effort so that diagnostics never add a failure of their own.
      - name: Collect deployment diagnostics on failure
        if: failure()
        run: |
          ns="$E2E_NAMESPACE"
          selector="app.kubernetes.io/instance=$E2E_RELEASE"

          echo "=== Pod state ==="
          kubectl get pods -n "$ns" -l "$selector" 2>&1 || true

          echo "=== Pod describe ==="
          kubectl describe pods -n "$ns" -l "$selector" 2>&1 || true

          echo "=== Recent namespace events ==="
          kubectl get events -n "$ns" --sort-by='.lastTimestamp' 2>&1 | tail -n 20 || true

      # Runs regardless of the outcome of earlier steps so the E2E instance
      # is removed after both passing and failing runs.
      - name: Teardown E2E instance
        if: always()
        run: |
          scripts/teardown-e2e-headlamp.sh

      # After a failure, keep the Playwright report directory for 7 days.
      - name: Upload Playwright report
        if: failure()
        uses: actions/upload-artifact@v7
        with:
          name: playwright-report
          path: playwright-report/
          retention-days: 7

      # After a failure, keep the test-results directory for 7 days.
      - name: Upload test results
        if: failure()
        uses: actions/upload-artifact@v7
        with:
          name: test-results
          path: test-results/
          retention-days: 7