diff --git a/.github/scripts/ci-health-check.sh b/.github/scripts/ci-health-check.sh
index 094a19c..988b8fc 100755
--- a/.github/scripts/ci-health-check.sh
+++ b/.github/scripts/ci-health-check.sh
@@ -12,6 +12,11 @@
 #   - excludes "headlamp-agent-skills" (skills bundle, not a Headlamp plugin)
 # If discovery fails (network error, GH_TOKEN missing, API outage), we fall
 # back to a hardcoded list so the health check still produces a useful report.
+#
+# Failure categories (derived from each run's conclusion):
+#   - code:    "failure" on main, excluding the Release and E2E Tests workflows
+#   - infra:   startup_failure or timed_out (any branch)
+#   - pending: action_required (any branch) - informational only, never fails
 
 set -euo pipefail
 ORG="privilegedescalation"
@@ -44,6 +49,7 @@ echo ""
 
 failures=0
 warnings=0
+process_pending=0
 
 for repo in "${PLUGIN_REPOS[@]}"; do
   echo "--- ${repo} ---"
@@ -57,18 +63,40 @@ for repo in "${PLUGIN_REPOS[@]}"; do
     continue
   fi
 
-  # Count CI failures on main — exclude E2E and Release (tracked separately below)
-  main_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests")] | length')
   total=$(echo "$runs" | jq 'length')
 
-  if [ "$main_failures" -gt 0 ]; then
-    echo " FAIL: ${main_failures} CI failure(s) in last ${total} runs on main:"
+  # Categorize runs by conclusion:
+  # - code failures: "failure" on main (Release and E2E Tests excluded)
+  # - infra failures: startup_failure, timed_out
+  # - process pending: action_required
+
+  code_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests")] | length')
+  infra_failures=$(echo "$runs" | jq '[.[] | select(.conclusion=="startup_failure" or .conclusion=="timed_out")] | length')
+  action_required=$(echo "$runs" | jq '[.[] | select(.conclusion=="action_required")] | length')
+
+  if [ "$code_failures" -gt 0 ]; then
+    echo " FAIL (code): ${code_failures} CI failure(s) in last ${total} runs on main:"
     echo "$runs" | jq -r '.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests") | " - \(.name) (\(.updatedAt))"'
     ((failures++)) || true
-  else
+  fi
+
+  if [ "$infra_failures" -gt 0 ]; then
+    echo " FAIL (infra): ${infra_failures} infrastructure failure(s):"
+    echo "$runs" | jq -r '.[] | select(.conclusion=="startup_failure" or .conclusion=="timed_out") | " - \(.name): \(.conclusion) (\(.updatedAt))"'
+    ((failures++)) || true
+  fi
+
+  if [ "$code_failures" -eq 0 ] && [ "$infra_failures" -eq 0 ]; then
     echo " OK: CI passing on main"
   fi
 
+  # Process pending — informational only (awaiting review/approval)
+  if [ "$action_required" -gt 0 ]; then
+    echo " INFO: ${action_required} workflow run(s) awaiting action (dual approval, review, etc.):"
+    echo "$runs" | jq -r '.[] | select(.conclusion=="action_required") | " - \(.name) on \(.headBranch) (\(.updatedAt))"'
+    ((process_pending++)) || true
+  fi
+
   # Surface E2E test failures as warnings (infra blocker: RBAC not yet applied — PRI-494)
   e2e_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .name=="E2E Tests" and .conclusion=="failure")] | length')
   if [ "$e2e_failures" -gt 0 ]; then
@@ -83,15 +111,6 @@ for repo in "${PLUGIN_REPOS[@]}"; do
     ((warnings++)) || true
   fi
 
-  # Check for action_required — GitHub's "Require approval for first-time contributors" setting
-  # blocks workflow runs from GitHub App bot accounts. This is a CI pipeline blocker (see PRI-44).
-  action_required_count=$(echo "$runs" | jq '[.[] | select(.conclusion=="action_required")] | length')
-  if [ "$action_required_count" -gt 0 ]; then
-    echo " FAIL: ${action_required_count} workflow run(s) with action_required (GitHub App PR approval blocked):"
-    echo "$runs" | jq -r '.[] | select(.conclusion=="action_required") | " - \(.name) on \(.headBranch) (\(.updatedAt))"'
-    ((failures++)) || true
-  fi
-
   # Check latest release
   latest_release=$(gh api "repos/${ORG}/${repo}/releases" --jq '.[0].tag_name // "none"' 2>/dev/null || echo "error")
   echo " Latest release: ${latest_release}"
@@ -103,7 +122,8 @@ echo "=== Summary ==="
 echo "Repos scanned: ${#PLUGIN_REPOS[@]}"
 echo "With failures: ${failures}"
 echo "With warnings: ${warnings}"
+echo "With pending approval: ${process_pending}"
 
 if [ "$failures" -gt 0 ]; then
   exit 1
 fi
diff --git a/.github/workflows/dual-approval-check.yaml b/.github/workflows/dual-approval-check.yaml
index 5127c64..57ef00f 100644
--- a/.github/workflows/dual-approval-check.yaml
+++ b/.github/workflows/dual-approval-check.yaml
@@ -1,22 +1,5 @@
 name: Dual Approval Check
 
-# Reusable workflow: verifies that both the CTO and QA bot accounts
-# have approved a pull request. Plugin repos call this on
-# pull_request_review events to get a required GitHub status check.
-#
-# Usage in a plugin repo's workflow:
-#
-#   on:
-#     pull_request_review:
-#       types: [submitted, dismissed]
-#     pull_request:
-#       types: [opened, reopened, synchronize]
-#
-#   jobs:
-#     dual-approval:
-#       uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
-#       secrets: inherit
-
 on:
   workflow_call:
     inputs:
@@ -50,8 +33,8 @@ jobs:
           PR_NUMBER: ${{ inputs.pr_number }}
           REPO: ${{ github.repository }}
         run: |
-          if [ -z "${PR_NUMBER}" ]; then
-            echo "::notice::No PR number in context (dismissed review?). Skipping dual approval check — no action needed."
+          if [ -z "${PR_NUMBER}" ] || [ "${PR_NUMBER}" = "null" ]; then
+            echo "::notice::No PR number in context (dismissed review or workflow_call without pr_number). Skipping dual approval check — no action needed."
             exit 0
           fi
 
@@ -62,11 +45,18 @@ jobs:
             -H "Accept: application/vnd.github.v3+json" \
             "https://api.github.com/repos/${REPO}/pulls/${PR_NUMBER}/reviews")
 
+          # A successful reviews response is a JSON array. Empty output, "null",
+          # or an error object would break the jq filters below, so fail closed.
+          if ! echo "${REVIEWS}" | jq -e 'type == "array"' >/dev/null 2>&1; then
+            echo "::error::Could not fetch reviews for PR #${PR_NUMBER} (unexpected API response). Cannot verify approvals."
+            exit 1
+          fi
+
           CTO_APPROVED=$(echo "${REVIEWS}" | jq -r --arg user "${CTO_REVIEWER}" \
-            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | .state == "APPROVED"')
+            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | if .state then .state == "APPROVED" else false end')
 
           QA_APPROVED=$(echo "${REVIEWS}" | jq -r --arg user "${QA_REVIEWER}" \
-            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | .state == "APPROVED"')
+            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | if .state then .state == "APPROVED" else false end')
 
           echo "CTO (${CTO_REVIEWER}) approved: ${CTO_APPROVED}"
           echo "QA (${QA_REVIEWER}) approved: ${QA_APPROVED}"