From 2df48640bbb7ce5b381a9bee45fb7af36e73d40b Mon Sep 17 00:00:00 2001 From: Hugh Hackman Date: Sat, 21 Mar 2026 00:17:29 +0000 Subject: [PATCH] fix: improve ci-health-check signal and replace node with jq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace node -e JSON parsing with jq (available on our runners) - Exclude Release workflow failures from FAIL count — these fail at the post-release PR-creation step due to missing RELEASE_APP org secrets (tracked in PRI-380), not actual CI breakage - Demote Release failures to WARN so the health check exits 0 when only Release is broken, giving clean signal for real CI problems - Increase run limit from 5 to 10 for better intermittent failure detection - Remove unnecessary Node.js setup step from the workflow Co-Authored-By: Paperclip --- .github/scripts/ci-health-check.sh | 34 ++++++++++++-------------- .github/workflows/ci-health-check.yaml | 5 ---- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/.github/scripts/ci-health-check.sh b/.github/scripts/ci-health-check.sh index 58604c5..b305308 100755 --- a/.github/scripts/ci-health-check.sh +++ b/.github/scripts/ci-health-check.sh @@ -23,8 +23,8 @@ warnings=0 for repo in "${PLUGIN_REPOS[@]}"; do echo "--- ${repo} ---" - # Get last 5 runs - runs=$(gh run list --repo "${ORG}/${repo}" --limit 5 --json name,conclusion,headBranch,updatedAt 2>/dev/null || echo "[]") + # Get last 10 runs (wider window to catch intermittent failures) + runs=$(gh run list --repo "${ORG}/${repo}" --limit 10 --json name,conclusion,headBranch,updatedAt 2>/dev/null || echo "[]") if [ "$runs" = "[]" ]; then echo " WARNING: No workflow runs found" @@ -32,27 +32,23 @@ for repo in "${PLUGIN_REPOS[@]}"; do continue fi - # Use node for JSON parsing (jq not available) - main_failures=$(echo "$runs" | node -e " - const d = JSON.parse(require('fs').readFileSync(0,'utf8')); - const fails = d.filter(r => r.headBranch==='main' && r.conclusion==='failure'); - console.log(fails.length); - ") - total=$(echo "$runs" | node -e " - const d = JSON.parse(require('fs').readFileSync(0,'utf8')); - console.log(d.length); - ") + # Count CI failures on main — exclude Release workflow failures since those + # fail at the post-release PR-creation step (tracked separately via PRI-380). + main_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release")] | length') + total=$(echo "$runs" | jq 'length') if [ "$main_failures" -gt 0 ]; then - echo " FAIL: ${main_failures} failure(s) in last ${total} runs on main:" - echo "$runs" | node -e " - const d = JSON.parse(require('fs').readFileSync(0,'utf8')); - d.filter(r => r.headBranch==='main' && r.conclusion==='failure') - .forEach(r => console.log(' - ' + r.name + ' (' + r.updatedAt + ')')); - " + echo " FAIL: ${main_failures} CI failure(s) in last ${total} runs on main:" + echo "$runs" | jq -r '.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release") | " - \(.name) (\(.updatedAt))"' ((failures++)) || true else - echo " OK: All recent runs passing" + echo " OK: All recent CI runs passing" + # Surface any Release failures as a warning (known issue: PRI-380) + release_failures=$(echo "$runs" | jq '[.[] | select(.name=="Release" and .conclusion=="failure")] | length') + if [ "$release_failures" -gt 0 ]; then + echo " WARN: Release workflow has ${release_failures} failure(s) — see PRI-380 (missing RELEASE_APP org secrets)" + ((warnings++)) || true + fi fi # Check latest release diff --git a/.github/workflows/ci-health-check.yaml b/.github/workflows/ci-health-check.yaml index bf49be9..6a77c94 100644 --- a/.github/workflows/ci-health-check.yaml +++ b/.github/workflows/ci-health-check.yaml @@ -12,11 +12,6 @@ jobs: - name: Checkout repository uses: actions/checkout@v6 - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '22' - - name: Run CI/CD health check env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}