d077c62bcb
Enhanced the ci-health-check.sh script to: - Add stale repo detection (repos with no updates in 30+ days) - Add CI workflow configuration checks - Add color-coded output for better readability - Track multiple failure types (CI failures, stale repos, no CI) - Provide clearer summary reporting - Increase CRITICAL_THRESHOLD to 3 for better filtering This enables proactive monitoring of both CI health and repository maintenance status across all privilegedescalation repos. Co-Authored-By: Paperclip <noreply@paperclip.ing>
107 lines
2.9 KiB
Bash
Executable File
107 lines
2.9 KiB
Bash
Executable File
#!/bin/bash
#
# CI Health Check Script
#
# Checks CI health across all privilegedescalation repos and reports failures.
# Besides failing CI runs it also tracks stale repositories and repositories
# that have no CI workflows configured.

set -euo pipefail

# --- Configuration -----------------------------------------------------------

# GitHub organization that owns every repository listed in REPOS.
readonly ORG="privilegedescalation"

# A repo whose last update is older than this many days is reported as stale.
readonly MAX_AGE_DAYS=30

# Number of failed CI/E2E runs among the most recent workflow runs at which a
# repo is reported as failing.
readonly CRITICAL_THRESHOLD=3

# --- Colors for output -------------------------------------------------------
# Stored as literal backslash sequences; they are interpreted at print time
# by `echo -e`.
readonly RED='\033[0;31m'
readonly YELLOW='\033[1;33m'
readonly GREEN='\033[0;32m'
readonly NC='\033[0m' # No Color

# --- Repos to monitor --------------------------------------------------------
REPOS=(
  "org"
  "infra"
  "headlamp-sealed-secrets-plugin"
  "headlamp-rook-plugin"
  "headlamp-intel-gpu-plugin"
  "headlamp-kube-vip-plugin"
  "headlamp-tns-csi-plugin"
  "headlamp-argocd-plugin"
  "headlamp-polaris-plugin"
)
readonly REPOS

# --- Report header -----------------------------------------------------------
echo "=== CI Health Check for $ORG ==="
echo "Generated: $(date -u +"%Y-%m-%d %H:%M:%S UTC")"
echo ""

# --- Issue tracking ----------------------------------------------------------
# Each array collects one human-readable entry per affected repo; they are
# appended to while the repos are checked and printed in the summary.
FAILURES=()
STALE_REPOS=()
NO_CI_REPOS=()

# Per-repository checks.
#
# For every entry in REPOS this runs three checks and records the findings:
#   1. stale check     -> STALE_REPOS  (last update older than MAX_AGE_DAYS)
#   2. workflow check  -> NO_CI_REPOS  (zero workflows configured)
#   3. recent CI runs  -> FAILURES     (>= CRITICAL_THRESHOLD failed CI/E2E runs)
# A GitHub query that fails in check 2 or 3 is recorded in FAILURES as well,
# so an unreachable API is never reported as "no CI" or "healthy" and never
# aborts the script before the summary is printed.

# The stale cutoff (Unix timestamp) is identical for every repo, so compute it
# once. NOTE: `date -d` is GNU date syntax.
cutoff_date=$(date -d "$MAX_AGE_DAYS days ago" +%s)

# jq filter: count failed runs whose workflow name mentions CI or E2E
# (this leaves out approval-gate workflows).
failed_ci_filter='[.[]
  | select(.conclusion == "failure")
  | select(.name | contains("CI") or contains("E2E") or contains("ci") or contains("e2e"))
] | length'

for repo in "${REPOS[@]}"; do
  echo "Checking $repo..."

  # Check for stale repos. This is best effort: when the update time cannot
  # be fetched or parsed, the check is skipped with a warning instead of
  # treating the repo as stale.
  if last_updated=$(gh repo view "$ORG/$repo" --json updatedAt --jq '.updatedAt' 2>/dev/null) \
    && [[ -n "$last_updated" ]]; then
    if last_updated_date=$(date -d "$last_updated" +%s 2>/dev/null); then
      if [[ "$last_updated_date" -lt "$cutoff_date" ]]; then
        STALE_REPOS+=("$repo (last updated: $last_updated)")
        echo -e " ${YELLOW}⚠ Stale repo${NC}"
      fi
    else
      echo " warning: cannot parse update time '$last_updated' of $repo; stale check skipped" >&2
    fi
  else
    echo " warning: cannot fetch update time of $repo; stale check skipped" >&2
  fi

  # Check for CI workflows. The result must be a plain non-negative integer;
  # anything else (command failure, error payload, "null") is a query failure.
  if ! workflow_count=$(gh api "repos/$ORG/$repo/actions/workflows" --jq '.total_count') \
    || [[ ! "$workflow_count" =~ ^[0-9]+$ ]]; then
    FAILURES+=("$repo: unable to query CI workflows")
    echo -e " ${RED}✗ Unable to query CI workflows${NC}"
    continue
  fi

  if [[ "$workflow_count" -eq 0 ]]; then
    NO_CI_REPOS+=("$repo")
    echo -e " ${YELLOW}⚠ No CI workflows configured${NC}"
    continue
  fi

  # Check recent CI runs (exclude approval gates): count failures among the
  # 10 most recent workflow runs.
  if ! recent_failures=$(gh run list --repo "$ORG/$repo" --limit 10 \
      --json conclusion,name --jq "$failed_ci_filter") \
    || [[ ! "$recent_failures" =~ ^[0-9]+$ ]]; then
    FAILURES+=("$repo: unable to list recent workflow runs")
    echo -e " ${RED}✗ Unable to list recent workflow runs${NC}"
    continue
  fi

  if [[ "$recent_failures" -ge "$CRITICAL_THRESHOLD" ]]; then
    FAILURES+=("$repo: $recent_failures recent CI/E2E failures")
    echo -e " ${RED}✗ $recent_failures recent CI/E2E failures${NC}"
  else
    echo -e " ${GREEN}✓ CI healthy${NC}"
  fi
done

# Summary
#
# Prints one section per category of recorded problem and sets the exit
# status: 0 when nothing was recorded, 1 otherwise.

#######################################
# Print a colored heading followed by one " - entry" line per entry.
# Prints nothing when no entries are given.
# Globals:   NC (read)
# Arguments: $1 - color escape sequence for the heading
#            $2 - heading text
#            $3... - entries to list
# Outputs:   writes the section to stdout
#######################################
print_section() {
  local color=$1 heading=$2
  shift 2
  if (( $# == 0 )); then
    return 0
  fi
  # %b interprets the backslash escapes stored in the color variables.
  printf '%b%s%b\n' "$color" "$heading" "$NC"
  local entry
  for entry in "$@"; do
    printf ' - %s\n' "$entry"
  done
}

echo ""
echo "=== Summary ==="

if (( ${#FAILURES[@]} + ${#STALE_REPOS[@]} + ${#NO_CI_REPOS[@]} == 0 )); then
  echo -e "${GREEN}All systems healthy!${NC}"
  exit 0
fi

# The ${arr[@]+...} form expands to nothing for an empty array without
# tripping `set -u` on older bash versions.
print_section "$RED" "CI Failures:" \
  ${FAILURES[@]+"${FAILURES[@]}"}
print_section "$YELLOW" "Stale Repos (no updates in $MAX_AGE_DAYS+ days):" \
  ${STALE_REPOS[@]+"${STALE_REPOS[@]}"}
print_section "$YELLOW" "Repos without CI:" \
  ${NO_CI_REPOS[@]+"${NO_CI_REPOS[@]}"}

exit 1