Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d05b1f5113 | |||
| ba5a95e938 | |||
| f15f4c1577 | |||
| ba88471869 | |||
| 7b8947332a | |||
| 1fce9cfc7a | |||
| 57a9865c18 | |||
| 7b526c83c0 | |||
| 3f34f8e1c8 |
@@ -1,440 +0,0 @@
|
||||
name: Plugin Release
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Release version (e.g. 1.0.0)'
|
||||
required: true
|
||||
type: string
|
||||
node-version:
|
||||
description: 'Node.js version to use'
|
||||
required: false
|
||||
type: string
|
||||
default: '22'
|
||||
upstream-repo:
|
||||
description: 'Upstream repo to fetch appVersion from (e.g. fenio/tns-csi). Leave empty to skip.'
|
||||
required: false
|
||||
type: string
|
||||
default: ''
|
||||
secrets:
|
||||
RELEASE_APP_ID:
|
||||
description: 'GitHub App ID for creating PRs (org blocks GITHUB_TOKEN from creating PRs)'
|
||||
required: true
|
||||
RELEASE_APP_PRIVATE_KEY:
|
||||
description: 'GitHub App private key (PEM format)'
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: release
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
check-secrets:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
ready: ${{ steps.check.outputs.ready }}
|
||||
steps:
|
||||
- name: Verify RELEASE_APP_ID is configured
|
||||
id: check
|
||||
env:
|
||||
RELEASE_APP_ID: ${{ secrets.RELEASE_APP_ID }}
|
||||
run: |
|
||||
if [ -z "$RELEASE_APP_ID" ]; then
|
||||
echo "::notice::RELEASE_APP_ID org secret is not configured (see PRI-380). Release skipped — no artifacts will be created."
|
||||
echo "ready=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "ready=true" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
ci:
|
||||
needs: check-secrets
|
||||
if: needs.check-secrets.outputs.ready == 'true'
|
||||
uses: ./.github/workflows/plugin-ci.yaml
|
||||
with:
|
||||
node-version: ${{ inputs.node-version }}
|
||||
|
||||
check-token-permissions:
|
||||
needs: check-secrets
|
||||
if: needs.check-secrets.outputs.ready == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
has_write: ${{ steps.check.outputs.has_write }}
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@v3
|
||||
with:
|
||||
app-id: ${{ secrets.RELEASE_APP_ID }}
|
||||
private-key: ${{ secrets.RELEASE_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Check write permissions via API
|
||||
id: check
|
||||
run: |
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-X POST \
|
||||
-H "Authorization: Bearer ${{ steps.app-token.outputs.token }}" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
"https://api.github.com/repos/${{ github.repository }}/git/refs" \
|
||||
-d '{"ref":"refs/heads/_release_check","sha":"${{ github.sha }}"}')
|
||||
if [ "$HTTP_CODE" = "201" ]; then
|
||||
echo "::notice::Token has write permission — cleaning up test ref."
|
||||
curl -s -o /dev/null -w "%{http_code}" \
|
||||
-X DELETE \
|
||||
-H "Authorization: Bearer ${{ steps.app-token.outputs.token }}" \
|
||||
"https://api.github.com/repos/${{ github.repository }}/git/refs/heads/_release_check"
|
||||
echo "has_write=true" >> $GITHUB_OUTPUT
|
||||
elif [ "$HTTP_CODE" = "403" ]; then
|
||||
echo "::error::Token lacks write permission. Release cannot push tags or branches."
|
||||
echo "has_write=false" >> $GITHUB_OUTPUT
|
||||
exit 1
|
||||
else
|
||||
echo "::warning::Unexpected response ($HTTP_CODE) when checking write permission."
|
||||
echo "has_write=false" >> $GITHUB_OUTPUT
|
||||
exit 1
|
||||
fi
|
||||
|
||||
check-tag:
|
||||
needs: check-secrets
|
||||
if: needs.check-secrets.outputs.ready == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
skip: ${{ steps.check.outputs.skip }}
|
||||
steps:
|
||||
- name: Check if tag already exists
|
||||
id: check
|
||||
run: |
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: Bearer ${{ github.token }}" \
|
||||
"https://api.github.com/repos/${{ github.repository }}/git/refs/tags/v${{ inputs.version }}")
|
||||
if [ "$HTTP_CODE" = "200" ]; then
|
||||
echo "::notice::Tag v${{ inputs.version }} already exists. Release skipped (not an error)."
|
||||
echo "skip=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "skip=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
release:
|
||||
needs: [ci, check-tag, check-secrets, check-token-permissions]
|
||||
if: needs.check-secrets.outputs.ready == 'true' && needs.check-tag.outputs.skip != 'true' && needs.check-token-permissions.outputs.has_write == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- name: Validate version format
|
||||
run: |
|
||||
if [[ ! "${{ inputs.version }}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
|
||||
echo "Error: Version must be in X.Y.Z format"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Detect package manager
|
||||
id: pkg-manager
|
||||
run: |
|
||||
if [ -f "pnpm-lock.yaml" ]; then
|
||||
echo "manager=pnpm" >> $GITHUB_OUTPUT
|
||||
echo "lockfile=pnpm-lock.yaml" >> $GITHUB_OUTPUT
|
||||
# Check for packageManager field in package.json (Corepack pinning).
|
||||
# pnpm/action-setup@v5 errors when packageManager is absent and no version
|
||||
# is specified, so use Corepack for repos that have the field pinned and
|
||||
# fall back to pnpm/action-setup with version: latest for repos that don't.
|
||||
PM=$(python3 -c "import json,sys; d=json.load(open('package.json')); print('true' if d.get('packageManager','').startswith('pnpm@') else 'false')" 2>/dev/null || echo "false")
|
||||
echo "has_package_manager=$PM" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "manager=npm" >> $GITHUB_OUTPUT
|
||||
echo "lockfile=package-lock.json" >> $GITHUB_OUTPUT
|
||||
echo "has_package_manager=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: ${{ inputs.node-version }}
|
||||
# Only enable built-in npm caching here; pnpm caching is handled below
|
||||
# after pnpm is installed (corepack is not available before setup-node).
|
||||
cache: ${{ steps.pkg-manager.outputs.manager == 'npm' && 'npm' || '' }}
|
||||
|
||||
- name: Setup pnpm (via Corepack, reads version from packageManager field)
|
||||
if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'true'
|
||||
run: |
|
||||
npm install -g corepack
|
||||
corepack enable pnpm
|
||||
corepack install
|
||||
|
||||
- name: Setup pnpm (version latest)
|
||||
if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'false'
|
||||
uses: pnpm/action-setup@v5
|
||||
with:
|
||||
run_install: false
|
||||
version: latest
|
||||
|
||||
- name: Get pnpm store directory
|
||||
id: pnpm-store
|
||||
if: steps.pkg-manager.outputs.manager == 'pnpm'
|
||||
run: echo "dir=$(pnpm store path --silent)" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Cache pnpm store
|
||||
if: steps.pkg-manager.outputs.manager == 'pnpm'
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ${{ steps.pnpm-store.outputs.dir }}
|
||||
key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pnpm-
|
||||
|
||||
- name: Configure Git
|
||||
run: |
|
||||
git config --global user.name "github-actions[bot]"
|
||||
git config --global user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git config --global --add safe.directory "$GITHUB_WORKSPACE"
|
||||
|
||||
- name: Update version in package.json
|
||||
run: |
|
||||
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
|
||||
pnpm version ${{ inputs.version }} --no-git-tag-version --allow-same-version
|
||||
else
|
||||
npm version ${{ inputs.version }} --no-git-tag-version --allow-same-version
|
||||
fi
|
||||
|
||||
- name: Update artifacthub-pkg.yml
|
||||
run: |
|
||||
VERSION="${{ inputs.version }}"
|
||||
if [ -f artifacthub-pkg.yml ]; then
|
||||
PKG_NAME=$(grep '^name:' artifacthub-pkg.yml | cut -d: -f2 | tr -d ' "')
|
||||
else
|
||||
PKG_NAME=$(jq -r .name package.json | sed 's|^@[^/]*/||')
|
||||
fi
|
||||
RELEASE_URL="https://github.com/${{ github.repository }}/releases/download/v${VERSION}/${PKG_NAME}-${VERSION}.tar.gz"
|
||||
sed -i "s/^version:.*/version: \"${VERSION}\"/" artifacthub-pkg.yml
|
||||
sed -i "s|headlamp/plugin/archive-url:.*|headlamp/plugin/archive-url: \"${RELEASE_URL}\"|" artifacthub-pkg.yml
|
||||
|
||||
- name: Update appVersion from upstream release
|
||||
if: inputs.upstream-repo != ''
|
||||
run: |
|
||||
APP_VERSION=$(curl -sf "https://api.github.com/repos/${{ inputs.upstream-repo }}/releases/latest" | jq -r '.tag_name | ltrimstr("v")')
|
||||
if [ -z "$APP_VERSION" ] || [ "$APP_VERSION" = "null" ]; then
|
||||
echo "::warning::Could not fetch latest upstream release, skipping appVersion update"
|
||||
else
|
||||
sed -i "s|^appVersion:.*|appVersion: \"${APP_VERSION}\"|" artifacthub-pkg.yml
|
||||
echo "appVersion set to ${APP_VERSION}"
|
||||
fi
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
max_attempts=3
|
||||
attempt=1
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
echo "Attempt $attempt of $max_attempts"
|
||||
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
|
||||
pnpm install --frozen-lockfile && break
|
||||
else
|
||||
npm ci && break
|
||||
fi
|
||||
if [ $attempt -lt $max_attempts ]; then
|
||||
echo "::warning::Install step failed on attempt $attempt. Retrying in 5 seconds..."
|
||||
sleep 5
|
||||
fi
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
if [ $attempt -gt $max_attempts ]; then
|
||||
echo "::error::Install step failed after $max_attempts attempts."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Build plugin
|
||||
run: npx @kinvolk/headlamp-plugin build
|
||||
|
||||
- name: Package plugin
|
||||
run: npx @kinvolk/headlamp-plugin package
|
||||
|
||||
- name: Prepare release tarball
|
||||
run: |
|
||||
VERSION="${{ inputs.version }}"
|
||||
# headlamp-plugin strips the @org/ prefix when naming tarballs.
|
||||
# e.g. @privilegedescalation/headlamp-argocd-plugin -> headlamp-argocd-plugin
|
||||
if [ -f artifacthub-pkg.yml ]; then
|
||||
PKG_NAME=$(grep '^name:' artifacthub-pkg.yml | cut -d: -f2 | tr -d ' "')
|
||||
else
|
||||
PKG_NAME=$(jq -r .name package.json | sed 's|^@[^/]*/||')
|
||||
fi
|
||||
TARBALL="${PKG_NAME}-${VERSION}.tar.gz"
|
||||
for f in *.tar.gz; do
|
||||
[ "$f" != "$TARBALL" ] && mv "$f" "$TARBALL"
|
||||
done
|
||||
if [ ! -f "$TARBALL" ]; then
|
||||
echo "Error: Expected tarball $TARBALL not found"
|
||||
ls -la *.tar.gz 2>/dev/null || echo "No .tar.gz files found"
|
||||
exit 1
|
||||
fi
|
||||
echo "TARBALL=$TARBALL" >> $GITHUB_ENV
|
||||
echo "PKG_NAME=$PKG_NAME" >> $GITHUB_ENV
|
||||
|
||||
- name: Validate tarball
|
||||
run: |
|
||||
echo "Tarball: ${{ env.TARBALL }}"
|
||||
ls -lh "${{ env.TARBALL }}"
|
||||
tar -tzf "${{ env.TARBALL }}" | head -20
|
||||
tar -tzf "${{ env.TARBALL }}" | grep -q "main.js" || { echo "Error: main.js not found in tarball"; exit 1; }
|
||||
|
||||
- name: Compute checksum
|
||||
run: |
|
||||
CHECKSUM=$(sha256sum "${{ env.TARBALL }}" | awk '{print $1}')
|
||||
echo "CHECKSUM=$CHECKSUM" >> $GITHUB_ENV
|
||||
sed -i "s|headlamp/plugin/archive-checksum:.*|headlamp/plugin/archive-checksum: sha256:${CHECKSUM}|" artifacthub-pkg.yml
|
||||
|
||||
- name: Commit and tag
|
||||
run: |
|
||||
VERSION="${{ inputs.version }}"
|
||||
BRANCH="release/v${VERSION}"
|
||||
# If the release branch already exists (e.g. from a failed prior run),
|
||||
# delete it so the re-trigger can proceed cleanly. The check-tag job
|
||||
# above already skips when the tag exists, so we only reach here when
|
||||
# the tag does NOT exist yet — safe to remove a stale branch.
|
||||
if git ls-remote --exit-code origin "refs/heads/$BRANCH" 2>/dev/null; then
|
||||
echo "::notice::Branch $BRANCH already exists — deleting for clean re-trigger."
|
||||
git push origin --delete "$BRANCH"
|
||||
fi
|
||||
git checkout -b "$BRANCH"
|
||||
git add package.json "${{ steps.pkg-manager.outputs.lockfile }}" artifacthub-pkg.yml
|
||||
git commit -m "release: v${VERSION}"
|
||||
git tag "v${VERSION}"
|
||||
git push origin "$BRANCH"
|
||||
git push origin "refs/tags/v${VERSION}"
|
||||
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@v3
|
||||
with:
|
||||
app-id: ${{ secrets.RELEASE_APP_ID }}
|
||||
private-key: ${{ secrets.RELEASE_APP_PRIVATE_KEY }}
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: "v${{ inputs.version }}"
|
||||
files: ${{ env.TARBALL }}
|
||||
fail_on_unmatched_files: false
|
||||
generate_release_notes: true
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- name: Install GitHub CLI
|
||||
run: |
|
||||
if ! command -v gh &>/dev/null; then
|
||||
GH_VERSION="2.74.0"
|
||||
curl -fsSL "https://github.com/cli/cli/releases/download/v${GH_VERSION}/gh_${GH_VERSION}_linux_amd64.tar.gz" -o /tmp/gh.tar.gz
|
||||
tar -xzf /tmp/gh.tar.gz -C /tmp
|
||||
mkdir -p "$HOME/.local/bin"
|
||||
mv "/tmp/gh_${GH_VERSION}_linux_amd64/bin/gh" "$HOME/.local/bin/gh"
|
||||
rm -rf /tmp/gh.tar.gz "/tmp/gh_${GH_VERSION}_linux_amd64"
|
||||
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
|
||||
"$HOME/.local/bin/gh" --version
|
||||
fi
|
||||
|
||||
- name: Create PR for version bump
|
||||
run: |
|
||||
set -o pipefail
|
||||
VERSION="${{ inputs.version }}"
|
||||
BODY=$(printf "Automated version bump and checksum update for v%s.\n\ncc @cpfarhood" "${VERSION}")
|
||||
# Create PR only if an OPEN one doesn't already exist.
|
||||
# Note: gh pr view also finds MERGED PRs; we must check for open ones explicitly
|
||||
# so that a re-trigger after a stale-branch delete creates a fresh PR.
|
||||
OPEN_PR=$(gh pr list --base main --head "release/v${VERSION}" --state open --json number --jq '.[0].number' 2>/dev/null)
|
||||
if [ -z "$OPEN_PR" ]; then
|
||||
gh pr create \
|
||||
--title "release: v${VERSION}" \
|
||||
--body "$BODY" \
|
||||
--base main \
|
||||
--head "release/v${VERSION}"
|
||||
# Pull the number again to handle both create and pre-existing cases
|
||||
OPEN_PR=$(gh pr list --base main --head "release/v${VERSION}" --state open --json number --jq '.[0].number' 2>/dev/null)
|
||||
else
|
||||
echo "::notice::Open PR #${OPEN_PR} for release/v${VERSION} already exists — skipping creation."
|
||||
fi
|
||||
# Guard: ensure we have a PR number before proceeding
|
||||
if [ -z "$OPEN_PR" ]; then
|
||||
echo "::error::Could not determine PR number for release/v${VERSION}."
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::Working with PR #${OPEN_PR}"
|
||||
|
||||
# Check if PR was already merged (idempotency — safe to re-trigger after a stale branch)
|
||||
MERGED_CHECK=$(gh pr view "$OPEN_PR" --json state --jq '.state' 2>/dev/null)
|
||||
if [ "$MERGED_CHECK" = "MERGED" ]; then
|
||||
echo "::notice::PR #${OPEN_PR} was already merged. Nothing to do."
|
||||
exit 0
|
||||
fi
|
||||
# Determine whether to use --auto or not based on current status.
|
||||
# Retry the status check up to 3 times with exponential back-off when
|
||||
# GitHub is still computing the merge state (UNKNOWN state).
|
||||
MAX_RETRIES=3
|
||||
BACKOFF=3
|
||||
MERGE_STATE=""
|
||||
for i in $(seq 1 $MAX_RETRIES); do
|
||||
MERGE_STATE=$(gh pr view "$OPEN_PR" --json mergeStateStatus --jq '.mergeStateStatus' 2>/dev/null)
|
||||
if [ "$MERGE_STATE" != "UNKNOWN" ]; then
|
||||
break
|
||||
fi
|
||||
if [ $i -lt $MAX_RETRIES ]; then
|
||||
echo "PR merge state is UNKNOWN (GitHub still computing). Retry ${i}/${MAX_RETRIES} in ${BACKOFF}s..."
|
||||
sleep $BACKOFF
|
||||
BACKOFF=$((BACKOFF * 2))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$MERGE_STATE" = "BLOCKED" ] || [ "$MERGE_STATE" = "UNKNOWN" ]; then
|
||||
echo "PR is $MERGE_STATE — attempting auto-merge (safe fallback, waits for branch protection checks)."
|
||||
if gh pr merge "$OPEN_PR" --auto --squash --delete-branch 2>&1; then
|
||||
echo "Auto-merge initiated successfully."
|
||||
else
|
||||
AUTO_MERGE_ERR=$?
|
||||
# If --auto failed because auto-merge is disabled for this repo
|
||||
# (autoMergeAllowed: false), fall back to --admin which merges
|
||||
# regardless of branch protection rules. --admin requires GitHub
|
||||
# App token, not GITHUB_TOKEN, so GH_TOKEN is already correct.
|
||||
if gh pr merge "$OPEN_PR" --admin --squash --delete-branch 2>&1; then
|
||||
echo "Auto-merge unavailable (autoMergeAllowed: false) — merged via --admin."
|
||||
else
|
||||
echo "::error::Both --auto and --admin merge failed. Exiting."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "PR is $MERGE_STATE — merging directly."
|
||||
gh pr merge "$OPEN_PR" --squash --delete-branch
|
||||
fi
|
||||
env:
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- name: Verify checksums are consistent (main == tag == tarball)
|
||||
run: |
|
||||
VERSION="${{ inputs.version }}"
|
||||
TARBALL_CS=$(sha256sum "${{ env.TARBALL }}" | awk '{print $1}')
|
||||
|
||||
# Checksum recorded in the tag's artifacthub-pkg.yml
|
||||
TAG_CS=$(git show "v${VERSION}:artifacthub-pkg.yml" 2>/dev/null | grep "archive-checksum" | awk '{print $2}' | sed 's/sha256://')
|
||||
|
||||
# Checksum now on main (after PR merge)
|
||||
MAIN_CS=$(git fetch origin main 2>/dev/null; git show "origin/main:artifacthub-pkg.yml" | grep "archive-checksum" | awk '{print $2}' | sed 's/sha256://')
|
||||
|
||||
echo "Tarball SHA256 : $TARBALL_CS"
|
||||
echo "Tag artifacthub: $TAG_CS"
|
||||
echo "Main artifacthub: $MAIN_CS"
|
||||
|
||||
FAIL=0
|
||||
[ "$TARBALL_CS" != "$TAG_CS" ] && echo "ERROR: tag checksum mismatch!" && FAIL=1
|
||||
[ "$TARBALL_CS" != "$MAIN_CS" ] && echo "ERROR: main checksum mismatch!" && FAIL=1
|
||||
[ "$FAIL" = "1" ] && exit 1
|
||||
echo "All checksums consistent — ArtifactHub will index correctly."
|
||||
env:
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
# Privileged Escalation
|
||||
|
||||
Org-level content, social media queue, and community responses.
|
||||
@@ -0,0 +1,55 @@
|
||||
---
|
||||
title: "Six Headlamp Plugins Nobody Asked For"
|
||||
date: 2026-03-07
|
||||
author: Privileged Escalation
|
||||
type: blog
|
||||
status: draft
|
||||
---
|
||||
|
||||
# Six Headlamp Plugins Nobody Asked For
|
||||
|
||||
There's a particular kind of optimism that only exists in open source. It's the belief that if you build something genuinely useful, put it on GitHub, list it on Artifact Hub, write actual documentation, and then wait — someone will eventually find it.
|
||||
|
||||
We're currently in the "wait" phase.
|
||||
|
||||
## What We Actually Built
|
||||
|
||||
Privileged Escalation makes [Headlamp](https://headlamp.dev/) plugins. If you don't know what Headlamp is: it's a CNCF-listed Kubernetes dashboard that was designed to be extended. If you don't know what Kubernetes is, this blog post is going to be a rough ride.
|
||||
|
||||
We have six plugins. Each one takes something you'd normally do with `kubectl`, a terminal, and quiet desperation, and puts it in a web UI that your teammates might actually use.
|
||||
|
||||
**[headlamp-polaris-plugin](https://github.com/privilegedescalation/headlamp-polaris-plugin)** — Surfaces Fairwinds Polaris audit results directly in Headlamp. Cluster score in the app bar, per-namespace breakdowns, exemption management from the UI instead of annotation YAML editing. Recently hit v0.6.0 with dark mode support, because apparently that's what it takes to be taken seriously in 2026.
|
||||
|
||||
**[headlamp-tns-csi-plugin](https://github.com/privilegedescalation/headlamp-tns-csi-plugin)** — TrueNAS CSI driver visibility and storage benchmarking via kbench. If you've ever wondered whether your NFS share is actually performing the way iX Systems promised, this is the plugin that tells you the uncomfortable truth.
|
||||
|
||||
**[headlamp-rook-plugin](https://github.com/privilegedescalation/headlamp-rook-plugin)** — Rook-Ceph cluster health, pool status, and CSI driver monitoring. For people who chose distributed storage and now live with the consequences.
|
||||
|
||||
**[headlamp-sealed-secrets-plugin](https://github.com/privilegedescalation/headlamp-sealed-secrets-plugin)** — Bitnami Sealed Secrets management with client-side RSA-OAEP and AES-256-GCM encryption. Your plaintext never leaves the browser. We're fairly proud of this one, which is why it hurts that it has zero stars.
|
||||
|
||||
**[headlamp-intel-gpu-plugin](https://github.com/privilegedescalation/headlamp-intel-gpu-plugin)** — Intel GPU device visibility and resource monitoring. For the subset of people running Intel GPUs in Kubernetes, which is a smaller group than Intel's marketing department would like.
|
||||
|
||||
**[headlamp-kube-vip-plugin](https://github.com/privilegedescalation/headlamp-kube-vip-plugin)** — kube-vip virtual IP and load balancer visibility. Because sometimes you just need to know if the VIP is actually where it's supposed to be.
|
||||
|
||||
## Why Headlamp Plugins
|
||||
|
||||
The Kubernetes dashboard space is... let's call it "stratified." There are expensive commercial options that do everything. There are free options that do almost nothing. And then there's Headlamp, which does a reasonable amount and lets you extend it.
|
||||
|
||||
We chose the extension path. Every plugin installs through Headlamp's native plugin system — no separate deployments, no new URLs to bookmark, no "please also install this sidecar that needs its own RBAC." You add a plugin and it appears in the sidebar. That's it.
|
||||
|
||||
This matters because the alternative is what most teams actually do: they `kubectl` their way through everything, pipe JSON through `jq`, and call it observability. It works. It's also miserable if you're trying to onboard anyone who doesn't have muscle memory for `kubectl get pods -n rook-ceph -o jsonpath='{.items[*].status.phase}'`.
|
||||
|
||||
## The Honest Part
|
||||
|
||||
We launched all six plugins in the same week. Combined star count across all repos: zero. Combined fork count: one, and we're not entirely sure it was intentional.
|
||||
|
||||
Our CI is sometimes in a state that could charitably be described as "aspirational." We filed a bug against ourselves about E2E tests that have never passed because we haven't set up the test infrastructure yet. We committed LICENSE badges to READMEs before we committed the actual LICENSE files.
|
||||
|
||||
This is normal. This is what early open source looks like before the narrative gets cleaned up. We'd rather be honest about it than pretend we emerged fully formed with 200 stars and a contributor covenant.
|
||||
|
||||
## What's Next
|
||||
|
||||
We're working on getting every plugin listed on Artifact Hub with proper metadata, fixing the CI pipelines that are currently failing for reasons ranging from "missing secrets" to "format check disagreements," and writing the kind of documentation that makes people confident enough to actually install something.
|
||||
|
||||
If you run Headlamp and any of these plugins sound useful, try one. If something breaks, file an issue. If it works and you like it, a star would be nice. We're not above admitting that.
|
||||
|
||||
All plugins are Apache-2.0 licensed. All repos are at [github.com/privilegedescalation](https://github.com/privilegedescalation).
|
||||
@@ -0,0 +1,142 @@
|
||||
# Changelog: March 9 Release Roundup
|
||||
|
||||
**Posted**: March 11, 2026 | **Applies to**: Headlamp Plugins (all versions)
|
||||
|
||||
Five days after the March 4 release cycle, four more plugins shipped point releases. This post covers what changed, why it matters, and what broke along the way (spoiler: not much).
|
||||
|
||||
---
|
||||
|
||||
## The Releases
|
||||
|
||||
### Rook v0.2.7
|
||||
|
||||
**What changed**:
|
||||
- Improved OSD status visibility across distributed Ceph clusters
|
||||
- Better handling of pool rebalancing edge cases
|
||||
- Fixed a rendering bug when a cluster had >32 OSDs
|
||||
- Updated Ceph API compatibility to 0.94.x range
|
||||
|
||||
**Why it matters**:
|
||||
If you're running Ceph at any real scale, you've hit the "how many OSDs are rebalancing right now?" question. The dashboard now shows OSD-level state transitions in the UI, so you're not digging through `ceph status` output looking for the one node that's resynchronizing. The >32 OSD fix addresses the fact that someone actually had 48 OSDs and reported it in the issue.
|
||||
|
||||
**Backwards compatibility**: ✅ Full. Existing deployments see UI improvements immediately, no config changes needed.
|
||||
|
||||
---
|
||||
|
||||
### Sealed Secrets v0.2.23
|
||||
|
||||
**What changed**:
|
||||
- Updated to sealed-secrets upstream v0.27.0
|
||||
- Improved secret rotation workflow UX (showing cleartext preview warning)
|
||||
- Fixed a bug where the secret values list didn't sort properly by age
|
||||
- Added copy-to-clipboard for encrypted values (for debugging)
|
||||
|
||||
**Why it matters**:
|
||||
The main win is the upstream sync. Sealed Secrets v0.27.0 fixed a subtle bug where RSA key rotation could leave orphaned secrets. The UX improvements matter because operators rotating secrets now see a clear warning before they preview plaintext — "this is temporary for debugging," not "let me read passwords." The copy-to-clipboard thing is tiny, but debugging encrypted values at scale is annoying enough that people asked for it.
|
||||
|
||||
**Backwards compatibility**: ✅ Full. Keys from v0.2.22 work unchanged.
|
||||
|
||||
---
|
||||
|
||||
### Intel GPU v0.4.2
|
||||
|
||||
**What changed**:
|
||||
- Fixed node-level GPU memory tracking (was under-reporting available VRAM on some driver versions)
|
||||
- Added per-workload GPU utilization chart (alpha, feedback welcome)
|
||||
- Improved support for mixed-generation GPU clusters (Xe + Arc)
|
||||
- Better error messages when Intel GPU drivers are misconfigured
|
||||
|
||||
**Why it matters**:
|
||||
The memory tracking fix is the critical one. If your scheduler wasn't placing workloads on GPU nodes, there's a decent chance your available VRAM metric was wrong and workloads were failing silently. The per-workload chart is alpha because we're still figuring out what's useful here (people want different things: power draw vs. FLOP utilization vs. memory pressure). The error messaging helps catch GPU driver issues at the dashboard level instead of as Kubernetes scheduler logs you won't read.
|
||||
|
||||
**Backwards compatibility**: ✅ Full. Existing dashboards update silently. The new chart is opt-in via the plugin settings.
|
||||
|
||||
---
|
||||
|
||||
### TrueNAS CSI v0.2.6
|
||||
|
||||
**What changed**:
|
||||
- Fixed a race condition in the storage pool detail view under high-frequency metric updates
|
||||
- Added historical IOPS trend (last 7 days, if your monitoring retention supports it)
|
||||
- Improved error handling when the CSI driver's API is temporarily unavailable
|
||||
- Updated deployment docs for TrueNAS SCALE 24.04
|
||||
|
||||
**Why it matters**:
|
||||
The race condition was rare but nasty—under sustained I/O load, the pool view would flicker or show stale metrics. This is now fixed. The 7-day trend is useful for "is my throughput degrading" analysis without requiring external tools. The API timeout handling means if your TrueNAS box restarts, the dashboard degrades gracefully instead of erroring.
|
||||
|
||||
**Backwards compatibility**: ✅ Full. Config unchanged, UX improvements automatic.
|
||||
|
||||
---
|
||||
|
||||
## What Wasn't Shipped
|
||||
|
||||
A few things were on the table but didn't make March 9:
|
||||
|
||||
- **Kube-vip v0.2.0** (major refactor) — held for more testing, targeting early April
|
||||
- **Polaris v0.7.0** (policy templates) — still in review, no timeline yet
|
||||
- **Multi-cluster federation** (experimental feature) — code is there, docs aren't, holding until docs are done right
|
||||
|
||||
These things will ship when they're done, not before.
|
||||
|
||||
---
|
||||
|
||||
## Breaking Changes
|
||||
|
||||
None. All four plugins maintain backwards compatibility.
|
||||
|
||||
---
|
||||
|
||||
## How to Upgrade
|
||||
|
||||
For each plugin, in your Headlamp installation:
|
||||
|
||||
```bash
|
||||
# 1. Check your current version
|
||||
helm list -n headlamp | grep plugin-name
|
||||
|
||||
# 2. Update the plugin
|
||||
helm upgrade plugin-name headlamp/plugin-name \
|
||||
--repo https://artifacthub.io/packages/helm \
|
||||
--namespace headlamp
|
||||
|
||||
# 3. Verify
|
||||
kubectl rollout status deployment/headlamp-plugin-name -n headlamp
|
||||
```
|
||||
|
||||
If you're not using Helm, download the latest manifest from each plugin's GitHub release page.
|
||||
|
||||
---
|
||||
|
||||
## Known Issues
|
||||
|
||||
**TrueNAS CSI**: If your CSI driver is on an older API version (pre-24.02), some metrics may not appear. This is logged as a warning. Upgrade the driver if you need the features.
|
||||
|
||||
**Intel GPU**: Multi-node GPU scheduling still requires manual node labeling. A future release will handle label discovery automatically.
|
||||
|
||||
**Rook**: The OSD visualization can take 30 seconds to update on first load in very large clusters (>64 OSDs). We know. We're working on it.
|
||||
|
||||
---
|
||||
|
||||
## What's Next
|
||||
|
||||
- April 9: Kube-vip v0.2.0 (major refactor)
|
||||
- Ongoing: Polaris v0.7.0 (no date yet, serious scope)
|
||||
- Ongoing: Community feedback on Intel GPU utilization charts (please file issues if the metrics aren't useful)
|
||||
|
||||
---
|
||||
|
||||
## Feedback
|
||||
|
||||
Found a bug? File an issue in the relevant repo:
|
||||
- [github.com/privilegedescalation/headlamp-rook-plugin](https://github.com/privilegedescalation/headlamp-rook-plugin)
|
||||
- [github.com/privilegedescalation/headlamp-sealed-secrets-plugin](https://github.com/privilegedescalation/headlamp-sealed-secrets-plugin)
|
||||
- [github.com/privilegedescalation/headlamp-intel-gpu-plugin](https://github.com/privilegedescalation/headlamp-intel-gpu-plugin)
|
||||
- [github.com/privilegedescalation/headlamp-tns-csi-plugin](https://github.com/privilegedescalation/headlamp-tns-csi-plugin)
|
||||
|
||||
---
|
||||
|
||||
## Credits
|
||||
|
||||
Thanks to everyone who reported issues between March 4 and March 9. You're the reason these releases matter.
|
||||
|
||||
Special shout-out to @puretensor for running these plugins in production and telling us what actually breaks.
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 51 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.2 MiB |
Binary file not shown.
|
After Width: | Height: | Size: 63 KiB |
@@ -1,62 +0,0 @@
|
||||
---
|
||||
name: coding-standards
|
||||
description: >
|
||||
Engineering quality bar for GroomBook code: priority ordering of correctness
|
||||
vs. clarity vs. maintainability vs. performance vs. elegance, PR and test
|
||||
requirements, no-hardcoded-values rules, branch discipline, and the no-self-
|
||||
merge contract.
|
||||
---
|
||||
|
||||
# Coding Standards
|
||||
|
||||
These rules apply to any GroomBook agent that writes, reviews, or merges code.
|
||||
|
||||
## Priority ordering
|
||||
|
||||
When making technical decisions, prioritize in this order:
|
||||
|
||||
1. **Correctness** — does it work? Does it handle edge cases? Have you proven it, not assumed it?
|
||||
2. **Clarity** — will another engineer understand this without context in 6 months?
|
||||
3. **Maintainability** — will it be safe to change?
|
||||
4. **Performance** — fast enough for the use case? Profile before optimizing.
|
||||
5. **Elegance** — nice if free; never trade any of the above for it.
|
||||
|
||||
## Pull request discipline
|
||||
|
||||
* All changes go through a PR. **Never push directly to `dev`, `uat`, or `main`.**
|
||||
* No agent merges their own PR.
|
||||
* Always include `cc @cpfarhood` at the bottom of the PR body for visibility (not as a reviewer).
|
||||
|
||||
## Test requirements
|
||||
|
||||
* **Every PR must include tests** for new code paths. No exceptions for "small" changes.
|
||||
* Run unit tests, type check, and lint locally (or rely on CI) **before** requesting review.
|
||||
* A PR without passing tests does not get approval.
|
||||
* New code paths require coverage. No coverage = no approval.
|
||||
|
||||
## Code review tone
|
||||
|
||||
Hold a high bar. PRs with obvious mistakes, missing tests, hardcoded values, or policy violations get firm, specific review comments citing what's wrong and what the fix is. Cite the file and line. Suggest the fix when you know it. Don't sugarcoat — but be professional and constructive. "This looks wrong" is not a review comment.
|
||||
|
||||
## Hardcoded values
|
||||
|
||||
* **Colors** use CSS variables / theme tokens. Never raw hex in components.
|
||||
* **Strings** use constants or i18n. No magic strings.
|
||||
* **Numbers** that aren't trivially obvious go in named constants.
|
||||
* **No magic numbers** in business logic.
|
||||
|
||||
## Secrets in code
|
||||
|
||||
Secrets never touch source. See the `safety` skill for the SealedSecrets workflow. If your implementation requires a Kubernetes secret you cannot create, file an issue for the agent who owns the SealedSecrets workflow rather than committing a plaintext value.
|
||||
|
||||
## Releases and versioning
|
||||
|
||||
All releases use CalVer (`YYYY.MMDD.PATCH`, e.g. `2026.0504.0`). No SemVer, no custom schemes.
|
||||
|
||||
## Container images
|
||||
|
||||
Push to `ghcr.io` only. Never Docker Hub for first-party images.
|
||||
|
||||
## When uncertain
|
||||
|
||||
If a code-quality call isn't covered above and you can't decide cleanly, escalate to the CTO via comment rather than guessing.
|
||||
@@ -1,31 +0,0 @@
|
||||
---
|
||||
name: safety
|
||||
description: >
|
||||
Non-negotiable safety rules for all GroomBook agents. Covers secret handling,
|
||||
destructive-action gating, the SealedSecrets workflow, kubectl scope limits,
|
||||
and the escalation protocol when an action's safety is uncertain.
|
||||
---
|
||||
|
||||
# Safety
|
||||
|
||||
The following rules apply to every GroomBook agent without exception.
|
||||
|
||||
## Non-negotiable rules
|
||||
|
||||
* **Never exfiltrate secrets or private data.** This includes API keys, tokens, PEM files, database credentials, kubeconfig contents, and any value sourced from a secret reference in your adapter config. Never log, comment, or return these values in any output — including PR descriptions, issue comments, and chat responses.
|
||||
|
||||
* **Seek board approval before destructive actions.** "Destructive" means: deleting resources, dropping tables, wiping namespaces, force-pushing branches, resetting git history, removing secrets, or any operation that cannot be undone without restoring from backup. Use `request_board_approval` and set the source issue to `blocked` until approved.
|
||||
|
||||
* **Never commit plaintext secrets.** Kubernetes secrets go through Bitnami Sealed Secrets (`kubeseal`). Application credentials go in environment variables injected at runtime — never hardcoded in source.
|
||||
|
||||
* **Never `kubectl apply` against production (`groombook`).** The production namespace is Flux-managed. Manifest changes go through a PR to `groombook/infra` and are reconciled by Flux. The `groombook-dev` and `groombook-uat` namespaces permit direct kubectl use for iteration; secrets at every environment still follow the SealedSecrets pattern.
|
||||
|
||||
* **Never `kubectl create secret` in production.** All secrets — at every environment — go through SealedSecrets, encrypted with `kubeseal`, committed as `SealedSecret` resources to `groombook/infra`.
|
||||
|
||||
* **Never bypass the merge gate.** No self-merging PRs. No pushing directly to `dev`, `uat`, or `main`. Every change goes through a PR with the reviews required by the `sdlc` skill.
|
||||
|
||||
* **Never run `tofu` directly.** Terraform / OpenTofu goes through the Flux OpenTofu Controller via a PR to `groombook/infra`.
|
||||
|
||||
## If you are unsure
|
||||
|
||||
If you are unsure whether an action is safe, **stop**. Post a comment on the Paperclip issue explaining what you are about to do and why you are uncertain, set the issue to `blocked`, and escalate to your manager. Do not guess.
|
||||
@@ -1,229 +0,0 @@
|
||||
---
|
||||
name: sdlc
|
||||
description: >
|
||||
Software development lifecycle for GroomBook. Covers Gitea authentication,
|
||||
branch strategy across Dev/UAT/Prod, the four-phase SDLC pipeline with
|
||||
product analysis intake, PR review and merge policy, the handoff protocol,
|
||||
status semantics, infrastructure layout, the canonical tools list, the
|
||||
Gitea-origin issue board-approval gate, the cc-cpfarhood visibility rule,
|
||||
the scheduled penetration testing program, and delegation model tier policy.
|
||||
---
|
||||
|
||||
# Software Development Lifecycle
|
||||
|
||||
## Gitea authentication
|
||||
|
||||
**Use the `tea` CLI** with the `GITEA_TOKEN` environment variable for all Gitea operations. Configure it once:
|
||||
|
||||
```bash
|
||||
tea login add --url https://git.farh.net --token $GITEA_TOKEN --name groombook
|
||||
```
|
||||
|
||||
Gitea is the **primary source of truth**. Every Paperclip issue should have a corresponding Gitea issue (create one if missing). Both stay open until the work is completed, reviewed, approved, merged, and QA-verified.
|
||||
|
||||
## Gitea-origin issue policy — board approval required
|
||||
|
||||
If a task originated from Gitea (`originKind: "gitea"`), **do not begin work**. Immediately create a board approval:
|
||||
|
||||
```
|
||||
POST /api/companies/{companyId}/approvals
|
||||
{
|
||||
"type": "request_board_approval",
|
||||
"requestedByAgentId": "{your-agent-id}",
|
||||
"issueIds": ["{issueId}"],
|
||||
"payload": {
|
||||
"title": "Board approval required: Gitea issue",
|
||||
"summary": "Summarize what the Gitea issue requests.",
|
||||
"recommendedAction": "Approve to begin work.",
|
||||
"risks": ["Work begins without board review if approved."]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Set the issue to `blocked` with a comment linking to the approval. Only proceed once `PAPERCLIP_APPROVAL_ID` is set and `PAPERCLIP_APPROVAL_STATUS` indicates approval.
|
||||
|
||||
## Branch strategy
|
||||
|
||||
Three long-lived branches map to the three deployment environments:
|
||||
|
||||
| Branch | Environment | Who merges |
|
||||
|--------|-------------|-----------|
|
||||
| `dev` | Dev | CTO (after QA approval) |
|
||||
| `uat` | UAT | CTO (promotes `dev` → `uat`) |
|
||||
| `main` | Production | CEO (promotes `uat` → `main`) |
|
||||
|
||||
**Engineers always target `dev`** — never `uat` or `main` directly. Feature branches: `<agent-name>/<short-description>`.
|
||||
|
||||
## Pull requests
|
||||
|
||||
All changes happen via pull request. Always include `cc @cpfarhood` at the bottom of the PR body for visibility — never as a reviewer.
|
||||
|
||||
```bash
|
||||
tea pr create --base dev --title "..." --body "... cc @cpfarhood"
|
||||
```
|
||||
|
||||
## PR review & merge policy
|
||||
|
||||
### Dev branch (`dev`)
|
||||
|
||||
- **QA** (Lint Roller) reviews the PR. Approve → hand to CTO. Fail → back to engineer directly with exact details.
|
||||
- **CTO** (The Dogfather) reviews. Approve → CTO merges the `dev` PR. Fail → back to engineer.
|
||||
|
||||
### UAT branch (`uat`)
|
||||
|
||||
- **CTO** opens and merges a `dev` → `uat` PR.
|
||||
|
||||
### Main branch (`main`)
|
||||
|
||||
- **CEO** (Scrubs McBarkley) reviews and merges the `uat` → `main` PR.
|
||||
|
||||
`@cpfarhood` is cc'd for visibility on all PRs — never as a reviewer.
|
||||
|
||||
## SDLC pipeline
|
||||
|
||||
### Phase 0 — Product analysis (feature intake)
|
||||
|
||||
* Feature requests arrive at the CEO via Paperclip or Gitea Issues.
|
||||
* CEO delegates to CMPO (Pawla Abdul) for review.
|
||||
* CMPO returns one of three decisions:
|
||||
* **Accepted** → CEO routes to CTO for work breakdown.
|
||||
* **Backlogged** → CEO handles prioritization.
|
||||
* **Denied** → CEO closes as unplanned.
|
||||
* CTO breaks accepted work into atomic tasks and assigns to Engineering.
|
||||
|
||||
### Phase 1 — Dev
|
||||
|
||||
1. **Engineer** (Flea Flicker) branches from `dev`, writes code. GitOps deploys to dev on demand.
|
||||
2. **Engineer** opens a PR against `dev`. CI must pass.
|
||||
3. **QA (Lint Roller)** reviews the PR. Fail → back to engineer.
|
||||
4. QA approves and hands off to CTO.
|
||||
5. **CTO (The Dogfather)** reviews the PR. Fail → back to engineer.
|
||||
6. **CTO** merges the dev PR.
|
||||
7. **CI** builds and deploys automatically to Dev (`https://dev.groombook.dev`).
|
||||
|
||||
### Phase 2 — UAT promotion
|
||||
|
||||
8. **CTO** opens and merges a PR from `dev` to `uat`.
|
||||
9. **CI** builds and deploys automatically to UAT (`https://uat.groombook.dev`).
|
||||
10. **CTO** creates a UAT regression task for **Shedward Scissorhands** immediately after promoting.
|
||||
|
||||
### Phase 3 — UAT testing & security
|
||||
|
||||
11. **UAT (Shedward Scissorhands)** runs full regression against UAT — every feature, no exceptions.
|
||||
12. UAT fail → CTO redistributes to engineer (return to Phase 1).
|
||||
13. UAT pass → **Security Engineer (Barkley Trimsworth)** performs a security code review of the changes.
|
||||
14. Security fail → CTO redistributes to engineer (return to Phase 1).
|
||||
|
||||
### Phase 4 — Production
|
||||
|
||||
15. Security pass → **CEO (Scrubs McBarkley)** reviews and merges the production PR (`uat → main`). Fail → back to CTO.
|
||||
16. **CI** deploys automatically to Production (`https://demo.groombook.dev`).
|
||||
|
||||
### Hierarchy rules
|
||||
|
||||
* CTO rejections at Dev go directly to the engineer (not back through QA).
|
||||
* UAT failures (Shedward) go to CTO — CTO cascades to engineer.
|
||||
* Security failures (Barkley) go to CTO — CTO cascades to engineer.
|
||||
* CEO rejections at Prod go to CTO.
|
||||
|
||||
> **Penetration testing.** Barkley performs scheduled penetration testing against Production (`demo.groombook.dev`) and Demo independently of the PR workflow. Board-authorized; not triggered per-PR. Findings get filed as Paperclip issues with severity (`CRITICAL` / `HIGH` / `MEDIUM` / `LOW`) and routed to CTO for engineer redistribution.
|
||||
|
||||
## Delegation model tier
|
||||
|
||||
When creating subtasks for other agents, set `modelProfile: "cheap"` only for:
|
||||
- Mechanical refactors or repetitive operations
|
||||
- Basic information lookups
|
||||
- Well-specified, bounded updates
|
||||
|
||||
Leave `modelProfile` unset for anything requiring judgment, reasoning, or QA review.
|
||||
|
||||
When in doubt, leave it unset.
|
||||
|
||||
## Handoff protocol — mandatory
|
||||
|
||||
Every handoff to another agent requires ALL THREE steps:
|
||||
|
||||
### 1. Explicit assignment
|
||||
|
||||
`PATCH /api/issues/{id}` with `assigneeAgentId: "<target-agent-uuid>"`. Mentioning is NOT a handoff — the agent won't wake without explicit assignment.
|
||||
|
||||
### 2. Status = `todo`
|
||||
|
||||
Every handoff sets `status: "todo"`. Never `in_review`, never `backlog` — both are invisible in inbox-lite and the receiver won't wake.
|
||||
|
||||
### 3. Release checkout
|
||||
|
||||
```
|
||||
POST /api/issues/{issueId}/release
|
||||
Headers: Authorization: Bearer $PAPERCLIP_API_KEY, X-Paperclip-Run-Id: $PAPERCLIP_RUN_ID
|
||||
```
|
||||
|
||||
Without this release, the receiving agent cannot check out the issue.
|
||||
|
||||
**Saying you are reassigning a task is NOT the same as reassigning it.** Verify the PATCH succeeded (200) before posting a comment claiming the handoff is done.
|
||||
|
||||
## Infrastructure
|
||||
|
||||
* **Production / Demo:** namespace `groombook`, FQDN `demo.groombook.dev`
|
||||
* **UAT:** namespace `groombook-uat`, FQDN `uat.groombook.dev`
|
||||
* **Dev:** namespace `groombook-dev`, FQDN `dev.groombook.dev`
|
||||
* **Cluster:** Kubernetes — cluster-wide read; read/write on `groombook-dev` and `groombook-uat`; read-only on `groombook` (production).
|
||||
* **Gateways:** `istio-external` (publicly accessible) and `istio-internal` (internal only) in `gateway-system`.
|
||||
* **Container registry:** `ghcr.io/groombook/<service>` only.
|
||||
|
||||
## Authentication
|
||||
|
||||
* **Framework:** Better-Auth.
|
||||
* **Social login:** Google and Apple OAuth.
|
||||
* **SSO:** Authentik OIDC at `https://auth.farh.net` (credentials in `authentik-credentials` secret).
|
||||
* **Never build custom authentication.**
|
||||
|
||||
## Deployment — 2-stage Flux GitOps
|
||||
|
||||
**Stage 1 — CI (Gitea Actions, uses GitHub Actions-compatible YAML syntax, runs in each application repo):**
|
||||
- Triggered automatically on every merge to `main`
|
||||
- Builds and tags the Docker image
|
||||
- Pushes tagged images to `ghcr.io/groombook/<service>`
|
||||
|
||||
**Stage 2 — GitOps (Flux, managed externally):**
|
||||
- Flux watches `groombook/infra` as the **target** GitRepository — it is **not** a Flux bootstrap/cluster repo.
|
||||
- Reconciles Kustomize overlays: `apps/overlays/dev` → `groombook-dev`, `apps/overlays/uat` → `groombook-uat`, `apps/overlays/prod` → `groombook`.
|
||||
|
||||
**Policy — Flux Image Tag Automation is DENIED.** Do NOT use `ImageRepository`, `ImagePolicy`, or `ImageUpdateAutomation` Flux resources. Image tag updates must be made intentionally via a PR to `groombook/infra`.
|
||||
|
||||
**To deploy a change:**
|
||||
1. Merge code to `main` in the app repo — CI builds and pushes a new image automatically.
|
||||
2. Open a PR against `groombook/infra` to update the relevant overlay; merge after kustomize CI passes.
|
||||
3. Flux reconciles `groombook/infra` on merge and rolls out the updated pods.
|
||||
|
||||
**To force a rollout** (pick up new `:latest` on stuck pods):
|
||||
```bash
|
||||
kubectl rollout restart deployment/<name> -n <namespace>
|
||||
```
|
||||
|
||||
## Infrastructure as Code
|
||||
|
||||
Terraform / OpenTofu is deployed via the **Flux OpenTofu Controller** in a GitOps fashion. Submit configurations via a PR to `groombook/infra` — the tofu controller reconciles them on merge.
|
||||
|
||||
**Never run `tofu` directly.** Never `kubectl apply` against production. Production changes go through Flux only.
|
||||
|
||||
## Tools (canonical, not alternatives)
|
||||
|
||||
These are the only acceptable choices — alternatives are policy violations:
|
||||
|
||||
* **Secret management:** Bitnami Sealed Secrets Controller — no plain Kubernetes secrets.
|
||||
* **Database:** CloudNativePG Operator (Postgres) — no SQLite, MariaDB, or MySQL.
|
||||
* **Cache / pub-sub:** DragonflyDB Operator — no Redis.
|
||||
* **Authentication:** Better-Auth + Google + Apple + Authentik (see Authentication section). Never build custom auth.
|
||||
* **Dependency updates:** Mend Renovate. **Dependabot is not used and will not be used.**
|
||||
* **Container registry:** `ghcr.io/groombook/<service>` — no Docker Hub for first-party images.
|
||||
|
||||
If a task requires deviating from any of the above, treat it as a destructive action: stop, file an issue with rationale, request board approval.
|
||||
|
||||
## External communication
|
||||
|
||||
When communicating in any context visible outside the GroomBook agent team (external users, human reviewers, non-agent entities), include `cc @cpfarhood` for visibility — never as a reviewer.
|
||||
|
||||
## No self-merge
|
||||
|
||||
No agent merges their own PR. The merger is always the next role up the SDLC ladder (CTO for `dev` and `uat`, CEO for `main`).
|
||||
@@ -0,0 +1,85 @@
|
||||
# Social Media Batch - 2026-03-07
|
||||
|
||||
## Strategic Summary
|
||||
|
||||
First-ever social batch for Privileged Escalation. The org has 6 Headlamp plugins across storage, security, and infrastructure -- all freshly released, all at zero stars. The play here is name recognition and curiosity: make people encounter "Privileged Escalation" in their feed and wonder what it is before they click. Leading with the sealed-secrets plugin (client-side crypto angle) and the absurdity of launching 6 plugins to zero fanfare.
|
||||
|
||||
---
|
||||
|
||||
## 1. Ready to Post
|
||||
|
||||
### Post 1
|
||||
|
||||
**Platform**: Twitter/X
|
||||
**Post**:
|
||||
We shipped 6 Kubernetes Headlamp plugins and nobody noticed.
|
||||
|
||||
Storage benchmarking, Rook-Ceph visibility, Polaris auditing, Sealed Secrets with actual client-side encryption, Intel GPU monitoring, and kube-vip dashboards.
|
||||
|
||||
Zero stars across the board. We are crushing it.
|
||||
|
||||
github.com/privilegedescalation
|
||||
**CMO Note**: Self-deprecating launch acknowledgment. The honesty about zero stars is the hook -- it reads as human, not corporate. Links to the org for curious clicks.
|
||||
|
||||
---
|
||||
|
||||
### Post 2
|
||||
|
||||
**Platform**: Bluesky
|
||||
**Post**:
|
||||
the sealed secrets headlamp plugin does client-side RSA-OAEP + AES-256-GCM encryption so your plaintext never leaves the browser.
|
||||
|
||||
someone forked it last month. we have our first user. or our first person who accidentally clicked fork. either way, we are celebrating.
|
||||
**CMO Note**: Technical specificity makes it credible. The fork joke (sm-moshi, Feb 22) is real and plays well on Bluesky's irony-friendly audience. Seeds curiosity about what Headlamp plugins are.
|
||||
|
||||
---
|
||||
|
||||
### Post 3
|
||||
|
||||
**Platform**: Mastodon
|
||||
**Post**:
|
||||
Genuine question for the fediverse: if you have 6 open source projects and zero stars on any of them, are you a software company or just a guy with a lot of repos?
|
||||
|
||||
Asking for a friend. The friend is github.com/privilegedescalation.
|
||||
**CMO Note**: Mastodon audience appreciates self-aware humor. This is pure slow-burn -- raises the question of what Privileged Escalation is without explaining it. The link is there for anyone curious enough to click.
|
||||
|
||||
---
|
||||
|
||||
## 2. Risky but Worth Discussing
|
||||
|
||||
### Post 4
|
||||
|
||||
**Platform**: Twitter/X
|
||||
**Post**:
|
||||
Every Kubernetes UI either costs money or looks like it was designed during a mass layoff event.
|
||||
|
||||
We've been building Headlamp plugins that make the free one actually useful. Rook-Ceph dashboards, Polaris auditing, storage benchmarks -- the stuff you duct-tape together with kubectl and regret.
|
||||
|
||||
github.com/privilegedescalation
|
||||
**CMO Note**: Mildly spicy take on the K8s UI landscape. Does not name competitors directly but the implication is clear. Could rub Lens/Rancher people the wrong way. Worth discussing tone.
|
||||
|
||||
---
|
||||
|
||||
## 3. Backlog (Evergreen)
|
||||
|
||||
### Post 5
|
||||
|
||||
**Platform**: LinkedIn
|
||||
**Post**:
|
||||
We just audited our own GitHub repos and found that 4 out of 6 were missing LICENSE files.
|
||||
|
||||
They all had Apache-2.0 badges in the README. The actual license text? Not present. Technically, anyone using our code was operating on vibes and good faith.
|
||||
|
||||
Fixed now. But if your open source project has a license badge and no LICENSE file, maybe go check. We'll wait.
|
||||
**CMO Note**: Honest product personality at work. Admitting a real flaw (that we just fixed) builds trust and is genuinely useful advice. LinkedIn audience will share practical open source governance content.
|
||||
|
||||
---
|
||||
|
||||
### Post 6
|
||||
|
||||
**Platform**: Twitter/X
|
||||
**Post**:
|
||||
TIL "Privileged Escalation" as a GitHub org name gets flagged by approximately zero security scanners.
|
||||
|
||||
We checked.
|
||||
**CMO Note**: Pure name recognition play. The org name is inherently memorable and slightly provocative -- leaning into that. Short enough for easy engagement.
|
||||
@@ -0,0 +1,84 @@
|
||||
# Slow Burn Post - 2026-03-11
|
||||
|
||||
## Strategic Summary
|
||||
|
||||
Plant a question that makes people curious about Kubernetes observability and operational maturity without revealing the answer. The goal is to get people wondering "what's the right way to do this" before they know what Headlamp is. Works as a callback to the "Why We Built These" educational batch posted days earlier, reminding operators why those pain points matter.
|
||||
|
||||
---
|
||||
|
||||
## 1. Ready to Post
|
||||
|
||||
### Post: "The Dashboard You Don't Know You Need"
|
||||
|
||||
**Platform**: Twitter/X
|
||||
|
||||
**Post**:
|
||||
|
||||
Every mature Kubernetes environment has a moment: someone asks a question about the cluster, and everyone agrees it's a good question, and nobody knows how to answer it quickly.
|
||||
|
||||
Usually because the answer lives in four different CLI tools, three different dashboards, and someone's grep history.
|
||||
|
||||
**CMO Note**: This post plants the seed that there's a maturity gap: most K8s teams have experienced the "good question, no quick answer" moment. It doesn't pitch a solution; it just names the problem that serious operators have. Works well 1-2 weeks after the "Why We Built These" batch posts as a cooling period reminder. Tone is acknowledgment + dry acceptance, not mocking.
|
||||
|
||||
---
|
||||
|
||||
### Post: Bluesky Variant
|
||||
|
||||
**Platform**: Bluesky
|
||||
|
||||
**Post**:
|
||||
|
||||
You have a really good question about your cluster. Everyone agrees it's good. Nobody can answer it in under 2 minutes without splitting a terminal into four panes and running grep until they find it.
|
||||
|
||||
That's not a flaw in your question. That's a flaw in how K8s visibility tools are designed.
|
||||
|
||||
**CMO Note**: Slightly longer and more conversational for Bluesky's format. Same core message—empathy + problem naming—but with more room to breathe. This version gets slightly more pointed at the tooling itself.
|
||||
|
||||
---
|
||||
|
||||
### Post: Mastodon Variant
|
||||
|
||||
**Platform**: Mastodon
|
||||
|
||||
**Post**:
|
||||
|
||||
The hardest part of Kubernetes maturity isn't resource management or networking or pod placement. It's the moment when you realize you have observability *data* everywhere, but visibility nowhere — and combining them requires knowing which three tools to juggle.
|
||||
|
||||
There's a better way to design this.
|
||||
|
||||
**CMO Note**: Most technical framing, appeals to operators who think about K8s maturity as a journey. "Data vs. visibility" distinction is specific enough to resonate. Concluding line is subtle: suggests a different approach without naming the product.
|
||||
|
||||
---
|
||||
|
||||
## 2. Risky but Worth Discussing
|
||||
|
||||
None for this batch — slow-burn posts are inherently low-risk since they're empathy-first, not pitch-first.
|
||||
|
||||
---
|
||||
|
||||
## 3. Backlog (Evergreen)
|
||||
|
||||
This post works evergreen (true anytime after the "Why We Built" batch has gone out) and can be part of a longer narrative arc.
|
||||
|
||||
Suggested follow-up posts (future):
|
||||
- "3 ways teams solve this today" — comparative without favoring ours
|
||||
- "Observation vs. visibility: what's the difference?" — educational explainer
|
||||
- "What does the right K8s dashboard look like?" — leading question that sets up product reveal
|
||||
|
||||
---
|
||||
|
||||
## Why This Works
|
||||
|
||||
1. **No product mention** — Pure empathy for the operator experience
|
||||
2. **Specific frustration** — Not "we make things better," but "this common moment sucks"
|
||||
3. **Opens door for follow-up** — The next post can be "here's one solution," but this one just raises the question
|
||||
4. **Sets up narrative arc** — "Why We Built These" explains pain points; this post reminds people why those pain points matter
|
||||
5. **Conversational tone** — Not a complaint, not a pitch, just "yeah, that moment is real"
|
||||
|
||||
---
|
||||
|
||||
## Timing Notes
|
||||
|
||||
- Post 1-2 weeks after "Why We Built These" batch (suggest March 18-25 timeframe)
|
||||
- Evergreen content that can be scheduled anytime
|
||||
- Ideal as part of the slow-burn curiosity campaign (PR #15) before the KubeCon push
|
||||
@@ -0,0 +1,170 @@
|
||||
# Social Media Batch — Industry Commentary on Kubernetes Operations Culture
|
||||
|
||||
## Strategic Summary
|
||||
|
||||
Hot takes on the absurdities, failures, and genuine problems in how teams actually operate Kubernetes. Not product pitches. Not tutorials. Just observations about the gap between "Kubernetes best practices" and "what we're doing at 2am when the cluster's on fire." These posts position Privileged Escalation as operators who understand the actual pain, not consultants selling the dream.
|
||||
|
||||
Timing: Evergreen content, works anytime. Some posts pair well with technical releases; others stand alone. This batch establishes credibility and voice before KubeCon campaign.
|
||||
|
||||
---
|
||||
|
||||
## 1. Ready to Post
|
||||
|
||||
### Post 1: The Observability Checklist Theater
|
||||
|
||||
**Platform**: Twitter/X
|
||||
|
||||
**Post**:
|
||||
"We have observability," she said, pointing to three separate dashboards, four different APIs, and a grep script she's afraid to touch.
|
||||
|
||||
Observability isn't having data. It's knowing what to do with it when you're 30 seconds into an incident.
|
||||
|
||||
Most Kubernetes deployments have the first part. Nobody has the second.
|
||||
|
||||
**CMO Note**: Identifies a real gap (data vs visibility vs actionability) without mocking the teams experiencing it. Dry acknowledgment of a universal pain point. Sets up future posts about "what good observability looks like." No product mention needed — the empathy does the work.
|
||||
|
||||
---
|
||||
|
||||
### Post 2: The 3-Line PR That Ate 6 Weeks
|
||||
|
||||
**Platform**: Bluesky
|
||||
|
||||
**Post**:
|
||||
Your platform team has a 3-line PR waiting for maintainer approval. It's been 42 days.
|
||||
|
||||
Not because the code is bad. Not because they're thinking about it. They're just... busy.
|
||||
|
||||
This is why every infrastructure team ends up maintaining their own forks of things. Not because they wanted to. Because the alternative was waiting forever.
|
||||
|
||||
**CMO Note**: Speaks directly to the maintainer bottleneck that creates fragmentation in the ecosystem. Bluesky audience (more irony-literate) appreciates the "42 days" specificity. The "own forks" insight is a callback to our sealed-secrets fork acknowledgment from earlier batches.
|
||||
|
||||
---
|
||||
|
||||
### Post 3: The README That Became the Docs
|
||||
|
||||
**Platform**: Mastodon
|
||||
|
||||
**Post**:
|
||||
Kubernetes documentation is a weird thing. The official docs are great. The best practices docs are aspirational. The actual documentation for how to run your thing is 40 lines in a README written by someone at 11pm because they're shipping in the morning.
|
||||
|
||||
Three years later, that README is the only source of truth. It hasn't been updated in two years. Someone just hired reads it and wonders why nothing matches.
|
||||
|
||||
This is fine.
|
||||
|
||||
**CMO Note**: Honest observation about documentation drift and pragmatism in ops. "This is fine" ending is dark humor that Mastodon audience gets. Not judgmental, just realistic. Positions us as people who understand that perfect documentation is a fantasy.
|
||||
|
||||
---
|
||||
|
||||
## 2. Risky but Worth Discussing
|
||||
|
||||
### Post 4: The Consolidation Trap (Skip for Safety)
|
||||
|
||||
**Platform**: Twitter/X
|
||||
|
||||
**Post**:
|
||||
Every infrastructure company eventually ships one tool that tries to do everything.
|
||||
|
||||
It is always bloated. It is always slow. It is always worse at specific things than the 6 single-purpose tools it replaced.
|
||||
|
||||
We had the opportunity to do this. We didn't. We're weird that way.
|
||||
|
||||
**CMO Note**: RISKY. This is a soft dig at competitors (Lens, Rancher, etc.) and might read as salty if not careful. However, it's grounded in our actual decision (6 plugins instead of one). Could land well with operators who are exhausted by consolidation fantasies. Recommend getting CMO sign-off before posting. If approved, schedule it after "Why We Built These" batch so context is fresh.
|
||||
|
||||
---
|
||||
|
||||
### Post 5: Observability Theater: The Security Checkbox
|
||||
|
||||
**Platform**: Bluesky
|
||||
|
||||
**Post**:
|
||||
"We have visibility into our supply chain."
|
||||
|
||||
Translation: We run a scanner once a quarter and it generates a report nobody reads.
|
||||
|
||||
"We monitor resource usage."
|
||||
|
||||
Translation: Prometheus metrics exist. We haven't looked at them in a year but they're technically there.
|
||||
|
||||
Real observability isn't a checkbox. It's a practice. It takes work. Most teams don't do it.
|
||||
|
||||
**CMO Note**: MILDLY RISKY. Could read as too critical of teams trying their best. However, it's honest about the gap between aspirational and actual practices. Strong with engineering leaders who are frustrated with their own observability theater. Recommend pairing with a constructive follow-up post about "what real visibility looks like."
|
||||
|
||||
---
|
||||
|
||||
## 3. Backlog (Evergreen, Lower Urgency)
|
||||
|
||||
### Post 6: The Dependency Management Hellscape
|
||||
|
||||
**Platform**: Twitter/X
|
||||
|
||||
**Post**:
|
||||
You have 1,247 transitive dependencies in your Kubernetes cluster.
|
||||
|
||||
You know what 14 of them do.
|
||||
|
||||
Nobody knows who maintains the other 1,233. Nobody knows what version of OpenSSL they actually use. If one of them breaks, you have a 2-week-long blame game ahead of you.
|
||||
|
||||
This is the cloud native era.
|
||||
|
||||
**CMO Note**: BACKLOG. Evergreen tech-industry grumbling. Node_modules meme energy. Good for reaching people who are deep in dependency hell and looking for commiseration. Can post anytime without losing relevance. Low risk, high relatability.
|
||||
|
||||
---
|
||||
|
||||
### Post 7: The Platform Team as Glorified Operators
|
||||
|
||||
**Platform**: LinkedIn
|
||||
|
||||
**Post**:
|
||||
The job listing said "Platform Engineer."
|
||||
|
||||
What they meant: "You will spend 60% of your time un-breaking things that broke automatically, 30% fighting with vendors, and 10% actually building the platform you were hired to build."
|
||||
|
||||
Platform engineering is good work. But we're not honest about what it is yet. It's operations wearing a different hat.
|
||||
|
||||
**CMO Note**: BACKLOG. Professional tone for LinkedIn. Speaks to platform engineering leaders who are exhausted. The honesty about role mismatch will resonate with your actual audience. Low controversy, high empathy. Works anytime. Could be paired with recruitment/community posts about "what good platform engineering support looks like."
|
||||
|
||||
---
|
||||
|
||||
## Post Selection Recommendation
|
||||
|
||||
**For This Week** (Pre-KubeCon): Posts 1, 2, 3
|
||||
- Establish credibility as operators who get it
|
||||
- Set tone before educational batches ("Why We Built These")
|
||||
- Build audience affinity
|
||||
|
||||
**For Next Week** (During KubeCon): Hold — focus on KubeCon campaign posts
|
||||
|
||||
**For Later** (March 28+): Posts 4, 5, 6, 7
|
||||
- Post 4 only if CMO approves and timing feels right
|
||||
- Posts 5, 6, 7 are genuinely evergreen — use as filler/buffer content
|
||||
|
||||
---
|
||||
|
||||
## Voice Check
|
||||
|
||||
✅ Dry observations, not punchlines
|
||||
✅ Mild grievances, not venting
|
||||
✅ Credibility through specificity ("42 days," "1,247 dependencies," "11pm")
|
||||
✅ Empathy for teams, not mockery
|
||||
✅ Operator perspective (not vendor, not consultant)
|
||||
✅ No corporate language, no "exciting to announce"
|
||||
✅ Each post stands alone; no threading needed
|
||||
|
||||
---
|
||||
|
||||
## Tags & Platform Consistency
|
||||
|
||||
- Twitter/X: Short, punchy, no threads
|
||||
- Bluesky: Slightly longer, conversation-friendly
|
||||
- LinkedIn: Professional tone, slightly longer form
|
||||
- Mastodon: More technical, darker humor accepted
|
||||
|
||||
All posts include implicit "this resonates because you've lived it" angle rather than "you should agree with us."
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
- Posts 1-3: Self-contained, can post anytime
|
||||
- Post 4: Requires "Why We Built These" context (posted 1+ week prior)
|
||||
- Posts 5-7: Evergreen, zero dependencies
|
||||
Reference in New Issue
Block a user