diff --git a/.agents/skills/prcheckloop/SKILL.md b/.agents/skills/prcheckloop/SKILL.md new file mode 100644 index 00000000..70d9e19a --- /dev/null +++ b/.agents/skills/prcheckloop/SKILL.md @@ -0,0 +1,209 @@ +--- +name: prcheckloop +description: > + Iteratively gets a GitHub pull request's checks green. Detects the PR for the + current branch or uses a provided PR number, waits for every check on the + latest head SHA to appear and finish, investigates failing checks, fixes + actionable code or test issues, pushes, and repeats. Escalates with a precise + blocker when failures are external, flaky, or not safely fixable. Use when a + PR still has unsuccessful checks after review fixes, including after greploop. +--- + +# PRCheckloop + +Get a GitHub PR to a fully green check state, or exit with a concrete blocker. + +## Scope + +- GitHub PRs only. If the repo is GitLab, stop and use `check-pr`. +- Focus on checks for the latest PR head SHA, not old commits. +- Focus on CI/status checks, not review comments or PR template cleanup. +- If the user also wants review-comment cleanup, pair this with `check-pr`. + +## Inputs + +- **PR number** (optional): If not provided, detect the PR for the current branch. +- **Max iterations**: default `5`. + +## Workflow + +### 1. Identify the PR + +If no PR number is provided, detect it from the current branch: + +```bash +gh pr view --json number,headRefName,headRefOid,url,isDraft +``` + +If needed, switch to the PR branch before making changes. + +Stop early if: + +- `gh` is not authenticated +- there is no PR for the branch +- the repo is not hosted on GitHub + +### 2. Track the latest head SHA + +Always work against the current PR head SHA: + +```bash +PR_JSON=$(gh pr view "$PR_NUMBER" --json number,headRefName,headRefOid,url) +HEAD_SHA=$(echo "$PR_JSON" | jq -r .headRefOid) +PR_URL=$(echo "$PR_JSON" | jq -r .url) +``` + +Ignore failing checks from older SHAs. After every push, refresh `HEAD_SHA` and +restart the inspection loop. + +### 3. Inventory checks for that SHA + +Fetch both GitHub check runs and legacy commit status contexts: + +```bash +gh api "repos/{owner}/{repo}/commits/$HEAD_SHA/check-runs?per_page=100" +gh api "repos/{owner}/{repo}/commits/$HEAD_SHA/status" +``` + +For a compact PR-level view, this GraphQL payload is useful: + +```bash +gh api graphql -f query=' +query($owner:String!, $repo:String!, $pr:Int!) { + repository(owner:$owner, name:$repo) { + pullRequest(number:$pr) { + headRefOid + url + statusCheckRollup { + contexts(first:100) { + nodes { + __typename + ... on CheckRun { name status conclusion detailsUrl workflowName } + ... on StatusContext { context state targetUrl description } + } + } + } + } + } +}' -F owner=OWNER -F repo=REPO -F pr="$PR_NUMBER" +``` + +### 4. Wait for checks to actually run + +After a new push, checks can take a moment to appear. Poll every 15-30 seconds +until one of these is true: + +- checks have appeared and every item is in a terminal state +- checks have appeared and at least one failed +- no checks appear after a reasonable wait, usually 2 minutes + +Treat these as terminal success states: + +- check runs: `SUCCESS`, `NEUTRAL`, `SKIPPED` +- status contexts: `SUCCESS` + +Treat these as pending: + +- check runs: `QUEUED`, `PENDING`, `WAITING`, `REQUESTED`, `IN_PROGRESS` +- status contexts: `PENDING` + +Treat these as failures: + +- check runs: `FAILURE`, `TIMED_OUT`, `CANCELLED`, `ACTION_REQUIRED`, `STARTUP_FAILURE`, `STALE` +- status contexts: `FAILURE`, `ERROR` + +If no checks appear for the latest SHA, inspect `.github/workflows/`, workflow +path filters, and branch protection expectations. If the missing check cannot be +caused or fixed from the repo, escalate. + +### 5. Investigate failing checks + +For GitHub Actions failures, inspect runs and failed logs for the current SHA: + +```bash +gh run list --commit "$HEAD_SHA" --json databaseId,workflowName,status,conclusion,url,headSha +gh run view --json databaseId,name,workflowName,status,conclusion,jobs,url,headSha +gh run view --log-failed +``` + +For each failing check, classify it: + +| Failure type | Action | +|---|---| +| Code/test regression | Reproduce locally, fix, and verify | +| Lint/type/build mismatch | Run the matching local command from the workflow and fix it | +| Flake or transient infra issue | Rerun once if evidence supports flakiness | +| External service/status app failure | Escalate with the details URL and owner guess | +| Missing secret/permission/branch protection issue | Escalate immediately | + +Only rerun a failed job once without code changes. Do not loop on reruns. + +### 6. Fix actionable failures + +If the failure is actionable from the checked-out code: + +1. Read the workflow or failing command to identify the real gate. +2. Reproduce locally where reasonable. +3. Make the smallest correct fix. +4. Run focused verification first, then broader verification if needed. +5. Commit in a logical commit. +6. Push before re-checking the PR. + +Do not stop at a local fix. The loop is only complete when the remote PR checks +for the new head SHA are green. + +### 7. Push and repeat + +After each fix: + +```bash +git push +sleep 5 +``` + +Then refresh the PR metadata, get the new `HEAD_SHA`, and restart from Step 3. + +Exit the loop only when: + +- all checks for the latest head SHA are green, or +- a blocker remains after reasonable repair effort, or +- the max iteration count is reached + +### 8. Escalate blockers precisely + +If you cannot get the PR green, report: + +- PR URL +- latest head SHA +- exact failing or missing check names +- details URLs +- what you already tried +- why it is blocked +- who should likely unblock it +- the next concrete action + +Good blocker examples: + +- external status app outage +- missing GitHub secret or permission +- required check name mismatch in branch protection +- persistent flake after one rerun +- failure needs credentials or infrastructure access you do not have + +## Output + +When the skill completes, report: + +- PR URL and branch +- final head SHA +- green/pending/failing check summary +- fixes made and verification run +- whether changes were pushed +- blocker summary if not fully green + +## Notes + +- This skill is intentionally narrower than `check-pr`: it is a repair loop for + PR checks. +- This skill complements `greploop`: Greptile can be perfect while CI is still + red. diff --git a/AGENTS.md b/AGENTS.md index dc5e9432..524c573a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -81,8 +81,8 @@ If you change schema/API behavior, update all impacted layers: 4. Do not replace strategic docs wholesale unless asked. Prefer additive updates. Keep `doc/SPEC.md` and `doc/SPEC-implementation.md` aligned. -5. Keep plan docs dated and centralized. -New plan documents belong in `doc/plans/` and should use `YYYY-MM-DD-slug.md` filenames. +5. Keep repo plan docs dated and centralized. +When you are creating a plan file in the repository itself, new plan documents belong in `doc/plans/` and should use `YYYY-MM-DD-slug.md` filenames. This does not replace Paperclip issue planning: if a Paperclip issue asks for a plan, update the issue `plan` document per the `paperclip` skill instead of creating a repo markdown file. ## 6. Database Change Workflow diff --git a/cli/src/__tests__/worktree.test.ts b/cli/src/__tests__/worktree.test.ts index 1c8ed5e1..3245da05 100644 --- a/cli/src/__tests__/worktree.test.ts +++ b/cli/src/__tests__/worktree.test.ts @@ -9,6 +9,8 @@ import { readSourceAttachmentBody, rebindWorkspaceCwd, resolveSourceConfigPath, + resolveWorktreeReseedSource, + resolveWorktreeReseedTargetPaths, resolveGitWorktreeAddArgs, resolveWorktreeMakeTargetPath, worktreeInitCommand, @@ -482,27 +484,69 @@ describe("worktree helpers", () => { } }); - it("requires an explicit source for worktree reseed", async () => { - const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-worktree-reseed-source-")); - const repoRoot = path.join(tempRoot, "repo"); - const originalCwd = process.cwd(); - const originalPaperclipConfig = process.env.PAPERCLIP_CONFIG; + it("requires an explicit reseed source", () => { + expect(() => resolveWorktreeReseedSource({})).toThrow( + "Pass --from or --from-config/--from-instance explicitly so the reseed source is unambiguous.", + ); + }); + + it("rejects mixed reseed source selectors", () => { + expect(() => resolveWorktreeReseedSource({ + from: "current", + fromInstance: "default", + })).toThrow( + "Use either --from or --from-config/--from-data-dir/--from-instance, not both.", + ); + }); + + it("derives worktree reseed target paths from the adjacent env file", () => { + const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-worktree-reseed-target-")); + const worktreeRoot = path.join(tempRoot, "repo"); + const configPath = path.join(worktreeRoot, ".paperclip", "config.json"); + const envPath = path.join(worktreeRoot, ".paperclip", ".env"); try { - fs.mkdirSync(repoRoot, { recursive: true }); - delete process.env.PAPERCLIP_CONFIG; - process.chdir(repoRoot); - - await expect(worktreeReseedCommand({ seed: false, yes: true })).rejects.toThrow( - "Reseed requires an explicit source.", + fs.mkdirSync(path.dirname(configPath), { recursive: true }); + fs.writeFileSync(configPath, JSON.stringify(buildSourceConfig()), "utf8"); + fs.writeFileSync( + envPath, + [ + "PAPERCLIP_HOME=/tmp/paperclip-worktrees", + "PAPERCLIP_INSTANCE_ID=pap-1132-chat", + ].join("\n"), + "utf8", ); + expect( + resolveWorktreeReseedTargetPaths({ + configPath, + rootPath: worktreeRoot, + }), + ).toMatchObject({ + cwd: worktreeRoot, + homeDir: "/tmp/paperclip-worktrees", + instanceId: "pap-1132-chat", + }); + } finally { + fs.rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it("rejects reseed targets without worktree env metadata", () => { + const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-worktree-reseed-target-missing-")); + const worktreeRoot = path.join(tempRoot, "repo"); + const configPath = path.join(worktreeRoot, ".paperclip", "config.json"); + + try { + fs.mkdirSync(path.dirname(configPath), { recursive: true }); + fs.writeFileSync(configPath, JSON.stringify(buildSourceConfig()), "utf8"); + fs.writeFileSync(path.join(worktreeRoot, ".paperclip", ".env"), "", "utf8"); + + expect(() => + resolveWorktreeReseedTargetPaths({ + configPath, + rootPath: worktreeRoot, + })).toThrow("does not look like a worktree-local Paperclip instance"); } finally { - process.chdir(originalCwd); - if (originalPaperclipConfig === undefined) { - delete process.env.PAPERCLIP_CONFIG; - } else { - process.env.PAPERCLIP_CONFIG = originalPaperclipConfig; - } fs.rmSync(tempRoot, { recursive: true, force: true }); } }); @@ -529,6 +573,7 @@ describe("worktree helpers", () => { try { fs.mkdirSync(path.dirname(currentPaths.configPath), { recursive: true }); fs.mkdirSync(path.dirname(sourcePaths.configPath), { recursive: true }); + fs.mkdirSync(path.dirname(sourcePaths.secretsKeyFilePath), { recursive: true }); fs.mkdirSync(repoRoot, { recursive: true }); fs.mkdirSync(sourceRoot, { recursive: true }); @@ -546,6 +591,7 @@ describe("worktree helpers", () => { }); fs.writeFileSync(currentPaths.configPath, JSON.stringify(currentConfig, null, 2), "utf8"); fs.writeFileSync(sourcePaths.configPath, JSON.stringify(sourceConfig, null, 2), "utf8"); + fs.writeFileSync(sourcePaths.secretsKeyFilePath, "source-secret", "utf8"); fs.writeFileSync( currentPaths.envPath, [ @@ -562,7 +608,6 @@ describe("worktree helpers", () => { await worktreeReseedCommand({ fromConfig: sourcePaths.configPath, - seed: false, yes: true, }); @@ -584,7 +629,7 @@ describe("worktree helpers", () => { } fs.rmSync(tempRoot, { recursive: true, force: true }); } - }); + }, 20_000); it("restores the current worktree config and instance data if reseed fails", async () => { const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-worktree-reseed-rollback-")); diff --git a/cli/src/commands/worktree.ts b/cli/src/commands/worktree.ts index 3025e955..963ae5e8 100644 --- a/cli/src/commands/worktree.ts +++ b/cli/src/commands/worktree.ts @@ -98,22 +98,6 @@ type WorktreeMakeOptions = WorktreeInitOptions & { startPoint?: string; }; -type WorktreeReseedOptions = { - fromConfig?: string; - fromDataDir?: string; - fromInstance?: string; - home?: string; - seedMode?: string; - yes?: boolean; - seed?: boolean; -}; - -type WorktreeReseedBackup = { - tempRoot: string; - repoConfigDirBackup: string | null; - instanceRootBackup: string | null; -}; - type WorktreeEnvOptions = { config?: string; json?: boolean; @@ -133,6 +117,17 @@ type WorktreeMergeHistoryOptions = { yes?: boolean; }; +type WorktreeReseedOptions = { + from?: string; + to?: string; + fromConfig?: string; + fromDataDir?: string; + fromInstance?: string; + seedMode?: string; + yes?: boolean; + allowLiveTarget?: boolean; +}; + type EmbeddedPostgresInstance = { initialise(): Promise; start(): Promise; @@ -738,6 +733,65 @@ export function resolveSourceConfigPath(opts: WorktreeInitOptions): string { return path.resolve(sourceHome, "instances", sourceInstanceId, "config.json"); } +export function resolveWorktreeReseedSource(input: WorktreeReseedOptions): ResolvedWorktreeReseedSource { + const fromSelector = nonEmpty(input.from); + const fromConfig = nonEmpty(input.fromConfig); + const fromDataDir = nonEmpty(input.fromDataDir); + const fromInstance = nonEmpty(input.fromInstance); + const hasExplicitConfigSource = Boolean(fromConfig || fromDataDir || fromInstance); + + if (fromSelector && hasExplicitConfigSource) { + throw new Error( + "Use either --from or --from-config/--from-data-dir/--from-instance, not both.", + ); + } + + if (fromSelector) { + const endpoint = resolveWorktreeEndpointFromSelector(fromSelector, { allowCurrent: true }); + return { + configPath: endpoint.configPath, + label: endpoint.label, + }; + } + + if (hasExplicitConfigSource) { + const configPath = resolveSourceConfigPath({ + fromConfig: fromConfig ?? undefined, + fromDataDir: fromDataDir ?? undefined, + fromInstance: fromInstance ?? undefined, + }); + return { + configPath, + label: configPath, + }; + } + + throw new Error( + "Pass --from or --from-config/--from-instance explicitly so the reseed source is unambiguous.", + ); +} + +export function resolveWorktreeReseedTargetPaths(input: { + configPath: string; + rootPath: string; +}): WorktreeLocalPaths { + const envEntries = readPaperclipEnvEntries(resolvePaperclipEnvFile(input.configPath)); + const homeDir = nonEmpty(envEntries.PAPERCLIP_HOME); + const instanceId = nonEmpty(envEntries.PAPERCLIP_INSTANCE_ID); + + if (!homeDir || !instanceId) { + throw new Error( + `Target config ${input.configPath} does not look like a worktree-local Paperclip instance. Expected PAPERCLIP_HOME and PAPERCLIP_INSTANCE_ID in the adjacent .env.`, + ); + } + + return resolveWorktreeLocalPaths({ + cwd: input.rootPath, + homeDir, + instanceId, + }); +} + function resolveSourceConnectionString(config: PaperclipConfig, envEntries: Record, portOverride?: number): string { if (config.database.mode === "postgres") { const connectionString = nonEmpty(envEntries.DATABASE_URL) ?? nonEmpty(config.database.connectionString); @@ -894,6 +948,8 @@ async function seedWorktreeDatabase(input: { input.sourceConfig.database.embeddedPostgresDataDir, input.sourceConfig.database.embeddedPostgresPort, ); + const sourceAdminConnectionString = `postgres://paperclip:paperclip@127.0.0.1:${sourceHandle.port}/postgres`; + await ensurePostgresDatabase(sourceAdminConnectionString, "paperclip"); } const sourceConnectionString = resolveSourceConnectionString( input.sourceConfig, @@ -1068,160 +1124,6 @@ export async function worktreeInitCommand(opts: WorktreeInitOptions): Promise { - if (!existsSync(sourcePath)) { - return null; - } - await fsPromises.cp(sourcePath, targetPath, { recursive: true }); - return targetPath; -} - -async function snapshotWorktreeReseedState(target: { - repoConfigDir: string; - instanceRoot: string; -}): Promise { - const tempRoot = await fsPromises.mkdtemp(path.join(os.tmpdir(), "paperclip-worktree-reseed-backup-")); - return { - tempRoot, - repoConfigDirBackup: await snapshotDirectory( - target.repoConfigDir, - path.resolve(tempRoot, "repo-config"), - ), - instanceRootBackup: await snapshotDirectory( - target.instanceRoot, - path.resolve(tempRoot, "instance-root"), - ), - }; -} - -async function restoreDirectoryBackup(backupPath: string | null, targetPath: string): Promise { - rmSync(targetPath, { recursive: true, force: true }); - if (!backupPath) { - return; - } - await fsPromises.cp(backupPath, targetPath, { recursive: true }); -} - -async function restoreWorktreeReseedState( - backup: WorktreeReseedBackup, - target: { repoConfigDir: string; instanceRoot: string }, -): Promise { - await restoreDirectoryBackup(backup.repoConfigDirBackup, target.repoConfigDir); - await restoreDirectoryBackup(backup.instanceRootBackup, target.instanceRoot); -} - -export async function worktreeReseedCommand(opts: WorktreeReseedOptions): Promise { - printPaperclipCliBanner(); - p.intro(pc.bgCyan(pc.black(" paperclipai worktree reseed "))); - - if (!hasExplicitSourceSelection(opts)) { - throw new Error( - "Reseed requires an explicit source. Pass --from-config or --from-instance (optionally with --from-data-dir).", - ); - } - - const target = resolveCurrentWorktreeReseedState({ home: opts.home }); - const sourceConfigPath = resolveSourceConfigPath(opts); - if (path.resolve(sourceConfigPath) === target.currentConfigPath) { - throw new Error( - "Source and target Paperclip configs are the same. Pass a different source instance/config when reseeding.", - ); - } - - const seedMode = opts.seedMode ?? "minimal"; - if (!isWorktreeSeedMode(seedMode)) { - throw new Error(`Unsupported seed mode "${seedMode}". Expected one of: minimal, full.`); - } - - const confirmed = opts.yes - ? true - : await p.confirm({ - message: `Reseed the current worktree instance (${target.instanceId}) from ${sourceConfigPath}? This overwrites only the current worktree Paperclip instance data.`, - initialValue: false, - }); - if (p.isCancel(confirmed) || !confirmed) { - p.log.warn("Reseed cancelled."); - return; - } - - const targetPaths = resolveWorktreeLocalPaths({ - cwd: process.cwd(), - homeDir: target.homeDir, - instanceId: target.instanceId, - }); - const backup = await snapshotWorktreeReseedState(targetPaths); - - try { - await runWorktreeInit({ - name: target.worktreeName, - color: target.worktreeColor, - instance: target.instanceId, - home: target.homeDir, - fromConfig: opts.fromConfig, - fromDataDir: opts.fromDataDir, - fromInstance: opts.fromInstance, - sourceConfigPathOverride: sourceConfigPath, - serverPort: target.serverPort, - dbPort: target.dbPort, - seed: opts.seed ?? true, - seedMode, - force: true, - }); - } catch (error) { - await restoreWorktreeReseedState(backup, targetPaths); - throw error; - } finally { - rmSync(backup.tempRoot, { recursive: true, force: true }); - } -} - export async function worktreeMakeCommand(nameArg: string, opts: WorktreeMakeOptions): Promise { printPaperclipCliBanner(); p.intro(pc.bgCyan(pc.black(" paperclipai worktree:make "))); @@ -1326,6 +1228,11 @@ type ResolvedWorktreeEndpoint = { isCurrent: boolean; }; +type ResolvedWorktreeReseedSource = { + configPath: string; + label: string; +}; + function parseGitWorktreeList(cwd: string): GitWorktreeListEntry[] { const raw = execFileSync("git", ["worktree", "list", "--porcelain"], { cwd, @@ -1819,6 +1726,13 @@ function renderMergePlan(plan: Awaited>["pla return lines.join("\n"); } +function resolveRunningEmbeddedPostgresPid(config: PaperclipConfig): number | null { + if (config.database.mode !== "embedded-postgres") { + return null; + } + return readRunningPostmasterPid(path.resolve(config.database.embeddedPostgresDataDir, "postmaster.pid")); +} + async function collectMergePlan(input: { sourceDb: ClosableDb; targetDb: ClosableDb; @@ -2760,6 +2674,89 @@ export async function worktreeMergeHistoryCommand(sourceArg: string | undefined, } } +export async function worktreeReseedCommand(opts: WorktreeReseedOptions): Promise { + printPaperclipCliBanner(); + p.intro(pc.bgCyan(pc.black(" paperclipai worktree reseed "))); + + const seedMode = opts.seedMode ?? "full"; + if (!isWorktreeSeedMode(seedMode)) { + throw new Error(`Unsupported seed mode "${seedMode}". Expected one of: minimal, full.`); + } + + const targetEndpoint = opts.to + ? resolveWorktreeEndpointFromSelector(opts.to, { allowCurrent: true }) + : resolveCurrentEndpoint(); + const source = resolveWorktreeReseedSource(opts); + + if (path.resolve(source.configPath) === path.resolve(targetEndpoint.configPath)) { + throw new Error("Source and target Paperclip configs are the same. Choose different --from/--to values."); + } + if (!existsSync(source.configPath)) { + throw new Error(`Source config not found at ${source.configPath}.`); + } + + const targetConfig = readConfig(targetEndpoint.configPath); + if (!targetConfig) { + throw new Error(`Target config not found at ${targetEndpoint.configPath}.`); + } + const sourceConfig = readConfig(source.configPath); + if (!sourceConfig) { + throw new Error(`Source config not found at ${source.configPath}.`); + } + + const targetPaths = resolveWorktreeReseedTargetPaths({ + configPath: targetEndpoint.configPath, + rootPath: targetEndpoint.rootPath, + }); + const runningTargetPid = resolveRunningEmbeddedPostgresPid(targetConfig); + if (runningTargetPid && !opts.allowLiveTarget) { + throw new Error( + `Target worktree database appears to be running (pid ${runningTargetPid}). Stop Paperclip in ${targetEndpoint.rootPath} before reseeding, or re-run with --allow-live-target if you want to override this guard.`, + ); + } + + const confirmed = opts.yes + ? true + : await p.confirm({ + message: `Overwrite the isolated Paperclip DB for ${targetEndpoint.label} from ${source.label} using ${seedMode} seed mode?`, + initialValue: false, + }); + if (p.isCancel(confirmed) || !confirmed) { + p.log.warn("Reseed cancelled."); + return; + } + + if (runningTargetPid && opts.allowLiveTarget) { + p.log.warning(`Proceeding even though the target embedded PostgreSQL appears to be running (pid ${runningTargetPid}).`); + } + + const spinner = p.spinner(); + spinner.start(`Reseeding ${targetEndpoint.label} from ${source.label} (${seedMode})...`); + try { + const seeded = await seedWorktreeDatabase({ + sourceConfigPath: source.configPath, + sourceConfig, + targetConfig, + targetPaths, + instanceId: targetPaths.instanceId, + seedMode, + }); + spinner.stop(`Reseeded ${targetEndpoint.label} (${seedMode}).`); + p.log.message(pc.dim(`Source: ${source.configPath}`)); + p.log.message(pc.dim(`Target: ${targetEndpoint.configPath}`)); + p.log.message(pc.dim(`Seed snapshot: ${seeded.backupSummary}`)); + for (const rebound of seeded.reboundWorkspaces) { + p.log.message( + pc.dim(`Rebound workspace ${rebound.name}: ${rebound.fromCwd} -> ${rebound.toCwd}`), + ); + } + p.outro(pc.green(`Reseed complete for ${targetEndpoint.label}.`)); + } catch (error) { + spinner.stop(pc.red("Failed to reseed worktree database.")); + throw error; + } +} + export function registerWorktreeCommands(program: Command): void { const worktree = program.command("worktree").description("Worktree-local Paperclip instance helpers"); @@ -2803,17 +2800,6 @@ export function registerWorktreeCommands(program: Command): void { .option("--json", "Print JSON instead of shell exports") .action(worktreeEnvCommand); - worktree - .command("reseed") - .description("Replace the current worktree instance with a fresh seed while preserving this worktree's ports and instance id") - .option("--from-config ", "Source config.json to seed from") - .option("--from-data-dir ", "Source PAPERCLIP_HOME used when deriving the source config") - .option("--from-instance ", "Source instance id when deriving the source config") - .option("--home ", `Home root for worktree instances (env: PAPERCLIP_WORKTREES_DIR, default: ${DEFAULT_WORKTREE_HOME})`) - .option("--seed-mode ", "Seed profile: minimal or full (default: minimal)", "minimal") - .option("--yes", "Skip the destructive confirmation prompt", false) - .action(worktreeReseedCommand); - program .command("worktree:list") .description("List git worktrees visible from this repo and whether they look like Paperclip worktrees") @@ -2833,6 +2819,19 @@ export function registerWorktreeCommands(program: Command): void { .option("--yes", "Skip the interactive confirmation prompt when applying", false) .action(worktreeMergeHistoryCommand); + worktree + .command("reseed") + .description("Re-seed an existing worktree-local instance from another Paperclip instance or worktree") + .option("--from ", "Source worktree path, directory name, branch name, or current") + .option("--to ", "Target worktree path, directory name, branch name, or current (defaults to current)") + .option("--from-config ", "Source config.json to seed from") + .option("--from-data-dir ", "Source PAPERCLIP_HOME used when deriving the source config") + .option("--from-instance ", "Source instance id when deriving the source config") + .option("--seed-mode ", "Seed profile: minimal or full (default: full)", "full") + .option("--yes", "Skip the destructive confirmation prompt", false) + .option("--allow-live-target", "Override the guard that requires the target worktree DB to be stopped first", false) + .action(worktreeReseedCommand); + program .command("worktree:cleanup") .description("Safely remove a worktree, its branch, and its isolated instance data") diff --git a/doc/DEVELOPING.md b/doc/DEVELOPING.md index 6aa30237..724496e9 100644 --- a/doc/DEVELOPING.md +++ b/doc/DEVELOPING.md @@ -232,14 +232,38 @@ pnpm paperclipai worktree init --force --seed-mode minimal \ That rewrites the worktree-local `.paperclip/config.json` + `.paperclip/.env`, recreates the isolated instance under `~/.paperclip-worktrees/instances//`, and preserves the git worktree contents themselves. -For existing worktrees, prefer the dedicated reseed command instead of rebuilding the `worktree init --force` flags manually: +For an already-created worktree where you want to keep the existing repo-local config/env and only overwrite the isolated database, use `worktree reseed` instead. Stop the target worktree's Paperclip server first so the command can replace the DB safely. + +**`pnpm paperclipai worktree reseed [options]`** — Re-seed an existing worktree-local instance from another Paperclip instance or worktree while preserving the target worktree's current config, ports, and instance identity. + +| Option | Description | +|---|---| +| `--from ` | Source worktree path, directory name, branch name, or `current` | +| `--to ` | Target worktree path, directory name, branch name, or `current` (defaults to `current`) | +| `--from-config ` | Source config.json to seed from | +| `--from-data-dir ` | Source `PAPERCLIP_HOME` used when deriving the source config | +| `--from-instance ` | Source instance id when deriving the source config | +| `--seed-mode ` | Seed profile: `minimal` or `full` (default: `full`) | +| `--yes` | Skip the destructive confirmation prompt | +| `--allow-live-target` | Override the guard that requires the target worktree DB to be stopped first | + +Examples: ```sh -cd /path/to/existing/worktree -pnpm paperclipai worktree reseed --from-config /path/to/source/.paperclip/config.json --seed-mode full -``` +# From the main repo, reseed a worktree from the current default/master instance. +cd /path/to/paperclip +pnpm paperclipai worktree reseed \ + --from current \ + --to PAP-1132-assistant-ui-pap-1131-make-issues-comments-be-like-a-chat \ + --seed-mode full \ + --yes -`worktree reseed` preserves the current worktree's instance id, ports, and branding while replacing only that worktree's isolated Paperclip instance data from the chosen source. +# From inside a worktree, reseed it from the default instance config. +cd /path/to/paperclip/.paperclip/worktrees/PAP-1132-assistant-ui-pap-1131-make-issues-comments-be-like-a-chat +pnpm paperclipai worktree reseed \ + --from-instance default \ + --seed-mode full +``` **`pnpm paperclipai worktree:make [options]`** — Create `~/NAME` as a git worktree, then initialize an isolated Paperclip instance inside it. This combines `git worktree add` with `worktree init` in a single step. @@ -267,17 +291,6 @@ pnpm paperclipai worktree:make experiment --no-seed **`pnpm paperclipai worktree env [options]`** — Print shell exports for the current worktree-local Paperclip instance. -**`pnpm paperclipai worktree reseed [options]`** — Replace the current worktree instance with a fresh seed from another Paperclip source while preserving the current worktree's ports and instance id. - -| Option | Description | -|---|---| -| `--from-config ` | Source config.json to seed from | -| `--from-data-dir ` | Source `PAPERCLIP_HOME` used when deriving the source config | -| `--from-instance ` | Source instance id when deriving the source config | -| `--home ` | Home root for worktree instances (default: `~/.paperclip-worktrees`) | -| `--seed-mode ` | Seed profile: `minimal` or `full` (default: `minimal`) | -| `--yes` | Skip the destructive confirmation prompt | - | Option | Description | |---|---| | `-c, --config ` | Path to config file | diff --git a/doc/PUBLISHING.md b/doc/PUBLISHING.md index 83a6c4a7..db56540d 100644 --- a/doc/PUBLISHING.md +++ b/doc/PUBLISHING.md @@ -115,6 +115,38 @@ If the first real publish returns npm `E404`, check npm-side prerequisites befor - The initial publish must include `--access public` for a public scoped package. - npm also requires either account 2FA for publishing or a granular token that is allowed to bypass 2FA. +### Manual first publish for `@paperclipai/mcp-server` + +If you need to publish only the MCP server package once by hand, use: + +- `@paperclipai/mcp-server` + +Recommended flow from the repo root: + +```bash +# optional sanity check: this 404s until the first publish exists +npm view @paperclipai/mcp-server version + +# make sure the build output is fresh +pnpm --filter @paperclipai/mcp-server build + +# confirm your local npm auth before the real publish +npm whoami + +# safe preview of the exact publish payload +cd packages/mcp-server +pnpm publish --dry-run --no-git-checks --access public + +# real publish +pnpm publish --no-git-checks --access public +``` + +Notes: + +- Publish from `packages/mcp-server/`, not the repo root. +- If `npm view @paperclipai/mcp-server version` already returns the same version that is in [`packages/mcp-server/package.json`](../packages/mcp-server/package.json), do not republish. Bump the version or use the normal repo-wide release flow in [`scripts/release.sh`](../scripts/release.sh). +- The same npm-side prerequisites apply as above: valid npm auth, permission to publish to the `@paperclipai` scope, `--access public`, and the required publish auth/2FA policy. + ## Version formats Paperclip uses calendar versions: diff --git a/doc/plans/2026-04-07-issue-detail-speed-and-optimistic-inventory.md b/doc/plans/2026-04-07-issue-detail-speed-and-optimistic-inventory.md new file mode 100644 index 00000000..b9c2f0a8 --- /dev/null +++ b/doc/plans/2026-04-07-issue-detail-speed-and-optimistic-inventory.md @@ -0,0 +1,302 @@ +# 2026-04-07 Issue Detail Speed And Optimistic Inventory + +Status: Proposed +Date: 2026-04-07 +Audience: Product and engineering +Related: +- `ui/src/pages/IssueDetail.tsx` +- `ui/src/components/IssueProperties.tsx` +- `ui/src/api/issues.ts` +- `ui/src/lib/queryKeys.ts` +- `server/src/routes/issues.ts` +- `server/src/services/issues.ts` +- [PAP-1192](/PAP/issues/PAP-1192) +- [PAP-1191](/PAP/issues/PAP-1191) +- [PAP-1188](/PAP/issues/PAP-1188) +- [PAP-1119](/PAP/issues/PAP-1119) +- [PAP-945](/PAP/issues/PAP-945) +- [PAP-1165](/PAP/issues/PAP-1165) +- [PAP-890](/PAP/issues/PAP-890) +- [PAP-254](/PAP/issues/PAP-254) +- [PAP-138](/PAP/issues/PAP-138) + +## 1. Purpose + +This note inventories the Paperclip issues that point to the same UX class of problem: + +- pages feel slow because they over-fetch or refetch too much +- actions feel slow because the UI waits for the round trip before reflecting obvious local intent +- optimistic updates exist in some places, but not in a consistent system + +The immediate trigger is [PAP-1192](/PAP/issues/PAP-1192): the issue detail page now feels very slow. + +## 2. Short Answer + +The issue detail page is not obviously blocked by one pathological endpoint. The main problem is the shape of the page: + +- `IssueDetail` fans out into many independent queries on mount +- some of those queries fetch full company-wide collections for data that is local to one issue +- common mutations invalidate almost every issue-related query, which creates avoidable refetch storms +- the page has only a minimal top-level `Loading...` fallback and very little staged or sectional loading UX + +Measured against the current assigned issue (`PAP-1191`) on local dev, the slowest single request was the full company issues list: + +- `GET /api/issues/:id` about `18ms` +- `GET /api/issues/:id/comments|activity|approvals|attachments` about `6-8ms` +- `GET /api/companies/:companyId/agents|projects` about `9-11ms` +- `GET /api/companies/:companyId/issues` about `76ms` + +That strongly suggests the current pain is aggregate client fan-out plus over-broad invalidation, not one obviously broken endpoint. + +## 3. Similar Issue Inventory + +## 3.1 Issue-detail and issue-action siblings + +- [PAP-1192](/PAP/issues/PAP-1192): issue page feels like it loads forever +- [PAP-1188](/PAP/issues/PAP-1188): assignee changes in the issue properties pane were slow and needed optimistic UI +- [PAP-945](/PAP/issues/PAP-945): optimistic comment rendering +- [PAP-1003](/PAP/issues/PAP-1003): optimistic comments had duplicate draft/pending behavior +- [PAP-947](/PAP/issues/PAP-947): follow-up breakage from optimistic comments +- [PAP-254](/PAP/issues/PAP-254): long issue threads become sluggish when adding comments +- [PAP-189](/PAP/issues/PAP-189): comment semantics while an issue has a live run + +Pattern: the issue page already has a history of needing both optimistic behavior and bounded thread/loading behavior. `PAP-1192` is the same family, not a new category. + +## 3.2 Inbox and list-view siblings + +- [PAP-1119](/PAP/issues/PAP-1119): optimistic archive had fade-out then snap-back +- [PAP-1165](/PAP/issues/PAP-1165): issue search slow +- [PAP-890](/PAP/issues/PAP-890): issue search slow, make it very fast +- [PAP-138](/PAP/issues/PAP-138): inbox loading feels stuck +- [PAP-470](/PAP/issues/PAP-470): create-issue save state felt slow and awkward + +Pattern: Paperclip already has several places where the right fix was "show intent immediately, then reconcile," not "wait for refetch." + +## 3.3 Broader app-loading siblings + +- [PAP-472](/PAP/issues/PAP-472): dashboard charts load very slowly +- [PAP-797](/PAP/issues/PAP-797): reduce loading states through static generation/caching where possible +- [PAP-799](/PAP/issues/PAP-799): embed company data at build time to eliminate loading states +- [PAP-703](/PAP/issues/PAP-703): faster chat and better visual feedback + +Pattern: the product has recurring pressure to reduce blank/loading states across the app, so the issue-detail work should fit that broader direction. + +## 4. Current Issue Detail Findings + +## 4.1 Mount query fan-out is high + +`ui/src/pages/IssueDetail.tsx` mounts all of these data sources up front: + +- issue detail +- comments +- activity +- linked runs +- linked approvals +- attachments +- live runs +- active run +- full company issues list +- agents list +- auth session +- projects list +- feedback votes +- instance general settings +- plugin slots + +This is too much for the initial view of a single issue. + +## 4.2 The page fetches full company issue data just to derive child issues + +`IssueDetail` currently does: + +- `issuesApi.list(selectedCompanyId!)` +- then filters client-side for `parentId === issue.id` + +That is expensive relative to the need. + +Important detail: + +- the server route already supports `parentId` +- `server/src/services/issues.ts` already supports `parentId` +- but `ui/src/api/issues.ts` does not expose `parentId` in the filter type + +So the client is missing an already-supported narrow query path. + +## 4.3 Comments are still fetched as full-thread loads + +`server/src/routes/issues.ts` and `server/src/services/issues.ts` already support: + +- `after` +- `order` +- `limit` + +But `IssueDetail` still calls `issuesApi.listComments(issueId)` with no cursor or limit and then re-invalidates the full thread after common comment actions. + +That means we already have the server-side building blocks for incremental comment loading, but the page is not using them. + +## 4.4 Cache invalidation is broader than necessary + +`invalidateIssue()` in `IssueDetail` invalidates: + +- detail +- activity +- runs +- approvals +- feedback votes +- attachments +- documents +- live runs +- active run +- multiple issue collections +- sidebar badges + +That is acceptable for correctness, but it is expensive for perceived speed and makes optimistic work feel less stable because the page keeps re-painting from fresh network results. + +## 4.5 Live run state is fetched twice + +The page polls both: + +- `issues.liveRuns(issueId)` every 3s +- `issues.activeRun(issueId)` every 3s + +That is duplicate polling for closely related state. + +## 4.6 Properties panel duplicates more list fetching + +`ui/src/components/IssueProperties.tsx` fetches: + +- session +- agents list +- projects list +- labels +- and, when the blocker picker opens, the full company issues list + +The page and panel are each doing their own list work instead of sharing a narrower issue-detail data model. + +## 4.7 The perceived loading UX is too thin + +`IssueDetail` only shows: + +- plain `Loading...` while the main issue query is pending + +After that, many sub-sections can appear empty or incomplete until their own queries resolve. That makes the page feel slower than the raw request times suggest. + +## 5. Recommended Plan + +## 5.1 Phase 1: Fix perceived speed first + +Ship UX changes that make the page feel immediate before deeper backend reshaping: + +- replace the plain `Loading...` state with an issue-detail skeleton +- give comments, activity, attachments, and sub-issues their own skeleton/empty/loading states +- preserve visible stale data during refetch instead of clearing sections +- show explicit pending state for local actions that are already optimistic + +Why first: + +- it improves the user-facing feel immediately +- it reduces the chance that later data changes still feel slow because the page flashes blank + +## 5.2 Phase 2: Stop fetching the full company issues list for child issues + +Add `parentId` to the `issuesApi.list(...)` filter type and switch `IssueDetail` to: + +- fetch child issues only +- stop loading the full company issue collection on page mount + +This is the highest-confidence narrow win because the server path already exists. + +## 5.3 Phase 3: Convert comments to a bounded + incremental model + +Use the existing server support for: + +- latest comment cursor from heartbeat context or issue bootstrap +- incremental fetch with `after` +- bounded initial fetch with `limit` + +Suggested behavior: + +- first load: fetch the latest N comments +- offer `load earlier` for long threads +- after posting or on live updates: append incrementally instead of invalidating the whole thread + +This should address the same performance family as [PAP-254](/PAP/issues/PAP-254). + +## 5.4 Phase 4: Reduce duplicate polling and invalidation + +Tighten the runtime side of the page: + +- collapse `liveRuns` and `activeRun` into one client source if possible +- stop invalidating unrelated issue collections after mutations that only affect the current issue +- merge server responses into cache where we already have enough information + +Examples: + +- posting a comment should not force a broad company issue list refetch unless list-visible metadata changed +- attachment changes should not invalidate approvals or unrelated live-run queries + +## 5.5 Phase 5: Consider an issue-detail bootstrap contract + +If the page is still too chatty after the client fixes, add one tailored bootstrap surface for the issue detail page. + +Potential bootstrap payload: + +- issue core data +- child issue summaries +- latest comment cursor and recent comment page +- live run summary +- attachment summaries +- approval summaries +- any lightweight mention/selector metadata truly needed at first paint + +This should happen after the obvious client overfetch fixes, not before. + +## 6. Concrete Opportunities By Surface + +## 6.1 Issue detail page + +- narrow child issue fetch from full list to `parentId` +- stage loading by section instead of all-or-nothing perception +- bound initial comments payload +- reduce duplicate live-run polling +- replace broad invalidation with targeted cache writes + +## 6.2 Issue properties panel + +- reuse page-level agents/projects data where possible +- fetch blockers lazily and narrowly +- keep local optimistic field updates without broad page invalidation + +## 6.3 Thread/comment UX + +- append optimistic comments directly into the visible thread +- keep queued/pending comment state stable during reconciliation +- fetch only new comments after the last known cursor + +## 6.4 Cross-app optimistic consistency + +The same standards should apply to: + +- issue archive/unarchive +- issue property edits +- create issue/sub-issue flows +- comment posting +- attachment/document actions where the local result is obvious + +## 7. Suggested Execution Order + +1. `PAP-1192`: issue-detail skeletons and staged loading +2. add `parentId` support to `ui/src/api/issues.ts` and switch child-issue fetching to a narrow query +3. move comments to bounded initial load plus incremental updates +4. shrink invalidation and polling scope +5. only then decide whether a new issue-detail bootstrap endpoint is still needed + +## 8. Success Criteria + +This inventory is successful if the follow-up implementation makes the issue page behave like this: + +1. navigating to an issue shows a shaped skeleton immediately, not plain text +2. the page no longer fetches the full company issue list just to render sub-issues +3. long threads do not require full-thread fetches on every load or comment mutation +4. local actions feel immediate and do not snap back because of broad invalidation +5. the issue page feels faster even when absolute backend timings are already reasonable diff --git a/doc/plans/2026-04-07-pi-hooks-survey.md b/doc/plans/2026-04-07-pi-hooks-survey.md new file mode 100644 index 00000000..bc67a704 --- /dev/null +++ b/doc/plans/2026-04-07-pi-hooks-survey.md @@ -0,0 +1,248 @@ +# Pi Hook Survey + +Status: investigation note +Date: 2026-04-07 + +## Why this exists + +We were asked to find the hook surfaces exposed by `pi` and `pi-mono`, then decide which ideas transfer cleanly into Paperclip. + +This note is based on direct source inspection of: + +- `badlogic/pi` default branch and `pi2` branch +- `badlogic/pi-mono` `packages/coding-agent` +- current Paperclip plugin and adapter surfaces in this repo + +## Short answer + +- Current `pi` does not expose a comparable extension hook API. What it exposes today is a JSON event stream from `pi-agent`. +- `pi-mono` does expose a real extension hook system. It is broad, typed, and intentionally allows mutation of agent/runtime behavior. +- Paperclip should copy only the safe subset: + - typed event subscriptions + - read-only run lifecycle events + - explicit worker lifecycle hooks + - plugin-to-plugin events +- Paperclip should not copy the dangerous subset: + - arbitrary mutation hooks on core control-plane decisions + - project-local plugin loading + - built-in tool shadowing by name collision + +## What `pi` has today + +Current `badlogic/pi` is primarily a GPU pod manager plus a lightweight agent runner. It does not expose a `pi.on(...)`-style extension API like `pi-mono`. + +The closest thing to hooks is the `pi-agent --json` event stream: + +- `session_start` +- `user_message` +- `assistant_start` +- `assistant_message` +- `thinking` +- `tool_call` +- `tool_result` +- `token_usage` +- `error` +- `interrupted` + +That makes `pi` useful as an event producer, but not as a host for third-party runtime interception. + +## What `pi-mono` has + +`pi-mono` exposes a real extension API through `packages/coding-agent/src/core/extensions/types.ts`. + +### Extension event hooks + +Verified `pi.on(...)` hook names: + +- `resources_discover` +- `session_start` +- `session_before_switch` +- `session_before_fork` +- `session_before_compact` +- `session_compact` +- `session_shutdown` +- `session_before_tree` +- `session_tree` +- `context` +- `before_provider_request` +- `before_agent_start` +- `agent_start` +- `agent_end` +- `turn_start` +- `turn_end` +- `message_start` +- `message_update` +- `message_end` +- `tool_execution_start` +- `tool_execution_update` +- `tool_execution_end` +- `model_select` +- `tool_call` +- `tool_result` +- `user_bash` +- `input` + +### Other extension surfaces + +`pi-mono` extensions can also: + +- `registerTool(...)` +- `registerCommand(...)` +- `registerShortcut(...)` +- `registerFlag(...)` +- `registerMessageRenderer(...)` +- `registerProvider(...)` +- `unregisterProvider(...)` +- use an inter-extension event bus via `pi.events` + +### Important behavior + +`pi-mono` hooks are not just observers. Several can actively mutate behavior: + +- `before_agent_start` can rewrite the effective system prompt and inject messages +- `context` can replace the message set before an LLM call +- `before_provider_request` can rewrite the serialized provider payload +- `tool_call` can mutate tool inputs and block execution +- `tool_result` can rewrite tool output +- `user_bash` can replace shell execution entirely +- `input` can transform or fully handle user input before normal processing + +That is a good fit for a local coding harness. It is not automatically a good fit for a company control plane. + +## What Paperclip already has + +Paperclip already has several hook-like surfaces, but they are much narrower and safer: + +- plugin worker lifecycle hooks such as `setup()` and `onHealth()` +- declared webhook endpoints for plugins +- scheduled jobs +- a typed plugin event bus with filtering and plugin namespacing +- adapter runtime hooks for logs/status/usage in the run pipeline + +The plugin event bus is already pointed in the right direction: + +- core domain events can be subscribed to +- filters are applied server-side +- plugin-emitted events are namespaced under `plugin..*` +- plugins do not override core behavior by name collision + +## What transfers well to Paperclip + +These ideas from `pi-mono` fit Paperclip with little conceptual risk: + +### 1. Read-only run lifecycle subscriptions + +Paperclip should continue exposing run and transcript events to plugins, for example: + +- run started / finished +- tool started / finished +- usage reported +- issue comment created + +This matches Paperclip's control-plane posture: observe, react, automate. + +### 2. Plugin-to-plugin events + +Paperclip already has this. It is worth keeping and extending. + +This is the clean replacement for many ad hoc hook chains. + +### 3. Explicit worker lifecycle hooks + +Paperclip already has `setup()` and `onHealth()`. That is the right shape. + +If more lifecycle is needed, it should stay explicit and host-controlled. + +### 4. Trusted adapter-level prompt/runtime middleware + +Some `pi-mono` ideas do belong in Paperclip, but only inside trusted adapter/runtime code: + +- prompt shaping before a run starts +- provider request customization +- tool execution wrappers for local coding adapters + +This should be an adapter surface, not a general company plugin surface. + +## What should not transfer directly + +These `pi-mono` capabilities are a bad fit for Paperclip core: + +### 1. Arbitrary mutation hooks on control-plane decisions + +Paperclip should not let general plugins rewrite: + +- issue checkout semantics +- approval outcomes +- budget enforcement +- assignment rules +- company scoping + +Those are core invariants. + +### 2. Tool shadowing by name collision + +`pi-mono`'s low-friction override model is great for a personal coding harness. + +Paperclip should keep plugin tools namespaced and non-shadowing. + +### 3. Project-local plugin loading + +Paperclip is an operator-controlled control plane. Repo-local plugin auto-loading would make behavior too implicit and too hard to govern. + +### 4. UI-session-specific hooks as first-class product surface + +Hooks like: + +- `session_before_switch` +- `session_before_fork` +- `session_before_tree` +- `model_select` +- `input` +- `user_bash` + +are tied to `pi-mono` being an interactive terminal coding harness. + +They do not map directly to Paperclip's board-and-issues model. + +## Recommended Paperclip direction + +If we want a "hooks" story inspired by `pi-mono`, it should split into two layers: + +### Layer 1: safe control-plane plugins + +Allowed surfaces: + +- typed domain event subscriptions +- jobs +- webhooks +- plugin-to-plugin events +- UI slots and bridge actions +- plugin-owned tools and data endpoints + +Disallowed: + +- mutation of core issue/approval/budget invariants + +### Layer 2: trusted runtime middleware + +For adapters and other trusted runtime packages only: + +- prompt assembly hooks +- provider payload hooks +- tool execution wrappers +- transcript rendering helpers + +This is where the best `pi-mono` runtime ideas belong. + +## Bottom line + +If the question is "what hooks do `pi` and `pi-mono` have?": + +- `pi`: JSON output events, not a general extension hook system +- `pi-mono`: a broad extension hook API with 27 named event hooks plus tool/command/provider registration + +If the question is "what works for Paperclip too?": + +- yes: typed event subscriptions, worker lifecycle hooks, namespaced plugin events, read-only run lifecycle events +- maybe, but trusted-only: prompt/provider/tool middleware around adapter execution +- no: arbitrary mutation hooks on control-plane invariants, project-local plugin loading, tool shadowing diff --git a/doc/plans/2026-04-08-agent-browser-process-cleanup-plan.md b/doc/plans/2026-04-08-agent-browser-process-cleanup-plan.md new file mode 100644 index 00000000..42024730 --- /dev/null +++ b/doc/plans/2026-04-08-agent-browser-process-cleanup-plan.md @@ -0,0 +1,238 @@ +# PAP-1231 Agent Browser Process Cleanup Plan + +Status: Proposed +Date: 2026-04-08 +Related issue: `PAP-1231` +Audience: Engineering + +## Goal + +Explain why browser processes accumulate during local agent runs and define a cleanup plan that fixes the general process-ownership problem rather than treating `agent-browser` as a one-off. + +## Short answer + +Yes, there is a likely root cause in Paperclip's local execution model. + +Today, heartbeat-run local adapters persist and manage only the top-level spawned PID. Their timeout/cancel path uses direct `child.kill()` semantics. That is weaker than the runtime-service path, which already tracks and terminates whole process groups. + +If Codex, Claude, Cursor, or a skill launched through them starts Chrome or Chromium helpers, Paperclip can lose ownership of those descendants even when it still believes it handled the run correctly. + +## Observed implementation facts + +### 1. Heartbeat-run local adapters track only one PID + +`packages/adapter-utils/src/server-utils.ts` + +- `runChildProcess()` spawns the adapter command and records only `child.pid` +- timeout handling sends `SIGTERM` and then `SIGKILL` to the direct child +- there is no process-group creation or process-group kill path there today + +`packages/db/src/schema/heartbeat_runs.ts` + +- `heartbeat_runs` stores `process_pid` +- there is no persisted `process_group_id` + +`server/src/services/heartbeat.ts` + +- cancellation logic uses the in-memory child handle and calls `child.kill()` +- orphaned-run recovery checks whether the recorded direct PID is alive +- the recovery model is built around one tracked process, not a descendant tree + +### 2. Workspace runtime already uses stronger ownership + +`server/src/services/workspace-runtime.ts` + +- runtime services are spawned with `detached: process.platform !== "win32"` +- the service record stores `processGroupId` +- shutdown calls `terminateLocalService()` with group-aware killing + +`server/src/services/local-service-supervisor.ts` + +- `terminateLocalService()` prefers `process.kill(-processGroupId, signal)` on POSIX +- it escalates from `SIGTERM` to `SIGKILL` + +This is the clearest internal comparison point: Paperclip already has one local-process subsystem that treats process-group ownership as the right abstraction. + +### 3. The current recovery path explains why leaks would be visible but hard to reason about + +If the direct adapter process exits, hangs, or is cancelled after launching a browser subtree: + +- Paperclip may think it cancelled the run because the parent process is gone +- descendant Chrome helpers may still be running +- orphan recovery has no persisted process-group identity to reconcile or reap later + +That makes the failure look like an `agent-browser` problem when the more general bug is "executor descendants are not owned strongly enough." + +## Why `agent-browser` makes the problem obvious + +Inference: + +- Chromium is intentionally multi-process +- browser automation often leaves a browser process plus renderer, GPU, utility, and crashpad/helper children +- skills that open browsers repeatedly amplify the symptom because each run can produce several descendant processes + +So `agent-browser` is probably not the root cause. It is the workload that exposes the weak ownership model fastest. + +## Success condition + +This work is successful when Paperclip can: + +1. start a local adapter run and own the full descendant tree it created +2. cancel, timeout, or recover that run without leaving Chrome descendants behind on POSIX +3. detect and clean up stale local descendants after server restarts +4. expose enough metadata that operators can see which run owns which spawned process tree + +## Non-goals + +Do not: + +- special-case `agent-browser` only +- depend on manual `pkill chrome` cleanup as the primary fix +- require every skill author to add bespoke browser teardown logic before Paperclip can clean up correctly +- change remote/http adapter behavior as part of the first pass + +## Proposed plan + +### Phase 0: reproduce and instrument + +Objective: + +- make the leak measurable from Paperclip's side before changing execution semantics + +Work: + +- add a reproducible local test script or fixture that launches a child process which itself launches descendants and ignores normal parent exit +- capture parent PID, descendant PIDs, and run ID in logs during local adapter execution +- document current behavior separately for: + - normal completion + - timeout + - explicit cancellation + - server restart during run + +Deliverable: + +- one short repro note attached to the implementation issue or child issue + +### Phase 1: give heartbeat-run local adapters process-group ownership + +Objective: + +- align adapter-run execution with the stronger runtime-service model + +Work: + +- update `runChildProcess()` to create a dedicated process group on POSIX +- persist both: + - direct PID + - process-group ID +- update the run cancellation and timeout paths to kill the group first, then escalate +- keep direct-PID fallback behavior for platforms where group kill is not available + +Likely touched surfaces: + +- `packages/adapter-utils/src/server-utils.ts` +- `packages/db/src/schema/heartbeat_runs.ts` +- `packages/shared/src/types/heartbeat.ts` +- `server/src/services/heartbeat.ts` + +Important design choice: + +- use the same ownership model for all local child-process adapters, not just Codex or Claude + +### Phase 2: make restart recovery group-aware + +Objective: + +- prevent stale descendants from surviving server crashes or restarts indefinitely + +Work: + +- teach orphan reconciliation to inspect the persisted process-group ID, not only the direct PID +- if the direct parent is gone but the group still exists, mark the run as detached-orphaned with clearer metadata +- decide whether restart recovery should: + - adopt the still-running group, or + - terminate it as unrecoverable + +Recommendation: + +- for heartbeat runs, prefer terminating unrecoverable orphan groups rather than adopting them unless we can prove the adapter session remains safe and observable + +Reason: + +- runtime services are long-lived and adoptable +- heartbeat runs are task executions with stricter audit and cancellation semantics + +### Phase 3: add operator-visible cleanup tools + +Objective: + +- make the system diagnosable when ownership still fails + +Work: + +- surface the tracked process metadata in run details or debug endpoints +- add a control-plane cleanup action or CLI utility for stale local run processes owned by Paperclip +- scope cleanup by run/agent/company instead of broad browser-name matching + +This should replace ad hoc scripts as the general-purpose escape hatch. + +### Phase 4: cover platform and regression cases + +Objective: + +- keep the fix from regressing and define platform behavior explicitly + +Tests to add: + +- unit tests around process-group-aware cancellation in adapter execution utilities +- heartbeat recovery tests for: + - surviving descendant tree after parent loss + - timeout cleanup + - cancellation cleanup +- platform-conditional behavior notes for Windows, where negative-PID group kill does not apply + +## Recommended first implementation slice + +The first shipping slice should be narrow: + +1. introduce process-group ownership for local heartbeat-run adapters on POSIX +2. persist group metadata on `heartbeat_runs` +3. switch timeout/cancel paths from direct-child kill to group kill +4. add one regression test that proves descendants die with the parent run + +That should address the main Chrome accumulation path without taking on the full restart-recovery design in the same patch. + +## Risks + +### 1. Over-killing unrelated processes + +If process-group boundaries are created incorrectly, cleanup could terminate more than the run owns. + +Mitigation: + +- create a fresh process group only for the spawned adapter command +- persist and target that exact group + +### 2. Cross-platform differences + +Windows does not support the POSIX negative-PID kill pattern used elsewhere in the repo. + +Mitigation: + +- ship POSIX-first +- keep direct-child fallback on Windows +- document Windows as partial until job-object or equivalent handling is designed + +### 3. Session recovery complexity + +Adopting a still-running orphaned group may look attractive but can break observability if stdout/stderr pipes are already gone. + +Mitigation: + +- default to deterministic cleanup for heartbeat runs unless adoption is explicitly proven safe + +## Recommendation + +Treat this as a Paperclip executor ownership bug, not an `agent-browser` bug. + +`agent-browser` should remain a useful repro case, but the implementation should be shared across all local child-process adapters so any descendant process tree spawned by Codex, Claude, Cursor, Gemini, Pi, or OpenCode is owned and cleaned up consistently. diff --git a/doc/plans/2026-04-08-agent-os-follow-up-plan.md b/doc/plans/2026-04-08-agent-os-follow-up-plan.md new file mode 100644 index 00000000..52029943 --- /dev/null +++ b/doc/plans/2026-04-08-agent-os-follow-up-plan.md @@ -0,0 +1,261 @@ +# PAP-1229 Agent OS Follow-up Plan + +Date: 2026-04-08 +Related issue: `PAP-1229` +Companion analysis: `doc/plans/2026-04-08-agent-os-technical-report.md` + +## Goal + +Turn the `agent-os` research into a low-risk Paperclip execution plan that preserves Paperclip's control-plane model while testing the few runtime ideas that appear worth adopting. + +## Decision summary + +Paperclip should not absorb `agent-os` as a product model or orchestration layer. + +Paperclip should evaluate `agent-os` in three narrow areas: + +1. optional agent runtime for selected local adapters +2. capability-based runtime permission vocabulary +3. snapshot-backed disposable execution roots + +Everything else should stay out of scope unless those three experiments produce strong evidence. + +## Success condition + +This work is successful when Paperclip has: + +- a clear yes/no answer on whether `agent-os` is worth supporting as an execution substrate +- a concrete adapter/runtime experiment with measurable results +- a proposed runtime capability model that fits current Paperclip adapters +- a clear decision on whether snapshot-backed execution roots are worth integrating + +## Non-goals + +Do not: + +- replace Paperclip heartbeats, issues, comments, approvals, or budgets with `agent-os` primitives +- introduce Rust/sidecar requirements for all local execution paths +- migrate all adapters at once +- add runtime workflow/queue abstractions to Paperclip core + +## Existing Paperclip integration points + +The plan should stay anchored to these existing surfaces: + +- `packages/adapter-utils/src/types.ts` + - adapter contract, runtime service reporting, session metadata, and capability normalization targets +- `server/src/services/heartbeat.ts` + - execution entry point, log capture, issue comment summaries, and cost reporting +- `server/src/services/execution-workspaces.ts` + - current workspace lifecycle and git-oriented cleanup/readiness model +- `server/src/services/plugin-loader.ts` + - typed host capability boundary and extension loading patterns +- local adapter implementations in `packages/adapters/*/src/server/` + - current execution behavior to compare against an `agent-os`-backed path + +## Phase plan + +### Phase 0: constraints and experiment design + +Objective: + +- make the evaluation falsifiable before writing integration code + +Deliverables: + +- short experiment brief added to this document or a child issue +- chosen first runtime target: `pi_local` or `opencode_local` +- baseline metrics definition + +Questions to lock down: + +- what exact developer experience should improve +- what security/isolation property we expect to gain +- what failure modes are unacceptable +- whether the prototype is adapter-only or a deeper internal runtime abstraction spike + +Exit criteria: + +- a single first target chosen +- measurable comparison criteria agreed on + +Recommended metrics: + +- cold start latency +- session resume reliability across heartbeats +- transcript/log quality +- implementation complexity +- operational complexity on local dev machines + +### Phase 1: `agentos_local` spike + +Objective: + +- prove that Paperclip can drive one local agent through an `agent-os` runtime without breaking heartbeat semantics + +Suggested scope: + +- implement a new experimental adapter, `agentos_local`, or a feature-flagged runtime path under one existing adapter +- start with `pi_local` or `opencode_local` +- keep Paperclip's existing heartbeat, issue, workspace, and comment flow authoritative + +Minimum implementation shape: + +- adapter accepts model/runtime config +- `server/src/services/heartbeat.ts` still owns run lifecycle +- execution result still maps into existing `AdapterExecutionResult` +- session state still fits current `sessionParams` / `sessionDisplayId` flow + +What to verify: + +- checkout and heartbeat flow still work end to end +- resume across multiple heartbeats works +- logs/transcripts remain readable in the UI +- failure paths surface cleanly in issue comments and run logs + +Exit criteria: + +- one agent type can run reliably through the new path +- documented comparison against the existing local adapter path +- explicit recommendation: continue, pause, or abandon + +### Phase 2: capability-based runtime permissions + +Objective: + +- introduce a Paperclip-native capability vocabulary without coupling the product to `agent-os` + +Suggested scope: + +- extend adapter config schema vocabulary for runtime permissions +- prototype normalized capabilities such as: + - `fs.read` + - `fs.write` + - `network.fetch` + - `network.listen` + - `process.spawn` + - `env.read` + +Integration targets: + +- `packages/adapter-utils/src/types.ts` +- adapter config-schema support +- server-side runtime config validation +- future board-facing UI for permissions, if needed + +What to avoid: + +- building a full human policy UI before the vocabulary is proven useful +- forcing every adapter to implement capability enforcement immediately + +Exit criteria: + +- documented capability schema +- one adapter path using it meaningfully +- clear compatibility story for non-`agent-os` adapters + +### Phase 3: snapshot-backed execution root experiment + +Objective: + +- determine whether a layered/snapshotted root model improves some Paperclip workloads + +Suggested scope: + +- evaluate it only for disposable or non-repo-heavy tasks first +- keep git worktree-based repo editing as the default for codebase tasks + +Promising use cases: + +- routine-style runs +- ephemeral preview/test environments +- isolated document/artifact generation +- tasks that do not need full git history or branch semantics + +Integration targets: + +- `server/src/services/execution-workspaces.ts` +- workspace realization paths called from `server/src/services/heartbeat.ts` + +Exit criteria: + +- clear statement on which workload classes benefit +- clear statement on which workloads should stay on worktrees +- go/no-go decision for broader implementation + +### Phase 4: typed host tool evaluation + +Objective: + +- identify where Paperclip should prefer explicit typed tools over ambient shell access + +Suggested scope: + +- compare `agent-os` host-toolkit ideas with existing plugin and runtime-service surfaces +- choose 1-2 sensitive operations that should become typed tools + +Good candidates: + +- git metadata/status inspection +- runtime service inspection +- deployment/preview status retrieval +- generated artifact publishing + +Exit criteria: + +- one concrete proposal for typed-tool adoption in Paperclip +- clear statement on whether this belongs in plugins, adapters, or core services + +## Recommended sequencing + +Recommended order: + +1. Phase 0 +2. Phase 1 +3. Phase 2 +4. Phase 3 +5. Phase 4 + +Reasoning: + +- Phase 1 is the fastest way to invalidate or validate the entire `agent-os` direction +- Phase 2 is valuable even if Phase 1 is abandoned +- Phase 3 should wait until there is confidence that the runtime approach is operationally worthwhile +- Phase 4 is useful independently but should be informed by what Phase 1 and Phase 2 expose + +## Risks + +### Technical risk + +- `agent-os` introduces Rust sidecar and packaging complexity that may outweigh runtime benefits + +### Product risk + +- runtime experimentation could blur the boundary between Paperclip as control plane and Paperclip as execution platform + +### Integration risk + +- session semantics, log formatting, and failure behavior may degrade relative to current local adapters + +### Scope risk + +- a small runtime spike could expand into an adapter-system rewrite if not kept tightly bounded + +## Guardrails + +To keep this effort controlled: + +- keep all experiments behind a clearly experimental adapter or feature flag +- do not change issue/comment/approval/budget semantics to suit the runtime +- measure against current local adapters instead of judging in isolation +- stop after Phase 1 if the operational burden is already clearly too high + +## Proposed next action + +The next concrete action should be a small implementation spike issue: + +- title: `Prototype experimental agentos_local runtime for one local adapter` +- target adapter: `opencode_local` unless `pi_local` is materially easier +- expected output: code spike, short verification notes, and a continue/stop recommendation + +If leadership wants planning only and no spike yet, this document is the handoff artifact for that decision. diff --git a/doc/plans/2026-04-08-agent-os-technical-report.md b/doc/plans/2026-04-08-agent-os-technical-report.md new file mode 100644 index 00000000..923cf7d2 --- /dev/null +++ b/doc/plans/2026-04-08-agent-os-technical-report.md @@ -0,0 +1,397 @@ +# Agent OS Technical Report for Paperclip + +Date: 2026-04-08 +Analyzed upstream: `rivet-dev/agent-os` at commit `0063cdccd1dcb1c8e211670cd05482d70d26a5c4` (`0063cdc`), dated 2026-04-06 + +## Executive summary + +`agent-os` is not a competitor to Paperclip's core product. It is an execution substrate: an embedded, VM-like runtime for agents, tools, filesystems, and session orchestration. Paperclip is a control plane: company scoping, task hierarchy, approvals, budgets, activity logs, workspaces, and governance. + +The strongest takeaway is not "copy agent-os wholesale." The strongest takeaway is that Paperclip could selectively use its runtime ideas to improve local agent execution safety, reproducibility, and portability while keeping all company/task/governance logic in Paperclip. + +My recommendation is: + +1. Do not merge agent-os concepts into the Paperclip core product model. +2. Do evaluate an optional `agentos_local` execution adapter or internal runtime experiment. +3. Borrow a few design patterns aggressively: + - layered/snapshotted execution filesystems + - explicit capability-based runtime permissions + - a better host-tools bridge for controlled tool execution + - a normalized session capability model for agent adapters +4. Do not import its workflow/cron/queue abstractions into Paperclip core until they are reconciled with Paperclip's issue/comment/governance model. + +## What agent-os actually is + +From the repo layout and implementation, `agent-os` is a mixed TypeScript/Rust system that provides: + +- an `AgentOs` TypeScript API for creating isolated agent VMs +- a Rust kernel/sidecar that virtualizes filesystem, processes, PTYs, pipes, permissions, and networking +- an ACP-based session model for agent runtimes such as Pi, OpenCode, and Claude-style adapters +- a registry of WASM command packages and mount plugins +- optional host toolkits, cron scheduling, and filesystem mounts + +The repo is substantial already: + +- monorepo with `packages/`, `crates/`, and `registry/` +- roughly 1,200 files just across `packages/`, `crates/`, and `registry/` +- mixed implementation model: TypeScript public API plus Rust kernel/sidecar internals + +## Architecture notes + +### 1. Public runtime surface + +The main API lives in `packages/core/src/agent-os.ts` and exports an `AgentOs` class with methods such as: + +- `create()` +- `createSession()` +- `prompt()` +- `exec()` +- `spawn()` +- `snapshotRootFilesystem()` +- cron scheduling helpers + +This is an execution API, not a coordination API. + +### 2. Virtualized kernel model + +The kernel is implemented in Rust under `crates/kernel/src/`. It models: + +- virtual filesystem +- process table +- PTYs and pipes +- resource accounting +- permissioned filesystem access +- network permission checks + +That gives `agent-os` a much stronger isolation story than Paperclip's current "launch a host CLI in a workspace" local adapter approach. + +### 3. Layered filesystem and snapshots + +The filesystem design is one of the most reusable ideas. `agent-os` uses: + +- a bundled base filesystem +- a writable overlay +- optional mounted filesystems +- snapshot export/import for reusing root states + +This is cleaner than treating every execution workspace as a mutable checkout plus ad hoc cleanup. It enables reproducible starting states and cheap isolation. + +### 4. Capability-based permissions + +The kernel-level permission vocabulary is strong and concrete: + +- filesystem operations +- network operations +- child-process execution +- environment access + +The Rust kernel defaults are deny-oriented, but the high-level JS API currently serializes permissive defaults unless the caller provides a policy. That is an important nuance: the primitive is security-minded, but the product surface is still convenience-first. + +### 5. Host-tools bridge + +`agent-os` exposes host-side tools via a toolkit abstraction (`hostTool`, `toolKit`) and a local RPC bridge. This is a strong pattern because it gives the agent explicit, typed tools rather than ambient shell access to everything on the host. + +### 6. ACP session abstraction + +The session model is more uniform than most agent wrappers. It includes: + +- capabilities +- mode/config options +- permission requests +- sequenced session events +- JSON-RPC transport through ACP adapters + +This is directly relevant to Paperclip because our adapter layer still normalizes each CLI agent in a fairly bespoke way. + +## Paperclip anchor points + +The most relevant current Paperclip surfaces for any future `agent-os` integration are: + +- `packages/adapter-utils/src/types.ts` + - shared adapter contract, session metadata, runtime service reporting, environment tests, and optional `detectModel()` +- `server/src/services/heartbeat.ts` + - heartbeat execution, adapter invocation, cost capture, workspace realization, and issue-comment summaries +- `server/src/services/execution-workspaces.ts` + - execution workspace lifecycle and git readiness/cleanup logic +- `server/src/services/plugin-loader.ts` + - dynamic plugin activation, host capability boundaries, and runtime extension loading +- local adapters such as `packages/adapters/codex-local/src/server/execute.ts` and peers + - current host-CLI execution model that an `agent-os` runtime experiment would complement or replace for selected agents + +## What Paperclip can learn from it + +### 1. A safer local execution substrate + +Paperclip's local adapters currently run host CLIs in managed workspaces and rely on adapter-specific behavior plus process-level controls. That is pragmatic, but weakly isolated. + +`agent-os` shows a path toward: + +- running local agent tooling in a constrained runtime +- applying explicit network/filesystem/env policies +- reducing accidental host leakage +- making adapter behavior more portable across machines + +Best use in Paperclip: + +- as an optional runtime beneath local adapters +- or as a new adapter family for agents that can run inside ACP-compatible `agent-os` sessions + +This fits Paperclip because it improves execution safety without changing the control-plane model. + +### 2. Snapshotted execution roots instead of only mutable workspaces + +Paperclip already has strong execution-workspace concepts, but they are repo/worktree-centric. `agent-os` adds a stronger "start from known lower layers, write into a disposable upper layer" model. + +That could improve: + +- reproducible issue starts +- disposable task sandboxes +- faster reset/cleanup +- "resume from snapshot" behavior for recurring routines +- safe preview environments for risky agent operations + +This is especially interesting for tasks that do not need a full git worktree. + +### 3. A capability vocabulary for runtime governance + +Paperclip has governance at the company/task level: + +- approvals +- budgets +- activity logs +- actor permissions +- company scoping + +It has less structure at the runtime capability level. `agent-os` offers a clear vocabulary that Paperclip could adopt even without adopting the runtime itself: + +- `fs.read`, `fs.write`, `fs.mount_sensitive` +- `network.fetch`, `network.http`, `network.listen`, `network.dns` +- child process execution +- env access + +That vocabulary would improve: + +- adapter configuration schemas +- policy UIs +- execution review surfaces +- future approval gates for governed actions + +### 4. Typed host tools instead of shelling out for everything + +Paperclip's plugin system and adapters already have the beginnings of a controlled extension surface. `agent-os` reinforces the value of exposing capabilities as typed tools rather than raw shell access. + +Concrete Paperclip uses: + +- board-approved toolkits for sensitive operations +- company-scoped service tools +- plugin-defined tools with explicit schemas +- safer execution for common actions like git metadata inspection, preview lookups, deployment status checks, or document generation + +This aligns well with Paperclip's governance story. + +### 5. Better adapter normalization around sessions and capabilities + +Paperclip's adapter contract already supports execution results, session params, environment tests, skill syncing, quota windows, and optional `detectModel()`. But much of the per-agent behavior is still adapter-specific. + +`agent-os` suggests a cleaner normalization target: + +- a standard capability map +- a consistent event stream model +- explicit mode/config surfaces +- explicit permission request semantics + +Paperclip does not need ACP everywhere, but it would benefit from a more formal internal session capability model inspired by this. + +### 6. On-demand heavy sandbox escalation + +One of the best architectural choices in `agent-os` is that it does not pretend every workload fits the lightweight runtime. It has a sandbox extension for workloads that need a fuller environment. + +Paperclip can adopt that philosophy directly: + +- lightweight execution by default +- escalate to full worktree / container / remote sandbox only when needed +- keep the escalation explicit in the issue/run model + +That is better than forcing all tasks into the heaviest environment up front. + +## What does not fit Paperclip well + +### 1. Its built-in orchestration primitives overlap the wrong layer + +`agent-os` includes cron/session/workflow style primitives inside the runtime package. Paperclip already has higher-level orchestration concepts: + +- issues/comments +- heartbeat runs +- approvals +- company/org structure +- execution workspaces +- budget enforcement + +If Paperclip copied `agent-os` cron/workflow/queue ideas directly into core, we would likely duplicate orchestration across two layers. That would blur ownership and make debugging harder. + +Paperclip should keep orchestration authoritative at the control-plane layer. + +### 2. It is not company-scoped or governance-native + +`agent-os` is runtime-first, not company-first. It has no native concepts for: + +- company boundaries +- board/operator actor types +- audit logs for business actions +- issue hierarchy +- approval routing +- budget hard-stop behavior + +Those are Paperclip's differentiators. They should not be displaced by runtime abstractions. + +### 3. It introduces meaningful implementation complexity + +Adopting `agent-os` deeply would add: + +- Rust build/runtime complexity +- sidecar lifecycle management +- new failure modes across JS/Rust boundaries +- more packaging and platform compatibility work +- another abstraction layer for debugging already-complex local adapters + +This is justified only if we want stronger local isolation or portability. It is not justified as a general refactor. + +### 4. Its security model is not a drop-in governance solution + +The permission model is good, but it is low-level. Paperclip would still need to answer: + +- who can authorize a capability +- how approval decisions are logged +- how policies are scoped by company/project/issue/agent +- how runtime permissions interact with budgets and task status + +In other words, `agent-os` can supply enforcement primitives, not the control policy system itself. + +### 5. The agent compatibility story is still selective + +The repo is explicit that some runtimes are planned, partial, or still being adapted. In practice this means: + +- good ideas for ACP-native or compatible agents +- less certainty for every CLI agent we support today +- real integration work for Codex/Cursor/Gemini-style Paperclip adapters + +So the main near-term value is not universal replacement. It is selective use where compatibility is strong. + +## Concrete recommendations for Paperclip + +### Recommendation A: prototype an optional `agentos_local` adapter + +This is the highest-value experiment. + +Goal: + +- run one supported agent type inside `agent-os` +- keep Paperclip heartbeat/task/workspace/budget logic unchanged +- evaluate startup time, isolation, transcript quality, and operational complexity + +Good first target: + +- `pi_local` or `opencode_local` + +Why not start with Codex: + +- Paperclip's Codex adapter is already important and carries repo-specific behavior +- `agent-os`'s Codex story is present in the registry/docs, but the safest path is to validate the runtime on a less central adapter first + +Success criteria: + +- heartbeat can invoke the adapter reliably +- session resume works across heartbeats +- Paperclip still records logs, summaries, cost metadata, and issue comments normally +- runtime permissions can be configured without breaking common tasks + +### Recommendation B: adopt capability vocabulary into adapter configs + +Even without using `agent-os`, Paperclip should consider standardizing adapter/runtime permissions around a vocabulary like: + +- filesystem +- network +- subprocess/tool execution +- environment access + +This would improve: + +- schema-driven adapter UIs +- future approvals +- observability +- policy portability across adapters + +### Recommendation C: explore snapshot-backed execution workspaces + +Paperclip should evaluate whether some execution workspaces can be backed by: + +- a reusable lower snapshot +- a disposable upper layer +- optional mounts for project data or artifacts + +This is most valuable for: + +- non-repo tasks +- repeatable routines +- preview/test environments +- isolation-heavy local execution + +It is less urgent for full repo editing flows that already benefit from git worktrees. + +### Recommendation D: strengthen typed tool surfaces + +Paperclip plugins and adapters should continue moving toward explicit typed tools over ad hoc shell access. `agent-os` confirms that this is the right direction. + +This is a good fit for: + +- plugin tools +- workspace runtime services +- governed operations that need approval or auditability + +### Recommendation E: do not import runtime-level workflows into Paperclip core + +Paperclip should not copy `agent-os` cron/workflow/queue concepts into core orchestration yet. + +If we want them later, they must map cleanly onto: + +- issues +- comments +- heartbeats +- approvals +- budgets +- activity logs + +Without that mapping, they would create a second orchestration system inside the product. + +## A practical integration map + +### Best near-term fits + +- optional local adapter runtime +- runtime capability schema +- typed host-tool ideas for plugins/adapters +- snapshot ideas for disposable execution roots + +### Medium-term fits + +- stronger session capability normalization across adapters +- policy-aware runtime permission UI +- selective ACP-inspired event normalization + +### Poor fits right now + +- moving Paperclip orchestration into agent-os workflows +- replacing company/task/governance models with runtime constructs +- making Rust sidecars a mandatory dependency for all local execution + +## Bottom line + +`agent-os` is useful to Paperclip as an execution technology reference, not as a product model. + +Paperclip should treat it the same way it treats sandboxes or agent CLIs: + +- execution substrate underneath the control plane +- optional where the tradeoff is worth it +- never the source of truth for company/task/governance state + +If we do one thing from this report, it should be a narrowly scoped `agentos_local` experiment plus a design pass on capability-based runtime permissions. Those two ideas have the best upside and the lowest architectural risk. diff --git a/scripts/ensure-workspace-package-links.ts b/scripts/ensure-workspace-package-links.ts deleted file mode 100644 index 8ff86b71..00000000 --- a/scripts/ensure-workspace-package-links.ts +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env -S node --import tsx -import fs from "node:fs/promises"; -import { existsSync, readdirSync, readFileSync, realpathSync } from "node:fs"; -import path from "node:path"; -import { repoRoot } from "./dev-service-profile.ts"; - -type WorkspaceLinkMismatch = { - workspaceDir: string; - packageName: string; - expectedPath: string; - actualPath: string | null; -}; - -function readJsonFile(filePath: string): Record { - return JSON.parse(readFileSync(filePath, "utf8")) as Record; -} - -function discoverWorkspacePackagePaths(rootDir: string): Map { - const packagePaths = new Map(); - const ignoredDirNames = new Set([".git", ".paperclip", "dist", "node_modules"]); - - function visit(dirPath: string) { - const packageJsonPath = path.join(dirPath, "package.json"); - if (existsSync(packageJsonPath)) { - const packageJson = readJsonFile(packageJsonPath); - if (typeof packageJson.name === "string" && packageJson.name.length > 0) { - packagePaths.set(packageJson.name, dirPath); - } - } - - for (const entry of readdirSync(dirPath, { withFileTypes: true })) { - if (!entry.isDirectory()) continue; - if (ignoredDirNames.has(entry.name)) continue; - visit(path.join(dirPath, entry.name)); - } - } - - visit(path.join(rootDir, "packages")); - visit(path.join(rootDir, "server")); - visit(path.join(rootDir, "ui")); - visit(path.join(rootDir, "cli")); - - return packagePaths; -} - -const workspacePackagePaths = discoverWorkspacePackagePaths(repoRoot); - -function findWorkspaceLinkMismatches(workspaceDir: string): WorkspaceLinkMismatch[] { - const packageJson = readJsonFile(path.join(repoRoot, workspaceDir, "package.json")); - const dependencies = { - ...(packageJson.dependencies as Record | undefined), - ...(packageJson.devDependencies as Record | undefined), - }; - const mismatches: WorkspaceLinkMismatch[] = []; - - for (const [packageName, version] of Object.entries(dependencies)) { - if (typeof version !== "string" || !version.startsWith("workspace:")) continue; - - const expectedPath = workspacePackagePaths.get(packageName); - if (!expectedPath) continue; - - const linkPath = path.join(repoRoot, workspaceDir, "node_modules", ...packageName.split("/")); - const actualPath = existsSync(linkPath) ? path.resolve(realpathSync(linkPath)) : null; - if (actualPath === path.resolve(expectedPath)) continue; - - mismatches.push({ - workspaceDir, - packageName, - expectedPath: path.resolve(expectedPath), - actualPath, - }); - } - - return mismatches; -} - -async function ensureWorkspaceLinksCurrent(workspaceDir: string) { - const mismatches = findWorkspaceLinkMismatches(workspaceDir); - if (mismatches.length === 0) return; - - console.log(`[paperclip] detected stale workspace package links for ${workspaceDir}; relinking dependencies...`); - for (const mismatch of mismatches) { - console.log( - `[paperclip] ${mismatch.packageName}: ${mismatch.actualPath ?? "missing"} -> ${mismatch.expectedPath}`, - ); - } - - for (const mismatch of mismatches) { - const linkPath = path.join(repoRoot, mismatch.workspaceDir, "node_modules", ...mismatch.packageName.split("/")); - await fs.mkdir(path.dirname(linkPath), { recursive: true }); - await fs.rm(linkPath, { recursive: true, force: true }); - await fs.symlink(mismatch.expectedPath, linkPath); - } - - const remainingMismatches = findWorkspaceLinkMismatches(workspaceDir); - if (remainingMismatches.length === 0) return; - - throw new Error( - `Workspace relink did not repair all ${workspaceDir} package links: ${remainingMismatches.map((item) => item.packageName).join(", ")}`, - ); -} - -for (const workspaceDir of ["server", "ui"]) { - await ensureWorkspaceLinksCurrent(workspaceDir); -} diff --git a/scripts/kill-agent-browsers.sh b/scripts/kill-agent-browsers.sh new file mode 100755 index 00000000..c89fa96e --- /dev/null +++ b/scripts/kill-agent-browsers.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# +# Kill all "Google Chrome for Testing" processes (agent headless browsers). +# +# Usage: +# scripts/kill-agent-browsers.sh # kill all +# scripts/kill-agent-browsers.sh --dry # preview what would be killed +# + +set -euo pipefail + +DRY_RUN=false +if [[ "${1:-}" == "--dry" || "${1:-}" == "--dry-run" || "${1:-}" == "-n" ]]; then + DRY_RUN=true +fi + +pids=() +lines=() + +while IFS= read -r line; do + [[ -z "$line" ]] && continue + pid=$(echo "$line" | awk '{print $2}') + pids+=("$pid") + lines+=("$line") +done < <(ps aux | grep 'Google Chrome for Testing' | grep -v grep || true) + +if [[ ${#pids[@]} -eq 0 ]]; then + echo "No Google Chrome for Testing processes found." + exit 0 +fi + +echo "Found ${#pids[@]} Google Chrome for Testing process(es):" +echo "" + +for i in "${!pids[@]}"; do + line="${lines[$i]}" + pid=$(echo "$line" | awk '{print $2}') + start=$(echo "$line" | awk '{print $9}') + cmd=$(echo "$line" | awk '{for(i=11;i<=NF;i++) printf "%s ", $i; print ""}') + cmd=$(echo "$cmd" | sed "s|$HOME/||g") + printf " PID %-7s started %-10s %s\n" "$pid" "$start" "$cmd" +done + +echo "" + +if [[ "$DRY_RUN" == true ]]; then + echo "Dry run — re-run without --dry to kill these processes." + exit 0 +fi + +echo "Sending SIGTERM..." +for pid in "${pids[@]}"; do + kill -TERM "$pid" 2>/dev/null && echo " signaled $pid" || echo " $pid already gone" +done + +sleep 2 + +for pid in "${pids[@]}"; do + if kill -0 "$pid" 2>/dev/null; then + echo " $pid still alive, sending SIGKILL..." + kill -KILL "$pid" 2>/dev/null || true + fi +done + +echo "Done." diff --git a/scripts/kill-dev.sh b/scripts/kill-dev.sh index 9a53498a..b6dec9d8 100755 --- a/scripts/kill-dev.sh +++ b/scripts/kill-dev.sh @@ -24,6 +24,8 @@ node_lines=() pg_pids=() pg_pidfiles=() pg_data_dirs=() +browser_pids=() +browser_lines=() is_pid_running() { local pid="$1" @@ -87,6 +89,14 @@ while IFS= read -r line; do node_lines+=("$line") done < <(ps aux | grep -E '/paperclip(-[^/]+)?/' | grep node | grep -v grep || true) +# --- Agent browser processes (headless Chrome from ~/.agent-browser) --- +while IFS= read -r line; do + [[ -z "$line" ]] && continue + pid=$(echo "$line" | awk '{print $2}') + browser_pids+=("$pid") + browser_lines+=("$line") +done < <(ps aux | grep -E 'agent-browser/browsers/chrome-.*/Google Chrome for Testing' | grep -v grep || true) + candidate_pidfiles=() candidate_pidfiles+=( "$HOME"/.paperclip/instances/*/db/postmaster.pid @@ -107,7 +117,7 @@ for pidfile in "${candidate_pidfiles[@]:-}"; do append_postgres_from_pidfile "$pidfile" done -if [[ ${#node_pids[@]} -eq 0 && ${#pg_pids[@]} -eq 0 ]]; then +if [[ ${#node_pids[@]} -eq 0 && ${#pg_pids[@]} -eq 0 && ${#browser_pids[@]} -eq 0 ]]; then echo "No Paperclip dev processes found." exit 0 fi @@ -144,6 +154,22 @@ if [[ ${#pg_pids[@]} -gt 0 ]]; then echo "" fi +if [[ ${#browser_pids[@]} -gt 0 ]]; then + echo "Found ${#browser_pids[@]} agent browser process(es):" + echo "" + + for i in "${!browser_pids[@]:-}"; do + line="${browser_lines[$i]}" + pid=$(echo "$line" | awk '{print $2}') + start=$(echo "$line" | awk '{print $9}') + cmd=$(echo "$line" | awk '{for(i=11;i<=NF;i++) printf "%s ", $i; print ""}') + cmd=$(echo "$cmd" | sed "s|$HOME/||g") + printf " PID %-7s started %-10s %s\n" "$pid" "$start" "$cmd" + done + + echo "" +fi + if [[ "$DRY_RUN" == true ]]; then echo "Dry run — re-run without --dry to kill these processes." exit 0 @@ -158,6 +184,13 @@ if [[ ${#node_pids[@]} -gt 0 ]]; then sleep 2 fi +if [[ ${#browser_pids[@]} -gt 0 ]]; then + echo "Sending SIGTERM to agent browser processes..." + for pid in "${browser_pids[@]}"; do + kill -TERM "$pid" 2>/dev/null && echo " signaled $pid" || echo " $pid already gone" + done +fi + leftover_pg_pids=() leftover_pg_data_dirs=() for i in "${!pg_pids[@]:-}"; do @@ -203,4 +236,13 @@ if [[ ${#pg_pids[@]} -gt 0 ]]; then done fi +if [[ ${#browser_pids[@]} -gt 0 ]]; then + for pid in "${browser_pids[@]:-}"; do + if kill -0 "$pid" 2>/dev/null; then + echo " agent browser $pid still alive, sending SIGKILL..." + kill -KILL "$pid" 2>/dev/null || true + fi + done +fi + echo "Done." diff --git a/server/package.json b/server/package.json index a4d1407d..dac65fa7 100644 --- a/server/package.json +++ b/server/package.json @@ -32,16 +32,15 @@ "skills" ], "scripts": { - "preflight:workspace-links": "tsx ../scripts/ensure-workspace-package-links.ts", - "dev": "pnpm run preflight:workspace-links && tsx src/index.ts", - "dev:watch": "pnpm run preflight:workspace-links && cross-env PAPERCLIP_MIGRATION_PROMPT=never PAPERCLIP_MIGRATION_AUTO_APPLY=true tsx ./scripts/dev-watch.ts", + "dev": "tsx src/index.ts", + "dev:watch": "cross-env PAPERCLIP_MIGRATION_PROMPT=never PAPERCLIP_MIGRATION_AUTO_APPLY=true tsx ./scripts/dev-watch.ts", "prepare:ui-dist": "bash ../scripts/prepare-server-ui-dist.sh", - "build": "pnpm run preflight:workspace-links && tsc && mkdir -p dist/onboarding-assets && cp -R src/onboarding-assets/. dist/onboarding-assets/", + "build": "tsc && mkdir -p dist/onboarding-assets && cp -R src/onboarding-assets/. dist/onboarding-assets/", "prepack": "pnpm run prepare:ui-dist", "postpack": "rm -rf ui-dist", "clean": "rm -rf dist", "start": "node dist/index.js", - "typecheck": "pnpm run preflight:workspace-links && pnpm --filter @paperclipai/plugin-sdk build && tsc --noEmit" + "typecheck": "pnpm --filter @paperclipai/plugin-sdk build && tsc --noEmit" }, "dependencies": { "@aws-sdk/client-s3": "^3.888.0", diff --git a/server/src/__tests__/workspace-runtime.test.ts b/server/src/__tests__/workspace-runtime.test.ts index 5f4fc645..ff492ad2 100644 --- a/server/src/__tests__/workspace-runtime.test.ts +++ b/server/src/__tests__/workspace-runtime.test.ts @@ -200,6 +200,7 @@ describe("ensureServerWorkspaceLinksCurrent", () => { await fs.mkdir(expectedPackageDir, { recursive: true }); await fs.mkdir(stalePackageDir, { recursive: true }); await fs.mkdir(serverNodeModulesScopeDir, { recursive: true }); + await fs.writeFile(path.join(repoRoot, ".git"), "gitdir: /tmp/paperclip-main/.git/worktrees/runtime-links\n", "utf8"); await fs.writeFile(path.join(repoRoot, "pnpm-workspace.yaml"), "packages:\n - packages/*\n - server\n", "utf8"); await fs.writeFile( path.join(repoRoot, "server", "package.json"), @@ -235,6 +236,7 @@ describe("ensureServerWorkspaceLinksCurrent", () => { await fs.mkdir(path.join(repoRoot, "server"), { recursive: true }); await fs.mkdir(expectedPackageDir, { recursive: true }); await fs.mkdir(serverNodeModulesScopeDir, { recursive: true }); + await fs.writeFile(path.join(repoRoot, ".git"), "gitdir: /tmp/paperclip-main/.git/worktrees/runtime-links-current\n", "utf8"); await fs.writeFile(path.join(repoRoot, "pnpm-workspace.yaml"), "packages:\n - packages/*\n - server\n", "utf8"); await fs.writeFile( path.join(repoRoot, "server", "package.json"), @@ -255,6 +257,45 @@ describe("ensureServerWorkspaceLinksCurrent", () => { await ensureServerWorkspaceLinksCurrent(path.join(repoRoot, "server")); }); + + it("skips relinking outside linked git worktrees", async () => { + const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-runtime-links-non-worktree-")); + const staleRoot = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-runtime-links-non-worktree-stale-")); + const serverNodeModulesScopeDir = path.join(repoRoot, "server", "node_modules", "@paperclipai"); + const expectedPackageDir = path.join(repoRoot, "packages", "db"); + const stalePackageDir = path.join(staleRoot, "db"); + + await fs.mkdir(path.join(repoRoot, ".git"), { recursive: true }); + await fs.mkdir(path.join(repoRoot, "server"), { recursive: true }); + await fs.mkdir(expectedPackageDir, { recursive: true }); + await fs.mkdir(stalePackageDir, { recursive: true }); + await fs.mkdir(serverNodeModulesScopeDir, { recursive: true }); + await fs.writeFile(path.join(repoRoot, "pnpm-workspace.yaml"), "packages:\n - packages/*\n - server\n", "utf8"); + await fs.writeFile( + path.join(repoRoot, "server", "package.json"), + JSON.stringify({ + name: "@paperclipai/server", + dependencies: { + "@paperclipai/db": "workspace:*", + }, + }), + "utf8", + ); + await fs.writeFile( + path.join(expectedPackageDir, "package.json"), + JSON.stringify({ name: "@paperclipai/db" }), + "utf8", + ); + await fs.writeFile( + path.join(stalePackageDir, "package.json"), + JSON.stringify({ name: "@paperclipai/db" }), + "utf8", + ); + await fs.symlink(stalePackageDir, path.join(serverNodeModulesScopeDir, "db")); + + await ensureServerWorkspaceLinksCurrent(path.join(repoRoot, "server")); + expect(await fs.realpath(path.join(serverNodeModulesScopeDir, "db"))).toBe(await fs.realpath(stalePackageDir)); + }); }); describe("realizeExecutionWorkspace", () => { diff --git a/server/src/services/workspace-runtime.ts b/server/src/services/workspace-runtime.ts index fc75d0d5..c0447bcc 100644 --- a/server/src/services/workspace-runtime.ts +++ b/server/src/services/workspace-runtime.ts @@ -1,5 +1,5 @@ import { spawn, type ChildProcess } from "node:child_process"; -import { existsSync, readdirSync, readFileSync, realpathSync } from "node:fs"; +import { existsSync, lstatSync, readdirSync, readFileSync, realpathSync } from "node:fs"; import fs from "node:fs/promises"; import net from "node:net"; import { createHash, randomUUID } from "node:crypto"; @@ -157,6 +157,16 @@ function findWorkspaceRoot(startCwd: string) { } } +function isLinkedGitWorktreeCheckout(rootDir: string) { + const gitMetadataPath = path.join(rootDir, ".git"); + if (!existsSync(gitMetadataPath)) return false; + + const stat = lstatSync(gitMetadataPath); + if (!stat.isFile()) return false; + + return readFileSync(gitMetadataPath, "utf8").trimStart().startsWith("gitdir:"); +} + function discoverWorkspacePackagePaths(rootDir: string): Map { const packagePaths = new Map(); const ignoredDirNames = new Set([".git", ".paperclip", "dist", "node_modules"]); @@ -228,6 +238,7 @@ export async function ensureServerWorkspaceLinksCurrent( ) { const workspaceRoot = findWorkspaceRoot(startCwd); if (!workspaceRoot) return; + if (!isLinkedGitWorktreeCheckout(workspaceRoot)) return; const mismatches = findServerWorkspaceLinkMismatches(workspaceRoot); if (mismatches.length === 0) return; diff --git a/skills/paperclip/SKILL.md b/skills/paperclip/SKILL.md index 82dd0c38..0148248b 100644 --- a/skills/paperclip/SKILL.md +++ b/skills/paperclip/SKILL.md @@ -27,6 +27,8 @@ Manual local CLI mode (outside heartbeat runs): use `paperclipai agent local-cli Follow these steps every time you wake up: +**Scoped-wake fast path.** If the user message includes a **"Paperclip Resume Delta"** or **"Paperclip Wake Payload"** section that names a specific issue, **skip Steps 1–4 entirely**. Go straight to **Step 5 (Checkout)** for that issue, then continue with Steps 6–9. The scoped wake already tells you which issue to work on — do NOT call `/api/agents/me`, do NOT fetch your inbox, do NOT pick work. Just checkout, read the wake context, do the work, and update. + **Step 1 — Identity.** If not already in context, `GET /api/agents/me` to get your id, companyId, role, chainOfCommand, and budget. **Step 2 — Approval follow-up (when triggered).** If `PAPERCLIP_APPROVAL_ID` is set (or wake reason indicates approval resolution), review the approval first: