diff --git a/cli/src/commands/client/issue.ts b/cli/src/commands/client/issue.ts index 921c077e..afef1923 100644 --- a/cli/src/commands/client/issue.ts +++ b/cli/src/commands/client/issue.ts @@ -61,6 +61,7 @@ interface IssueUpdateOptions extends BaseClientOptions { interface IssueCommentOptions extends BaseClientOptions { body: string; reopen?: boolean; + resume?: boolean; } interface IssueCheckoutOptions extends BaseClientOptions { @@ -241,12 +242,14 @@ export function registerIssueCommands(program: Command): void { .argument("", "Issue ID") .requiredOption("--body ", "Comment body") .option("--reopen", "Reopen if issue is done/cancelled") + .option("--resume", "Request explicit follow-up and wake the assignee when resumable") .action(async (issueId: string, opts: IssueCommentOptions) => { try { const ctx = resolveCommandContext(opts); const payload = addIssueCommentSchema.parse({ body: opts.body, reopen: opts.reopen, + resume: opts.resume, }); const comment = await ctx.api.post(`/api/issues/${issueId}/comments`, payload); printOutput(comment, { json: ctx.json }); diff --git a/doc/SPEC-implementation.md b/doc/SPEC-implementation.md index 09c247f0..222044cb 100644 --- a/doc/SPEC-implementation.md +++ b/doc/SPEC-implementation.md @@ -37,7 +37,7 @@ These decisions close open questions from `SPEC.md` for V1. 
| Visibility | Full visibility to board and all agents in same company | | Communication | Tasks + comments only (no separate chat system) | | Task ownership | Single assignee; atomic checkout required for `in_progress` transition | -| Recovery | No automatic reassignment; work recovery stays manual/explicit | +| Recovery | No automatic reassignment; control-plane recovery may retry lost execution continuity once, then uses explicit recovery issues or human escalation | | Agent adapters | Built-in `process` and `http` adapters | | Auth | Mode-dependent human auth (`local_trusted` implicit board in current code; authenticated mode uses sessions), API keys for agents | | Budget period | Monthly UTC calendar window | @@ -395,7 +395,7 @@ Side effects: - entering `done` sets `completed_at` - entering `cancelled` sets `cancelled_at` -Detailed ownership, execution, blocker, and crash-recovery semantics are documented in `doc/execution-semantics.md`. +Detailed ownership, execution, blocker, active-run watchdog, and crash-recovery semantics are documented in `doc/execution-semantics.md`. ## 8.3 Approval Status diff --git a/doc/execution-semantics.md b/doc/execution-semantics.md index 9cfdf1b9..38e57bab 100644 --- a/doc/execution-semantics.md +++ b/doc/execution-semantics.md @@ -1,7 +1,7 @@ # Execution Semantics Status: Current implementation guide -Date: 2026-04-13 +Date: 2026-04-23 Audience: Product and engineering This document explains how Paperclip interprets issue assignment, issue status, execution runs, wakeups, parent/sub-issue structure, and blocker relationships. @@ -218,15 +218,81 @@ This is an active-work continuity recovery. Startup recovery and periodic recovery are different from normal wakeup delivery. -On startup and on the periodic recovery loop, Paperclip now does three things in sequence: +On startup and on the periodic recovery loop, Paperclip now does four things in sequence: 1. reap orphaned `running` runs 2. resume persisted `queued` runs 3. 
reconcile stranded assigned work +4. scan silent active runs and create or update explicit watchdog review issues -That last step is what closes the gap where issue state survives a crash but the wake/run path does not. +The stranded-work pass closes the gap where issue state survives a crash but the wake/run path does not. The silent-run scan covers the separate case where a live process exists but has stopped producing observable output. -## 10. What This Does Not Mean +## 10. Silent Active-Run Watchdog + +An active run can still be unhealthy even when its process is `running`. Paperclip treats prolonged output silence as a watchdog signal, not as proof that the run is failed. + +The recovery service owns this contract: + +- classify active-run output silence as `ok`, `suspicious`, `critical`, `snoozed`, or `not_applicable` +- collect bounded evidence from run logs, recent run events, child issues, and blockers +- preserve redaction and truncation before evidence is written to issue descriptions +- create at most one open `stale_active_run_evaluation` issue per run +- honor active snooze decisions before creating more review work +- build the `outputSilence` summary shown by live-run and active-run API responses + +Suspicious silence creates a medium-priority review issue for the selected recovery owner. Critical silence raises that review issue to high priority and blocks the source issue on the explicit evaluation task without cancelling the active process. 
+ +Watchdog decisions are explicit operator/recovery-owner decisions: + +- `snooze` records an operator-chosen future quiet-until time and suppresses scan-created review work during that window +- `continue` records that the current evidence is acceptable, does not cancel or mutate the active run, and sets a 30-minute default re-arm window before the watchdog evaluates the still-silent run again +- `dismissed_false_positive` records why the review was not actionable + +Operators should prefer `snooze` for known time-bounded quiet periods. `continue` is only a short acknowledgement of the current evidence; if the run remains silent after the re-arm window, the periodic watchdog scan can create or update review work again. + +The board can record watchdog decisions. The assigned owner of the watchdog evaluation issue can also record them. Other agents cannot. + +## 11. Auto-Recover vs Explicit Recovery vs Human Escalation + +Paperclip uses three different recovery outcomes, depending on how much it can safely infer. + +### Auto-Recover + +Auto-recovery is allowed when ownership is clear and the control plane only lost execution continuity. + +Examples: + +- requeue one dispatch wake for an assigned `todo` issue whose latest run failed, timed out, or was cancelled +- requeue one continuation wake for an assigned `in_progress` issue whose live execution path disappeared +- assign an orphan blocker back to its creator when that blocker is already preventing other work + +Auto-recovery preserves the existing owner. It does not choose a replacement agent. + +### Explicit Recovery Issue + +Paperclip creates an explicit recovery issue when the system can identify a problem but cannot safely complete the work itself. 
+ +Examples: + +- automatic stranded-work retry was already exhausted +- a dependency graph has an invalid/uninvokable owner, unassigned blocker, or invalid review participant +- an active run is silent past the watchdog threshold + +The source issue remains visible and blocked on the recovery issue when blocking is necessary for correctness. The recovery owner must restore a live path, resolve the source issue manually, or record the reason it is a false positive. + +### Human Escalation + +Human escalation is required when the next safe action depends on board judgment, budget/approval policy, or information unavailable to the control plane. + +Examples: + +- all candidate recovery owners are paused, terminated, pending approval, or budget-blocked +- the issue is human-owned rather than agent-owned +- the run is intentionally quiet but needs an operator decision before cancellation or continuation + +In these cases Paperclip should leave a visible issue/comment trail instead of silently retrying. + +## 12. What This Does Not Mean These semantics do not change V1 into an auto-reassignment system. @@ -240,9 +306,10 @@ The recovery model is intentionally conservative: - preserve ownership - retry once when the control plane lost execution continuity +- create explicit recovery work when the system can identify a bounded recovery owner/action - escalate visibly when the system cannot safely keep going -## 11. Practical Interpretation +## 13. 
Practical Interpretation For a board operator, the intended meaning is: diff --git a/packages/adapter-utils/src/server-utils.ts b/packages/adapter-utils/src/server-utils.ts index a66ccdc0..297d57c7 100644 --- a/packages/adapter-utils/src/server-utils.ts +++ b/packages/adapter-utils/src/server-utils.ts @@ -92,6 +92,7 @@ export const DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE = [ "- If woken by a human comment on a dependency-blocked issue, respond or triage the comment without treating the blocked deliverable work as unblocked.", "- Create child issues directly when you know what needs to be done; use issue-thread interactions when the board/user must choose suggested tasks, answer structured questions, or confirm a proposal.", "- To ask for that input, create an interaction on the current issue with POST /api/issues/{issueId}/interactions using kind suggest_tasks, ask_user_questions, or request_confirmation. Use continuationPolicy wake_assignee when you need to resume after a response; for request_confirmation this resumes only after acceptance.", + "- When you intentionally restart follow-up work on a completed assigned issue, include structured `resume: true` with the POST /api/issues/{issueId}/comments or PATCH /api/issues/{issueId} comment payload. Generic agent comments on closed issues are inert by default.", "- For plan approval, update the plan document first, then create request_confirmation targeting the latest plan revision with idempotencyKey confirmation:{issueId}:plan:{revisionId}. 
Wait for acceptance before creating implementation subtasks, and create a fresh confirmation after superseding board/user comments if approval is still needed.", "- If blocked, mark the issue blocked and name the unblock owner and action.", "- Respect budget, pause/cancel, approval gates, and company boundaries.", diff --git a/packages/db/src/migrations/0069_liveness_recovery_dedupe.sql b/packages/db/src/migrations/0069_liveness_recovery_dedupe.sql new file mode 100644 index 00000000..bcd4cbc8 --- /dev/null +++ b/packages/db/src/migrations/0069_liveness_recovery_dedupe.sql @@ -0,0 +1,13 @@ +CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_liveness_recovery_incident_uq" + ON "issues" USING btree ("company_id","origin_kind","origin_id") + WHERE "origin_kind" = 'harness_liveness_escalation' + AND "origin_id" IS NOT NULL + AND "hidden_at" IS NULL + AND "status" NOT IN ('done', 'cancelled'); +--> statement-breakpoint +CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_liveness_recovery_leaf_uq" + ON "issues" USING btree ("company_id","origin_kind","origin_fingerprint") + WHERE "origin_kind" = 'harness_liveness_escalation' + AND "origin_fingerprint" <> 'default' + AND "hidden_at" IS NULL + AND "status" NOT IN ('done', 'cancelled'); diff --git a/packages/db/src/migrations/0070_active_run_output_watchdog.sql b/packages/db/src/migrations/0070_active_run_output_watchdog.sql new file mode 100644 index 00000000..47d68fe5 --- /dev/null +++ b/packages/db/src/migrations/0070_active_run_output_watchdog.sql @@ -0,0 +1,70 @@ +ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_at" timestamp with time zone; +--> statement-breakpoint +ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_seq" integer DEFAULT 0 NOT NULL; +--> statement-breakpoint +ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_stream" text; +--> statement-breakpoint +ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_bytes" bigint; +--> statement-breakpoint 
+CREATE INDEX IF NOT EXISTS "heartbeat_runs_company_status_last_output_idx" + ON "heartbeat_runs" USING btree ("company_id","status","last_output_at"); +--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "heartbeat_runs_company_status_process_started_idx" + ON "heartbeat_runs" USING btree ("company_id","status","process_started_at"); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "heartbeat_run_watchdog_decisions" ( + "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, + "company_id" uuid NOT NULL, + "run_id" uuid NOT NULL, + "evaluation_issue_id" uuid, + "decision" text NOT NULL, + "snoozed_until" timestamp with time zone, + "reason" text, + "created_by_agent_id" uuid, + "created_by_user_id" text, + "created_by_run_id" uuid, + "created_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_company_id_companies_id_fk" FOREIGN KEY ("company_id") REFERENCES "public"."companies"("id") ON DELETE no action ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_run_id_heartbeat_runs_id_fk" FOREIGN KEY ("run_id") REFERENCES "public"."heartbeat_runs"("id") ON DELETE cascade ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_evaluation_issue_id_issues_id_fk" FOREIGN KEY ("evaluation_issue_id") REFERENCES "public"."issues"("id") ON DELETE set null ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_created_by_agent_id_agents_id_fk" FOREIGN KEY 
("created_by_agent_id") REFERENCES "public"."agents"("id") ON DELETE set null ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_created_by_run_id_heartbeat_runs_id_fk" FOREIGN KEY ("created_by_run_id") REFERENCES "public"."heartbeat_runs"("id") ON DELETE set null ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "heartbeat_run_watchdog_decisions_company_run_created_idx" + ON "heartbeat_run_watchdog_decisions" USING btree ("company_id","run_id","created_at"); +--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "heartbeat_run_watchdog_decisions_company_run_snooze_idx" + ON "heartbeat_run_watchdog_decisions" USING btree ("company_id","run_id","snoozed_until"); +--> statement-breakpoint +CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_stale_run_evaluation_uq" + ON "issues" USING btree ("company_id","origin_kind","origin_id") + WHERE "origin_kind" = 'stale_active_run_evaluation' + AND "origin_id" IS NOT NULL + AND "hidden_at" IS NULL + AND "status" NOT IN ('done', 'cancelled'); diff --git a/packages/db/src/migrations/meta/_journal.json b/packages/db/src/migrations/meta/_journal.json index d429f9e0..765875c4 100644 --- a/packages/db/src/migrations/meta/_journal.json +++ b/packages/db/src/migrations/meta/_journal.json @@ -484,6 +484,20 @@ "when": 1776959400000, "tag": "0068_environment_local_driver_unique", "breakpoints": true + }, + { + "idx": 69, + "version": "7", + "when": 1776780003000, + "tag": "0069_liveness_recovery_dedupe", + "breakpoints": true + }, + { + "idx": 70, + "version": "7", + "when": 1776780004000, + "tag": "0070_active_run_output_watchdog", + "breakpoints": true } ] } diff --git a/packages/db/src/schema/heartbeat_run_watchdog_decisions.ts b/packages/db/src/schema/heartbeat_run_watchdog_decisions.ts new file 
mode 100644 index 00000000..46e17a51 --- /dev/null +++ b/packages/db/src/schema/heartbeat_run_watchdog_decisions.ts @@ -0,0 +1,34 @@ +import { index, pgTable, text, timestamp, uuid } from "drizzle-orm/pg-core"; +import { agents } from "./agents.js"; +import { companies } from "./companies.js"; +import { heartbeatRuns } from "./heartbeat_runs.js"; +import { issues } from "./issues.js"; + +export const heartbeatRunWatchdogDecisions = pgTable( + "heartbeat_run_watchdog_decisions", + { + id: uuid("id").primaryKey().defaultRandom(), + companyId: uuid("company_id").notNull().references(() => companies.id), + runId: uuid("run_id").notNull().references(() => heartbeatRuns.id, { onDelete: "cascade" }), + evaluationIssueId: uuid("evaluation_issue_id").references(() => issues.id, { onDelete: "set null" }), + decision: text("decision").notNull(), + snoozedUntil: timestamp("snoozed_until", { withTimezone: true }), + reason: text("reason"), + createdByAgentId: uuid("created_by_agent_id").references(() => agents.id, { onDelete: "set null" }), + createdByUserId: text("created_by_user_id"), + createdByRunId: uuid("created_by_run_id").references(() => heartbeatRuns.id, { onDelete: "set null" }), + createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(), + }, + (table) => ({ + companyRunCreatedIdx: index("heartbeat_run_watchdog_decisions_company_run_created_idx").on( + table.companyId, + table.runId, + table.createdAt, + ), + companyRunSnoozeIdx: index("heartbeat_run_watchdog_decisions_company_run_snooze_idx").on( + table.companyId, + table.runId, + table.snoozedUntil, + ), + }), +); diff --git a/packages/db/src/schema/heartbeat_runs.ts b/packages/db/src/schema/heartbeat_runs.ts index 4010e2b6..c975892a 100644 --- a/packages/db/src/schema/heartbeat_runs.ts +++ b/packages/db/src/schema/heartbeat_runs.ts @@ -34,6 +34,10 @@ export const heartbeatRuns = pgTable( processPid: integer("process_pid"), processGroupId: integer("process_group_id"), processStartedAt: 
timestamp("process_started_at", { withTimezone: true }), + lastOutputAt: timestamp("last_output_at", { withTimezone: true }), + lastOutputSeq: integer("last_output_seq").notNull().default(0), + lastOutputStream: text("last_output_stream"), + lastOutputBytes: bigint("last_output_bytes", { mode: "number" }), retryOfRunId: uuid("retry_of_run_id").references((): AnyPgColumn => heartbeatRuns.id, { onDelete: "set null", }), @@ -64,5 +68,15 @@ export const heartbeatRuns = pgTable( table.livenessState, table.createdAt, ), + companyStatusLastOutputIdx: index("heartbeat_runs_company_status_last_output_idx").on( + table.companyId, + table.status, + table.lastOutputAt, + ), + companyStatusProcessStartedIdx: index("heartbeat_runs_company_status_process_started_idx").on( + table.companyId, + table.status, + table.processStartedAt, + ), }), ); diff --git a/packages/db/src/schema/index.ts b/packages/db/src/schema/index.ts index fcf2ecc9..322a326d 100644 --- a/packages/db/src/schema/index.ts +++ b/packages/db/src/schema/index.ts @@ -53,6 +53,7 @@ export { documentRevisions } from "./document_revisions.js"; export { issueDocuments } from "./issue_documents.js"; export { heartbeatRuns } from "./heartbeat_runs.js"; export { heartbeatRunEvents } from "./heartbeat_run_events.js"; +export { heartbeatRunWatchdogDecisions } from "./heartbeat_run_watchdog_decisions.js"; export { costEvents } from "./cost_events.js"; export { financeEvents } from "./finance_events.js"; export { approvals } from "./approvals.js"; diff --git a/packages/db/src/schema/issues.ts b/packages/db/src/schema/issues.ts index d4dae91a..d1e50656 100644 --- a/packages/db/src/schema/issues.ts +++ b/packages/db/src/schema/issues.ts @@ -91,5 +91,29 @@ export const issues = pgTable( and ${table.executionRunId} is not null and ${table.status} in ('backlog', 'todo', 'in_progress', 'in_review', 'blocked')`, ), + activeLivenessRecoveryIncidentIdx: uniqueIndex("issues_active_liveness_recovery_incident_uq") + .on(table.companyId, 
table.originKind, table.originId) + .where( + sql`${table.originKind} = 'harness_liveness_escalation' + and ${table.originId} is not null + and ${table.hiddenAt} is null + and ${table.status} not in ('done', 'cancelled')`, + ), + activeLivenessRecoveryLeafIdx: uniqueIndex("issues_active_liveness_recovery_leaf_uq") + .on(table.companyId, table.originKind, table.originFingerprint) + .where( + sql`${table.originKind} = 'harness_liveness_escalation' + and ${table.originFingerprint} <> 'default' + and ${table.hiddenAt} is null + and ${table.status} not in ('done', 'cancelled')`, + ), + activeStaleRunEvaluationIdx: uniqueIndex("issues_active_stale_run_evaluation_uq") + .on(table.companyId, table.originKind, table.originId) + .where( + sql`${table.originKind} = 'stale_active_run_evaluation' + and ${table.originId} is not null + and ${table.hiddenAt} is null + and ${table.status} not in ('done', 'cancelled')`, + ), }), ); diff --git a/packages/db/src/test-embedded-postgres.ts b/packages/db/src/test-embedded-postgres.ts index 04fa642d..11ab9669 100644 --- a/packages/db/src/test-embedded-postgres.ts +++ b/packages/db/src/test-embedded-postgres.ts @@ -33,77 +33,56 @@ export type EmbeddedPostgresTestDatabase = { let embeddedPostgresSupportPromise: Promise | null = null; +const DEFAULT_PAPERCLIP_EMBEDDED_POSTGRES_PORT = 54329; + +function getReservedTestPorts(): Set { + const configuredPorts = [ + DEFAULT_PAPERCLIP_EMBEDDED_POSTGRES_PORT, + Number.parseInt(process.env.PAPERCLIP_EMBEDDED_POSTGRES_PORT ?? "", 10), + ...String(process.env.PAPERCLIP_TEST_POSTGRES_RESERVED_PORTS ?? 
"") + .split(",") + .map((value) => Number.parseInt(value.trim(), 10)), + ]; + return new Set(configuredPorts.filter((port) => Number.isInteger(port) && port > 0 && port <= 65535)); +} + async function getEmbeddedPostgresCtor(): Promise { const mod = await import("embedded-postgres"); return mod.default as EmbeddedPostgresCtor; } async function getAvailablePort(): Promise { - return await new Promise((resolve, reject) => { - const server = net.createServer(); - server.unref(); - server.on("error", reject); - server.listen(0, "127.0.0.1", () => { - const address = server.address(); - if (!address || typeof address === "string") { - server.close(() => reject(new Error("Failed to allocate test port"))); - return; - } - const { port } = address; - server.close((error) => { - if (error) reject(error); - else resolve(port); + const reservedPorts = getReservedTestPorts(); + for (let attempt = 0; attempt < 20; attempt += 1) { + const port = await new Promise((resolve, reject) => { + const server = net.createServer(); + server.unref(); + server.on("error", reject); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") { + server.close(() => reject(new Error("Failed to allocate test port"))); + return; + } + const { port } = address; + server.close((error) => { + if (error) reject(error); + else resolve(port); + }); }); }); - }); -} -function formatEmbeddedPostgresError(error: unknown): string { - if (error instanceof Error && error.message.length > 0) return error.message; - if (typeof error === "string" && error.length > 0) return error; - return "embedded Postgres startup failed"; -} - -async function probeEmbeddedPostgresSupport(): Promise { - const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-embedded-postgres-probe-")); - const port = await getAvailablePort(); - const EmbeddedPostgres = await getEmbeddedPostgresCtor(); - const instance = new EmbeddedPostgres({ - databaseDir: dataDir, - 
user: "paperclip", - password: "paperclip", - port, - persistent: true, - initdbFlags: ["--encoding=UTF8", "--locale=C", "--lc-messages=C"], - onLog: () => {}, - onError: () => {}, - }); - - try { - await instance.initialise(); - await instance.start(); - return { supported: true }; - } catch (error) { - return { - supported: false, - reason: formatEmbeddedPostgresError(error), - }; - } finally { - await instance.stop().catch(() => {}); - fs.rmSync(dataDir, { recursive: true, force: true }); + if (!reservedPorts.has(port)) return port; } + + throw new Error( + `Failed to allocate embedded Postgres test port outside reserved Paperclip ports: ${[ + ...reservedPorts, + ].join(", ")}`, + ); } -export async function getEmbeddedPostgresTestSupport(): Promise { - if (!embeddedPostgresSupportPromise) { - embeddedPostgresSupportPromise = probeEmbeddedPostgresSupport(); - } - return await embeddedPostgresSupportPromise; -} - -export async function startEmbeddedPostgresTestDatabase( - tempDirPrefix: string, -): Promise { +async function createEmbeddedPostgresTestInstance(tempDirPrefix: string) { const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), tempDirPrefix)); const port = await getAvailablePort(); const EmbeddedPostgres = await getEmbeddedPostgresCtor(); @@ -118,6 +97,51 @@ export async function startEmbeddedPostgresTestDatabase( onError: () => {}, }); + return { dataDir, port, instance }; +} + +function cleanupEmbeddedPostgresTestDirs(dataDir: string) { + fs.rmSync(dataDir, { recursive: true, force: true }); +} + +function formatEmbeddedPostgresError(error: unknown): string { + if (error instanceof Error && error.message.length > 0) return error.message; + if (typeof error === "string" && error.length > 0) return error; + return "embedded Postgres startup failed"; +} + +async function probeEmbeddedPostgresSupport(): Promise { + const { dataDir, instance } = await createEmbeddedPostgresTestInstance( + "paperclip-embedded-postgres-probe-", + ); + + try { + await 
instance.initialise(); + await instance.start(); + return { supported: true }; + } catch (error) { + return { + supported: false, + reason: formatEmbeddedPostgresError(error), + }; + } finally { + await instance.stop().catch(() => {}); + cleanupEmbeddedPostgresTestDirs(dataDir); + } +} + +export async function getEmbeddedPostgresTestSupport(): Promise { + if (!embeddedPostgresSupportPromise) { + embeddedPostgresSupportPromise = probeEmbeddedPostgresSupport(); + } + return await embeddedPostgresSupportPromise; +} + +export async function startEmbeddedPostgresTestDatabase( + tempDirPrefix: string, +): Promise { + const { dataDir, port, instance } = await createEmbeddedPostgresTestInstance(tempDirPrefix); + try { await instance.initialise(); await instance.start(); @@ -131,12 +155,12 @@ export async function startEmbeddedPostgresTestDatabase( connectionString, cleanup: async () => { await instance.stop().catch(() => {}); - fs.rmSync(dataDir, { recursive: true, force: true }); + cleanupEmbeddedPostgresTestDirs(dataDir); }, }; } catch (error) { await instance.stop().catch(() => {}); - fs.rmSync(dataDir, { recursive: true, force: true }); + cleanupEmbeddedPostgresTestDirs(dataDir); throw new Error( `Failed to start embedded PostgreSQL test database: ${formatEmbeddedPostgresError(error)}`, ); diff --git a/packages/mcp-server/src/tools.ts b/packages/mcp-server/src/tools.ts index f101f3fe..54ca904f 100644 --- a/packages/mcp-server/src/tools.ts +++ b/packages/mcp-server/src/tools.ts @@ -450,7 +450,7 @@ export function createToolDefinitions(client: PaperclipApiClient): ToolDefinitio ), makeTool( "paperclipUpdateIssue", - "Patch an issue, optionally including a comment", + "Patch an issue, optionally including a comment; include resume=true when intentionally requesting follow-up on resumable closed work", updateIssueToolSchema, async ({ issueId, ...body }) => client.requestJson("PATCH", `/issues/${encodeURIComponent(issueId)}`, { body }), @@ -475,7 +475,7 @@ export function 
createToolDefinitions(client: PaperclipApiClient): ToolDefinitio ), makeTool( "paperclipAddComment", - "Add a comment to an issue", + "Add a comment to an issue; include resume=true when intentionally requesting follow-up on resumable closed work", addCommentToolSchema, async ({ issueId, ...body }) => client.requestJson("POST", `/issues/${encodeURIComponent(issueId)}/comments`, { body }), diff --git a/packages/shared/src/constants.ts b/packages/shared/src/constants.ts index c712117b..df99bb33 100644 --- a/packages/shared/src/constants.ts +++ b/packages/shared/src/constants.ts @@ -162,7 +162,7 @@ export const ISSUE_THREAD_INTERACTION_CONTINUATION_POLICIES = [ export type IssueThreadInteractionContinuationPolicy = (typeof ISSUE_THREAD_INTERACTION_CONTINUATION_POLICIES)[number]; -export const ISSUE_ORIGIN_KINDS = ["manual", "routine_execution"] as const; +export const ISSUE_ORIGIN_KINDS = ["manual", "routine_execution", "stale_active_run_evaluation"] as const; export type BuiltInIssueOriginKind = (typeof ISSUE_ORIGIN_KINDS)[number]; export type PluginIssueOriginKind = `plugin:${string}`; export type IssueOriginKind = BuiltInIssueOriginKind | PluginIssueOriginKind; diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index c661bdae..2814e410 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -324,6 +324,9 @@ export type { IssueWorkProductReviewState, Issue, IssueAssigneeAdapterOverrides, + IssueBlockerAttention, + IssueBlockerAttentionReason, + IssueBlockerAttentionState, IssueReferenceSource, IssueRelatedWorkItem, IssueRelatedWorkSummary, diff --git a/packages/shared/src/types/heartbeat.ts b/packages/shared/src/types/heartbeat.ts index 98073b5e..5aee87af 100644 --- a/packages/shared/src/types/heartbeat.ts +++ b/packages/shared/src/types/heartbeat.ts @@ -37,6 +37,10 @@ export interface HeartbeatRun { processPid: number | null; processGroupId?: number | null; processStartedAt: Date | null; + lastOutputAt: Date | null; + 
lastOutputSeq: number; + lastOutputStream: "stdout" | "stderr" | null; + lastOutputBytes: number | null; retryOfRunId: string | null; processLossRetryCount: number; scheduledRetryAt?: Date | null; @@ -51,6 +55,28 @@ export interface HeartbeatRun { contextSnapshot: Record | null; createdAt: Date; updatedAt: Date; + outputSilence?: HeartbeatRunOutputSilence; +} + +export type HeartbeatRunOutputSilenceLevel = + | "not_applicable" + | "ok" + | "suspicious" + | "critical" + | "snoozed"; + +export interface HeartbeatRunOutputSilence { + lastOutputAt: Date | string | null; + lastOutputSeq: number; + lastOutputStream: "stdout" | "stderr" | null; + silenceStartedAt: Date | string | null; + silenceAgeMs: number | null; + level: HeartbeatRunOutputSilenceLevel; + suspicionThresholdMs: number; + criticalThresholdMs: number; + snoozedUntil: Date | string | null; + evaluationIssueId: string | null; + evaluationIssueIdentifier: string | null; } export interface AgentWakeupSkipped { diff --git a/packages/shared/src/types/index.ts b/packages/shared/src/types/index.ts index f3336dce..fd4de78f 100644 --- a/packages/shared/src/types/index.ts +++ b/packages/shared/src/types/index.ts @@ -118,6 +118,9 @@ export type { export type { Issue, IssueAssigneeAdapterOverrides, + IssueBlockerAttention, + IssueBlockerAttentionReason, + IssueBlockerAttentionState, IssueReferenceSource, IssueRelatedWorkItem, IssueRelatedWorkSummary, diff --git a/packages/shared/src/types/instance.ts b/packages/shared/src/types/instance.ts index 41686664..73e690ef 100644 --- a/packages/shared/src/types/instance.ts +++ b/packages/shared/src/types/instance.ts @@ -27,6 +27,7 @@ export interface InstanceExperimentalSettings { enableEnvironments: boolean; enableIsolatedWorkspaces: boolean; autoRestartDevServerWhenIdle: boolean; + enableIssueGraphLivenessAutoRecovery: boolean; } export interface InstanceSettings { diff --git a/packages/shared/src/types/issue.ts b/packages/shared/src/types/issue.ts index b49094d0..deeae205 
100644 --- a/packages/shared/src/types/issue.ts +++ b/packages/shared/src/types/issue.ts @@ -116,6 +116,24 @@ export interface IssueRelationIssueSummary { priority: IssuePriority; assigneeAgentId: string | null; assigneeUserId: string | null; + terminalBlockers?: IssueRelationIssueSummary[]; +} + +export type IssueBlockerAttentionState = "none" | "covered" | "needs_attention"; + +export type IssueBlockerAttentionReason = + | "active_child" + | "active_dependency" + | "attention_required" + | null; + +export interface IssueBlockerAttention { + state: IssueBlockerAttentionState; + reason: IssueBlockerAttentionReason; + unresolvedBlockerCount: number; + coveredBlockerCount: number; + attentionBlockerCount: number; + sampleBlockerIdentifier: string | null; } export interface IssueRelation { @@ -242,6 +260,7 @@ export interface Issue { labels?: IssueLabel[]; blockedBy?: IssueRelationIssueSummary[]; blocks?: IssueRelationIssueSummary[]; + blockerAttention?: IssueBlockerAttention; relatedWork?: IssueRelatedWorkSummary; referencedIssueIdentifiers?: string[]; planDocument?: IssueDocument | null; @@ -267,6 +286,7 @@ export interface IssueComment { authorAgentId: string | null; authorUserId: string | null; body: string; + followUpRequested?: boolean; createdAt: Date; updatedAt: Date; } diff --git a/packages/shared/src/validators/instance.ts b/packages/shared/src/validators/instance.ts index f7638a88..94d78226 100644 --- a/packages/shared/src/validators/instance.ts +++ b/packages/shared/src/validators/instance.ts @@ -36,6 +36,7 @@ export const instanceExperimentalSettingsSchema = z.object({ enableEnvironments: z.boolean().default(false), enableIsolatedWorkspaces: z.boolean().default(false), autoRestartDevServerWhenIdle: z.boolean().default(false), + enableIssueGraphLivenessAutoRecovery: z.boolean().default(false), }).strict(); export const patchInstanceExperimentalSettingsSchema = instanceExperimentalSettingsSchema.partial(); diff --git 
a/packages/shared/src/validators/issue.ts b/packages/shared/src/validators/issue.ts index 0db2e1bc..b4451cf9 100644 --- a/packages/shared/src/validators/issue.ts +++ b/packages/shared/src/validators/issue.ts @@ -171,6 +171,7 @@ export const updateIssueSchema = createIssueSchema.partial().extend({ comment: z.string().min(1).optional(), reviewRequest: issueReviewRequestSchema.optional().nullable(), reopen: z.boolean().optional(), + resume: z.boolean().optional(), interrupt: z.boolean().optional(), hiddenAt: z.string().datetime().nullable().optional(), }); @@ -188,6 +189,7 @@ export type CheckoutIssue = z.infer; export const addIssueCommentSchema = z.object({ body: z.string().min(1), reopen: z.boolean().optional(), + resume: z.boolean().optional(), interrupt: z.boolean().optional(), }); diff --git a/server/src/__tests__/README.md b/server/src/__tests__/README.md new file mode 100644 index 00000000..86b8c2e2 --- /dev/null +++ b/server/src/__tests__/README.md @@ -0,0 +1,10 @@ +# Server Tests + +Server tests that need a real PostgreSQL process must use +`./helpers/embedded-postgres.ts` instead of constructing `embedded-postgres` +directly. + +The shared helper creates a throwaway data directory and a reserved-safe +loopback port for each test database. This protects the live Paperclip +control-plane Postgres from server vitest runs; see PAP-2033 for the incident +that introduced this guard. 
diff --git a/server/src/__tests__/activity-service.test.ts b/server/src/__tests__/activity-service.test.ts index 1cd46483..6948d838 100644 --- a/server/src/__tests__/activity-service.test.ts +++ b/server/src/__tests__/activity-service.test.ts @@ -424,7 +424,7 @@ describeEmbeddedPostgres("activity service", () => { expect(backfilledRun).toMatchObject({ runId, livenessState: "plan_only", - livenessReason: "Run described future work without concrete action evidence", + livenessReason: "Run described runnable future work without concrete action evidence", lastUsefulActionAt: null, }); }); @@ -530,7 +530,7 @@ describeEmbeddedPostgres("activity service", () => { expect(backfilledRun).toMatchObject({ runId, livenessState: "plan_only", - livenessReason: "Run described future work without concrete action evidence", + livenessReason: "Run described runnable future work without concrete action evidence", lastUsefulActionAt: null, }); }); diff --git a/server/src/__tests__/agent-live-run-routes.test.ts b/server/src/__tests__/agent-live-run-routes.test.ts index 0fe559a1..0bfa209c 100644 --- a/server/src/__tests__/agent-live-run-routes.test.ts +++ b/server/src/__tests__/agent-live-run-routes.test.ts @@ -10,6 +10,7 @@ const mockHeartbeatService = vi.hoisted(() => ({ buildRunOutputSilence: vi.fn(), getRunIssueSummary: vi.fn(), getActiveRunIssueSummaryForAgent: vi.fn(), + buildRunOutputSilence: vi.fn(), getRunLogAccess: vi.fn(), readLog: vi.fn(), })); @@ -173,6 +174,7 @@ describe("agent live run routes", () => { issueId: "issue-1", }); mockHeartbeatService.getActiveRunIssueSummaryForAgent.mockResolvedValue(null); + mockHeartbeatService.buildRunOutputSilence.mockResolvedValue(null); mockHeartbeatService.getRunLogAccess.mockResolvedValue({ id: "run-1", companyId: "company-1", @@ -209,6 +211,7 @@ describe("agent live run routes", () => { issueId: "issue-1", agentName: "Builder", adapterType: "codex_local", + outputSilence: null, }); expect(res.body).not.toHaveProperty("resultJson"); 
expect(res.body).not.toHaveProperty("contextSnapshot"); diff --git a/server/src/__tests__/feedback-service.test.ts b/server/src/__tests__/feedback-service.test.ts index 7a3a36d8..8a979d84 100644 --- a/server/src/__tests__/feedback-service.test.ts +++ b/server/src/__tests__/feedback-service.test.ts @@ -1,6 +1,5 @@ import { randomUUID } from "node:crypto"; import fs from "node:fs"; -import net from "node:net"; import os from "node:os"; import path from "node:path"; import { eq } from "drizzle-orm"; @@ -8,14 +7,12 @@ import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest import { writePaperclipSkillSyncPreference } from "@paperclipai/adapter-utils/server-utils"; import { agents, - applyPendingMigrations, companies, companySkills, costEvents, createDb, documents, documentRevisions, - ensurePostgresDatabase, feedbackExports, feedbackVotes, heartbeatRuns, @@ -25,72 +22,7 @@ import { issues, } from "@paperclipai/db"; import { feedbackService } from "../services/feedback.ts"; - -type EmbeddedPostgresInstance = { - initialise(): Promise; - start(): Promise; - stop(): Promise; -}; - -type EmbeddedPostgresCtor = new (opts: { - databaseDir: string; - user: string; - password: string; - port: number; - persistent: boolean; - initdbFlags?: string[]; - onLog?: (message: unknown) => void; - onError?: (message: unknown) => void; -}) => EmbeddedPostgresInstance; - -async function getEmbeddedPostgresCtor(): Promise { - const mod = await import("embedded-postgres"); - return mod.default as EmbeddedPostgresCtor; -} - -async function getAvailablePort(): Promise { - return await new Promise((resolve, reject) => { - const server = net.createServer(); - server.unref(); - server.on("error", reject); - server.listen(0, "127.0.0.1", () => { - const address = server.address(); - if (!address || typeof address === "string") { - server.close(() => reject(new Error("Failed to allocate test port"))); - return; - } - const { port } = address; - server.close((error) => { - 
if (error) reject(error); - else resolve(port); - }); - }); - }); -} - -async function startTempDatabase() { - const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-feedback-service-")); - const port = await getAvailablePort(); - const EmbeddedPostgres = await getEmbeddedPostgresCtor(); - const instance = new EmbeddedPostgres({ - databaseDir: dataDir, - user: "paperclip", - password: "paperclip", - port, - persistent: true, - initdbFlags: ["--encoding=UTF8", "--locale=C", "--lc-messages=C"], - onLog: () => {}, - onError: () => {}, - }); - await instance.initialise(); - await instance.start(); - - const adminConnectionString = `postgres://paperclip:paperclip@127.0.0.1:${port}/postgres`; - await ensurePostgresDatabase(adminConnectionString, "paperclip"); - const connectionString = `postgres://paperclip:paperclip@127.0.0.1:${port}/paperclip`; - await applyPendingMigrations(connectionString); - return { connectionString, dataDir, instance }; -} +import { startEmbeddedPostgresTestDatabase } from "./helpers/embedded-postgres.ts"; async function closeDbClient(db: ReturnType | undefined) { await db?.$client?.end?.({ timeout: 0 }); @@ -99,17 +31,15 @@ async function closeDbClient(db: ReturnType | undefined) { describe("feedbackService.saveIssueVote", () => { let db!: ReturnType; let svc!: ReturnType; - let instance: EmbeddedPostgresInstance | null = null; - let dataDir = ""; + let tempDb: Awaited> | null = null; let tempDirs: string[] = []; beforeAll(async () => { - const started = await startTempDatabase(); + const started = await startEmbeddedPostgresTestDatabase("paperclip-feedback-service-"); db = createDb(started.connectionString); svc = feedbackService(db); - instance = started.instance; - dataDir = started.dataDir; - }, 20_000); + tempDb = started; + }, 120_000); afterEach(async () => { await db.delete(feedbackExports); @@ -134,10 +64,7 @@ describe("feedbackService.saveIssueVote", () => { afterAll(async () => { await closeDbClient(db); - await 
instance?.stop(); - if (dataDir) { - fs.rmSync(dataDir, { recursive: true, force: true }); - } + await tempDb?.cleanup(); }); async function seedIssueWithAgentComment() { diff --git a/server/src/__tests__/heartbeat-active-run-output-watchdog.test.ts b/server/src/__tests__/heartbeat-active-run-output-watchdog.test.ts new file mode 100644 index 00000000..45ba2c5d --- /dev/null +++ b/server/src/__tests__/heartbeat-active-run-output-watchdog.test.ts @@ -0,0 +1,549 @@ +import { randomUUID } from "node:crypto"; +import { and, eq, sql } from "drizzle-orm"; +import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest"; +import { + agents, + companies, + createDb, + heartbeatRunWatchdogDecisions, + heartbeatRuns, + issueRelations, + issues, +} from "@paperclipai/db"; +import { + getEmbeddedPostgresTestSupport, + startEmbeddedPostgresTestDatabase, +} from "./helpers/embedded-postgres.js"; +import { + ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS, + ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS, + ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS, + heartbeatService, +} from "../services/heartbeat.ts"; +import { recoveryService } from "../services/recovery/service.ts"; +import { getRunLogStore } from "../services/run-log-store.ts"; + +const mockAdapterExecute = vi.hoisted(() => + vi.fn(async () => ({ + exitCode: 0, + signal: null, + timedOut: false, + errorMessage: null, + summary: "Acknowledged stale-run evaluation.", + provider: "test", + model: "test-model", + })), +); + +vi.mock("../telemetry.ts", () => ({ + getTelemetryClient: () => ({ track: vi.fn() }), +})); + +vi.mock("@paperclipai/shared/telemetry", async () => { + const actual = await vi.importActual( + "@paperclipai/shared/telemetry", + ); + return { + ...actual, + trackAgentFirstHeartbeat: vi.fn(), + }; +}); + +vi.mock("../adapters/index.ts", async () => { + const actual = await vi.importActual("../adapters/index.ts"); + return { + ...actual, + getServerAdapter: vi.fn(() => ({ + supportsLocalAgentJwt: false, + 
execute: mockAdapterExecute, + })), + }; +}); + +const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport(); +const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip; + +if (!embeddedPostgresSupport.supported) { + console.warn( + `Skipping embedded Postgres active-run output watchdog tests on this host: ${embeddedPostgresSupport.reason ?? "unsupported environment"}`, + ); +} + +describeEmbeddedPostgres("active-run output watchdog", () => { + let tempDb: Awaited> | null = null; + let db: ReturnType; + + beforeAll(async () => { + tempDb = await startEmbeddedPostgresTestDatabase("paperclip-active-run-output-watchdog-"); + db = createDb(tempDb.connectionString); + }, 30_000); + + afterEach(async () => { + for (let attempt = 0; attempt < 100; attempt += 1) { + const activeRuns = await db + .select({ id: heartbeatRuns.id }) + .from(heartbeatRuns) + .where(sql`${heartbeatRuns.status} in ('queued', 'running')`); + if (activeRuns.length === 0) break; + await new Promise((resolve) => setTimeout(resolve, 25)); + } + await db.execute(sql.raw(`TRUNCATE TABLE "companies" CASCADE`)); + }); + + afterAll(async () => { + await tempDb?.cleanup(); + }); + + async function seedRunningRun(opts: { now: Date; ageMs: number; withOutput?: boolean; logChunk?: string }) { + const companyId = randomUUID(); + const managerId = randomUUID(); + const coderId = randomUUID(); + const issueId = randomUUID(); + const runId = randomUUID(); + const issuePrefix = `W${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`; + const startedAt = new Date(opts.now.getTime() - opts.ageMs); + const lastOutputAt = opts.withOutput ? 
new Date(opts.now.getTime() - 5 * 60 * 1000) : null; + + await db.insert(companies).values({ + id: companyId, + name: "Watchdog Co", + issuePrefix, + requireBoardApprovalForNewAgents: false, + }); + await db.insert(agents).values([ + { + id: managerId, + companyId, + name: "CTO", + role: "cto", + status: "idle", + adapterType: "codex_local", + adapterConfig: {}, + runtimeConfig: {}, + permissions: {}, + }, + { + id: coderId, + companyId, + name: "Coder", + role: "engineer", + status: "running", + reportsTo: managerId, + adapterType: "codex_local", + adapterConfig: {}, + runtimeConfig: {}, + permissions: {}, + }, + ]); + await db.insert(issues).values({ + id: issueId, + companyId, + title: "Long running implementation", + status: "in_progress", + priority: "medium", + assigneeAgentId: coderId, + issueNumber: 1, + identifier: `${issuePrefix}-1`, + updatedAt: startedAt, + createdAt: startedAt, + }); + await db.insert(heartbeatRuns).values({ + id: runId, + companyId, + agentId: coderId, + status: "running", + invocationSource: "assignment", + triggerDetail: "system", + startedAt, + processStartedAt: startedAt, + lastOutputAt, + lastOutputSeq: opts.withOutput ? 3 : 0, + lastOutputStream: opts.withOutput ? 
"stdout" : null, + contextSnapshot: { issueId }, + stdoutExcerpt: "OPENAI_API_KEY=sk-test-secret-value should not leak", + logBytes: 0, + }); + if (opts.logChunk) { + const store = getRunLogStore(); + const handle = await store.begin({ companyId, agentId: coderId, runId }); + const logBytes = await store.append(handle, { + stream: "stdout", + chunk: opts.logChunk, + ts: startedAt.toISOString(), + }); + await db + .update(heartbeatRuns) + .set({ + logStore: handle.store, + logRef: handle.logRef, + logBytes, + }) + .where(eq(heartbeatRuns.id, runId)); + } + await db.update(issues).set({ executionRunId: runId }).where(eq(issues.id, issueId)); + return { companyId, managerId, coderId, issueId, runId, issuePrefix }; + } + + it("creates one medium-priority evaluation issue for a suspicious silent run", async () => { + const now = new Date("2026-04-22T20:00:00.000Z"); + const { companyId, managerId, runId } = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000, + }); + const heartbeat = heartbeatService(db); + + const first = await heartbeat.scanSilentActiveRuns({ now, companyId }); + const second = await heartbeat.scanSilentActiveRuns({ now, companyId }); + + expect(first.created).toBe(1); + expect(second.created).toBe(0); + expect(second.existing).toBe(1); + + const evaluations = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation"))); + expect(evaluations).toHaveLength(1); + expect(["todo", "in_progress"]).toContain(evaluations[0]?.status); + expect(evaluations[0]).toMatchObject({ + priority: "medium", + assigneeAgentId: managerId, + originId: runId, + originFingerprint: `stale_active_run:${companyId}:${runId}`, + }); + expect(evaluations[0]?.description).toContain("Decision Checklist"); + expect(evaluations[0]?.description).not.toContain("sk-test-secret-value"); + }); + + it("redacts sensitive values from actual run-log evidence", async () => { + 
const now = new Date("2026-04-22T20:00:00.000Z"); + const leakedJwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"; + const leakedGithubToken = "ghp_1234567890abcdefghijklmnopqrstuvwxyz"; + const { companyId } = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000, + logChunk: [ + "Authorization: Bearer live-bearer-token-value", + `POST payload {"apiKey":"json-secret-value","token":"${leakedJwt}"}`, + `GITHUB_TOKEN=${leakedGithubToken}`, + ].join("\n"), + }); + const heartbeat = heartbeatService(db); + + await heartbeat.scanSilentActiveRuns({ now, companyId }); + + const [evaluation] = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation"))); + expect(evaluation?.description).toContain("***REDACTED***"); + expect(evaluation?.description).not.toContain("live-bearer-token-value"); + expect(evaluation?.description).not.toContain("json-secret-value"); + expect(evaluation?.description).not.toContain(leakedJwt); + expect(evaluation?.description).not.toContain(leakedGithubToken); + }); + + it("raises critical stale-run evaluations and blocks the source issue", async () => { + const now = new Date("2026-04-22T20:00:00.000Z"); + const { companyId, issueId } = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000, + }); + const heartbeat = heartbeatService(db); + + const result = await heartbeat.scanSilentActiveRuns({ now, companyId }); + + expect(result.created).toBe(1); + const [evaluation] = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation"))); + expect(evaluation?.priority).toBe("high"); + + const [blocker] = await db + .select() + .from(issueRelations) + .where(and(eq(issueRelations.companyId, companyId), eq(issueRelations.relatedIssueId, issueId))); + 
expect(blocker?.issueId).toBe(evaluation?.id); + + const [source] = await db.select().from(issues).where(eq(issues.id, issueId)); + expect(source?.status).toBe("blocked"); + }); + + it("skips snoozed runs and healthy noisy runs", async () => { + const now = new Date("2026-04-22T20:00:00.000Z"); + const stale = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000, + }); + const noisy = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000, + withOutput: true, + }); + await db.insert(heartbeatRunWatchdogDecisions).values({ + companyId: stale.companyId, + runId: stale.runId, + decision: "snooze", + snoozedUntil: new Date(now.getTime() + 60 * 60 * 1000), + reason: "Intentional quiet run", + }); + const heartbeat = heartbeatService(db); + + const staleResult = await heartbeat.scanSilentActiveRuns({ now, companyId: stale.companyId }); + const noisyResult = await heartbeat.scanSilentActiveRuns({ now, companyId: noisy.companyId }); + + expect(staleResult).toMatchObject({ created: 0, snoozed: 1 }); + expect(noisyResult).toMatchObject({ scanned: 0, created: 0 }); + }); + + it("records watchdog decisions through recovery owner authorization", async () => { + const now = new Date("2026-04-22T20:00:00.000Z"); + const { companyId, managerId, runId } = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000, + }); + const heartbeat = heartbeatService(db); + const recovery = recoveryService(db, { enqueueWakeup: vi.fn() }); + + const scan = await heartbeat.scanSilentActiveRuns({ now, companyId }); + const evaluationIssueId = scan.evaluationIssueIds[0]; + expect(evaluationIssueId).toBeTruthy(); + + await expect( + recovery.recordWatchdogDecision({ + runId, + actor: { type: "agent", agentId: randomUUID() }, + decision: "continue", + evaluationIssueId, + reason: "not my recovery issue", + }), + ).rejects.toMatchObject({ status: 403 }); + + const snoozedUntil = new Date(now.getTime() + 
60 * 60 * 1000); + const decision = await recovery.recordWatchdogDecision({ + runId, + actor: { type: "agent", agentId: managerId }, + decision: "snooze", + evaluationIssueId, + reason: "Long compile with no output", + snoozedUntil, + }); + + expect(decision).toMatchObject({ + runId, + evaluationIssueId, + decision: "snooze", + createdByAgentId: managerId, + }); + await expect(recovery.buildRunOutputSilence({ + id: runId, + companyId, + status: "running", + lastOutputAt: null, + lastOutputSeq: 0, + lastOutputStream: null, + processStartedAt: new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS - 60_000), + startedAt: new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS - 60_000), + createdAt: new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS - 60_000), + }, now)).resolves.toMatchObject({ + level: "snoozed", + snoozedUntil, + evaluationIssueId, + }); + }); + + it("re-arms continue decisions after the default quiet window", async () => { + const now = new Date("2026-04-22T20:00:00.000Z"); + const { companyId, managerId, runId } = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000, + }); + const heartbeat = heartbeatService(db); + const recovery = recoveryService(db, { enqueueWakeup: vi.fn() }); + + const scan = await heartbeat.scanSilentActiveRuns({ now, companyId }); + const evaluationIssueId = scan.evaluationIssueIds[0]; + expect(evaluationIssueId).toBeTruthy(); + + const decision = await recovery.recordWatchdogDecision({ + runId, + actor: { type: "agent", agentId: managerId }, + decision: "continue", + evaluationIssueId, + reason: "Current evidence is acceptable; keep watching.", + now, + }); + const rearmAt = new Date(now.getTime() + ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS); + expect(decision).toMatchObject({ + runId, + evaluationIssueId, + decision: "continue", + createdByAgentId: managerId, + }); + expect(decision.snoozedUntil?.toISOString()).toBe(rearmAt.toISOString()); + + await 
db.update(issues).set({ status: "done" }).where(eq(issues.id, evaluationIssueId)); + + const beforeRearm = await heartbeat.scanSilentActiveRuns({ + now: new Date(rearmAt.getTime() - 60_000), + companyId, + }); + expect(beforeRearm).toMatchObject({ created: 0, snoozed: 1 }); + + const afterRearm = await heartbeat.scanSilentActiveRuns({ + now: new Date(rearmAt.getTime() + 60_000), + companyId, + }); + expect(afterRearm.created).toBe(1); + expect(afterRearm.evaluationIssueIds[0]).not.toBe(evaluationIssueId); + + const evaluations = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation"))); + expect(evaluations.filter((issue) => !["done", "cancelled"].includes(issue.status))).toHaveLength(1); + }); + + it("rejects agent watchdog decisions using issues not bound to the target run", async () => { + const now = new Date("2026-04-22T20:00:00.000Z"); + const { companyId, managerId, coderId, runId, issuePrefix } = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000, + }); + const heartbeat = heartbeatService(db); + const recovery = recoveryService(db, { enqueueWakeup: vi.fn() }); + + const scan = await heartbeat.scanSilentActiveRuns({ now, companyId }); + const evaluationIssueId = scan.evaluationIssueIds[0]; + expect(evaluationIssueId).toBeTruthy(); + + const unrelatedIssueId = randomUUID(); + await db.insert(issues).values({ + id: unrelatedIssueId, + companyId, + title: "Assigned but unrelated", + status: "todo", + priority: "medium", + assigneeAgentId: managerId, + issueNumber: 20, + identifier: `${issuePrefix}-20`, + }); + + const otherRunId = randomUUID(); + const otherEvaluationIssueId = randomUUID(); + await db.insert(heartbeatRuns).values({ + id: otherRunId, + companyId, + agentId: coderId, + status: "running", + invocationSource: "assignment", + triggerDetail: "system", + startedAt: new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS - 
120_000), + processStartedAt: new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS - 120_000), + lastOutputAt: null, + lastOutputSeq: 0, + lastOutputStream: null, + contextSnapshot: {}, + logBytes: 0, + }); + await db.insert(issues).values({ + id: otherEvaluationIssueId, + companyId, + title: "Other run evaluation", + status: "todo", + priority: "medium", + assigneeAgentId: managerId, + issueNumber: 21, + identifier: `${issuePrefix}-21`, + originKind: "stale_active_run_evaluation", + originId: otherRunId, + originFingerprint: `stale_active_run:${companyId}:${otherRunId}`, + }); + + const attempts = [ + { decision: "continue" as const, evaluationIssueId: unrelatedIssueId }, + { decision: "dismissed_false_positive" as const, evaluationIssueId: unrelatedIssueId }, + { + decision: "snooze" as const, + evaluationIssueId: unrelatedIssueId, + snoozedUntil: new Date(now.getTime() + 60 * 60 * 1000), + }, + { decision: "continue" as const, evaluationIssueId: otherEvaluationIssueId }, + ]; + + for (const attempt of attempts) { + await expect( + recovery.recordWatchdogDecision({ + runId, + actor: { type: "agent", agentId: managerId }, + reason: "malicious or stale binding", + ...attempt, + }), + ).rejects.toMatchObject({ status: 403 }); + } + + await db.update(issues).set({ status: "done" }).where(eq(issues.id, evaluationIssueId)); + await expect( + recovery.recordWatchdogDecision({ + runId, + actor: { type: "agent", agentId: managerId }, + decision: "continue", + evaluationIssueId, + reason: "closed evaluation should not authorize", + }), + ).rejects.toMatchObject({ status: 403 }); + }); + + it("validates createdByRunId before storing watchdog decisions", async () => { + const now = new Date("2026-04-22T20:00:00.000Z"); + const { companyId, managerId, runId } = await seedRunningRun({ + now, + ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000, + }); + const heartbeat = heartbeatService(db); + const recovery = recoveryService(db, { enqueueWakeup: vi.fn() }); 
+ + const scan = await heartbeat.scanSilentActiveRuns({ now, companyId }); + const evaluationIssueId = scan.evaluationIssueIds[0]; + expect(evaluationIssueId).toBeTruthy(); + + await expect( + recovery.recordWatchdogDecision({ + runId, + actor: { type: "agent", agentId: managerId }, + decision: "continue", + evaluationIssueId, + reason: "client supplied another agent run", + createdByRunId: runId, + }), + ).rejects.toMatchObject({ status: 403 }); + + const managerRunId = randomUUID(); + await db.insert(heartbeatRuns).values({ + id: managerRunId, + companyId, + agentId: managerId, + status: "running", + invocationSource: "assignment", + triggerDetail: "system", + startedAt: now, + processStartedAt: now, + lastOutputAt: now, + lastOutputSeq: 1, + lastOutputStream: "stdout", + contextSnapshot: {}, + logBytes: 0, + }); + + const decision = await recovery.recordWatchdogDecision({ + runId, + actor: { type: "agent", agentId: managerId, runId: managerRunId }, + decision: "continue", + evaluationIssueId, + reason: "valid current actor run", + createdByRunId: randomUUID(), + }); + expect(decision.createdByRunId).toBe(managerRunId); + }); +}); diff --git a/server/src/__tests__/heartbeat-comment-wake-batching.test.ts b/server/src/__tests__/heartbeat-comment-wake-batching.test.ts index 4c3887d3..de1b8005 100644 --- a/server/src/__tests__/heartbeat-comment-wake-batching.test.ts +++ b/server/src/__tests__/heartbeat-comment-wake-batching.test.ts @@ -1,8 +1,4 @@ import { randomUUID } from "node:crypto"; -import fs from "node:fs"; -import net from "node:net"; -import os from "node:os"; -import path from "node:path"; import { createServer } from "node:http"; import { and, asc, eq } from "drizzle-orm"; import { WebSocketServer } from "ws"; @@ -10,81 +6,14 @@ import { afterAll, beforeAll, describe, expect, it } from "vitest"; import { agents, agentWakeupRequests, - applyPendingMigrations, companies, createDb, - ensurePostgresDatabase, heartbeatRuns, issueComments, issues, } from 
"@paperclipai/db"; import { heartbeatService } from "../services/heartbeat.ts"; - -type EmbeddedPostgresInstance = { - initialise(): Promise; - start(): Promise; - stop(): Promise; -}; - -type EmbeddedPostgresCtor = new (opts: { - databaseDir: string; - user: string; - password: string; - port: number; - persistent: boolean; - initdbFlags?: string[]; - onLog?: (message: unknown) => void; - onError?: (message: unknown) => void; -}) => EmbeddedPostgresInstance; - -async function getEmbeddedPostgresCtor(): Promise { - const mod = await import("embedded-postgres"); - return mod.default as EmbeddedPostgresCtor; -} - -async function getAvailablePort(): Promise { - return await new Promise((resolve, reject) => { - const server = net.createServer(); - server.unref(); - server.on("error", reject); - server.listen(0, "127.0.0.1", () => { - const address = server.address(); - if (!address || typeof address === "string") { - server.close(() => reject(new Error("Failed to allocate test port"))); - return; - } - const { port } = address; - server.close((error) => { - if (error) reject(error); - else resolve(port); - }); - }); - }); -} - -async function startTempDatabase() { - const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-heartbeat-comment-wake-")); - const port = await getAvailablePort(); - const EmbeddedPostgres = await getEmbeddedPostgresCtor(); - const instance = new EmbeddedPostgres({ - databaseDir: dataDir, - user: "paperclip", - password: "paperclip", - port, - persistent: true, - initdbFlags: ["--encoding=UTF8", "--locale=C", "--lc-messages=C"], - onLog: () => {}, - onError: () => {}, - }); - await instance.initialise(); - await instance.start(); - - const adminConnectionString = `postgres://paperclip:paperclip@127.0.0.1:${port}/postgres`; - await ensurePostgresDatabase(adminConnectionString, "paperclip"); - const connectionString = `postgres://paperclip:paperclip@127.0.0.1:${port}/paperclip`; - await applyPendingMigrations(connectionString); - return { 
connectionString, instance, dataDir }; -} +import { startEmbeddedPostgresTestDatabase } from "./helpers/embedded-postgres.ts"; async function waitFor(condition: () => boolean | Promise, timeoutMs = 10_000, intervalMs = 50) { const startedAt = Date.now(); @@ -218,22 +147,17 @@ async function createControlledGatewayServer() { describe("heartbeat comment wake batching", () => { let db!: ReturnType; - let instance: EmbeddedPostgresInstance | null = null; - let dataDir = ""; + let tempDb: Awaited> | null = null; beforeAll(async () => { - const started = await startTempDatabase(); + const started = await startEmbeddedPostgresTestDatabase("paperclip-heartbeat-comment-wake-"); db = createDb(started.connectionString); - instance = started.instance; - dataDir = started.dataDir; - }, 45_000); + tempDb = started; + }, 120_000); afterAll(async () => { await closeDbClient(db); - await instance?.stop(); - if (dataDir) { - fs.rmSync(dataDir, { recursive: true, force: true }); - } + await tempDb?.cleanup(); }); it("defers approval-approved wakes for a running issue so the assignee resumes after the run", async () => { @@ -862,6 +786,206 @@ describe("heartbeat comment wake batching", () => { } }, 120_000); + it("does not reopen a finished issue when the deferred comment wake came from another agent", async () => { + const gateway = await createControlledGatewayServer(); + const companyId = randomUUID(); + const assigneeAgentId = randomUUID(); + const mentionedAgentId = randomUUID(); + const issueId = randomUUID(); + const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`; + const heartbeat = heartbeatService(db); + + try { + await db.insert(companies).values({ + id: companyId, + name: "Paperclip", + issuePrefix, + requireBoardApprovalForNewAgents: false, + }); + + await db.insert(agents).values([ + { + id: assigneeAgentId, + companyId, + name: "Primary Agent", + role: "engineer", + status: "idle", + adapterType: "openclaw_gateway", + adapterConfig: { + url: 
gateway.url, + headers: { + "x-openclaw-token": "gateway-token", + }, + payloadTemplate: { + message: "wake now", + }, + waitTimeoutMs: 2_000, + }, + runtimeConfig: {}, + permissions: {}, + }, + { + id: mentionedAgentId, + companyId, + name: "Mentioned Agent", + role: "engineer", + status: "idle", + adapterType: "openclaw_gateway", + adapterConfig: { + url: gateway.url, + headers: { + "x-openclaw-token": "gateway-token", + }, + payloadTemplate: { + message: "wake now", + }, + waitTimeoutMs: 2_000, + }, + runtimeConfig: {}, + permissions: {}, + }, + ]); + + await db.insert(issues).values({ + id: issueId, + companyId, + title: "Do not reopen from agent mention", + status: "todo", + priority: "medium", + assigneeAgentId, + issueNumber: 1, + identifier: `${issuePrefix}-1`, + }); + + const firstRun = await heartbeat.wakeup(assigneeAgentId, { + source: "assignment", + triggerDetail: "system", + reason: "issue_assigned", + payload: { issueId }, + contextSnapshot: { + issueId, + taskId: issueId, + wakeReason: "issue_assigned", + }, + requestedByActorType: "system", + requestedByActorId: null, + }); + + expect(firstRun).not.toBeNull(); + await waitFor(async () => { + const run = await db + .select({ status: heartbeatRuns.status }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, firstRun!.id)) + .then((rows) => rows[0] ?? null); + return run?.status === "running"; + }); + + const comment = await db + .insert(issueComments) + .values({ + companyId, + issueId, + authorAgentId: assigneeAgentId, + createdByRunId: firstRun?.id ?? 
null, + body: "@Mentioned Agent please review after I finish", + }) + .returning() + .then((rows) => rows[0]); + + const deferredRun = await heartbeat.wakeup(mentionedAgentId, { + source: "automation", + triggerDetail: "system", + reason: "issue_comment_mentioned", + payload: { issueId, commentId: comment.id }, + contextSnapshot: { + issueId, + taskId: issueId, + commentId: comment.id, + wakeCommentId: comment.id, + wakeReason: "issue_comment_mentioned", + source: "comment.mention", + }, + requestedByActorType: "agent", + requestedByActorId: assigneeAgentId, + }); + + expect(deferredRun).toBeNull(); + + await waitFor(async () => { + const deferred = await db + .select() + .from(agentWakeupRequests) + .where( + and( + eq(agentWakeupRequests.companyId, companyId), + eq(agentWakeupRequests.agentId, mentionedAgentId), + eq(agentWakeupRequests.status, "deferred_issue_execution"), + ), + ) + .then((rows) => rows[0] ?? null); + return Boolean(deferred); + }); + + await db + .update(issues) + .set({ + status: "done", + completedAt: new Date(), + executionRunId: null, + executionAgentNameKey: null, + executionLockedAt: null, + updatedAt: new Date(), + }) + .where(eq(issues.id, issueId)); + + gateway.releaseFirstWait(); + + await waitFor(() => gateway.getAgentPayloads().length === 2, 90_000); + await waitFor(async () => { + const runs = await db + .select() + .from(heartbeatRuns) + .where(eq(heartbeatRuns.companyId, companyId)); + return runs.length === 2 && runs.every((run) => run.status === "succeeded"); + }, 90_000); + + const issueAfterPromotion = await db + .select({ + status: issues.status, + completedAt: issues.completedAt, + }) + .from(issues) + .where(eq(issues.id, issueId)) + .then((rows) => rows[0] ?? null); + + expect(issueAfterPromotion).toMatchObject({ + status: "done", + }); + expect(issueAfterPromotion?.completedAt).not.toBeNull(); + + const secondPayload = gateway.getAgentPayloads()[1] ?? 
{}; + expect(secondPayload.paperclip).toMatchObject({ + wake: { + reason: "issue_comment_mentioned", + commentIds: [comment.id], + latestCommentId: comment.id, + issue: { + id: issueId, + identifier: `${issuePrefix}-1`, + title: "Do not reopen from agent mention", + status: "done", + priority: "medium", + }, + }, + }); + expect(String(secondPayload.message ?? "")).toContain("please review after I finish"); + } finally { + gateway.releaseFirstWait(); + await gateway.close(); + } + }, 120_000); + it("queues exactly one follow-up run when an issue-bound run exits without a comment", async () => { const gateway = await createControlledGatewayServer(); const companyId = randomUUID(); @@ -1172,6 +1296,20 @@ describe("heartbeat comment wake batching", () => { wakeReason: "issue_comment_mentioned", }); + const issueAfterMention = await db + .select({ + assigneeAgentId: issues.assigneeAgentId, + executionRunId: issues.executionRunId, + executionAgentNameKey: issues.executionAgentNameKey, + }) + .from(issues) + .where(eq(issues.id, issueId)) + .then((rows) => rows[0] ?? 
null); + + expect(issueAfterMention?.assigneeAgentId).toBe(primaryAgentId); + expect(issueAfterMention?.executionRunId).not.toBe(mentionedRuns[0]?.id); + expect(issueAfterMention?.executionAgentNameKey).not.toBe("mentioned agent"); + const primaryRuns = await db .select() .from(heartbeatRuns) @@ -1198,6 +1336,155 @@ describe("heartbeat comment wake batching", () => { await gateway.close(); } }, 120_000); + + it("does not mark a direct mentioned-agent run as the issue execution owner", async () => { + const gateway = await createControlledGatewayServer(); + const companyId = randomUUID(); + const primaryAgentId = randomUUID(); + const mentionedAgentId = randomUUID(); + const issueId = randomUUID(); + const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`; + const heartbeat = heartbeatService(db); + + try { + await db.insert(companies).values({ + id: companyId, + name: "Paperclip", + issuePrefix, + requireBoardApprovalForNewAgents: false, + }); + + await db.insert(agents).values([ + { + id: primaryAgentId, + companyId, + name: "Primary Agent", + role: "engineer", + status: "idle", + adapterType: "openclaw_gateway", + adapterConfig: { + url: gateway.url, + headers: { + "x-openclaw-token": "gateway-token", + }, + payloadTemplate: { + message: "wake now", + }, + waitTimeoutMs: 2_000, + }, + runtimeConfig: {}, + permissions: {}, + }, + { + id: mentionedAgentId, + companyId, + name: "Mentioned Agent", + role: "engineer", + status: "idle", + adapterType: "openclaw_gateway", + adapterConfig: { + url: gateway.url, + headers: { + "x-openclaw-token": "gateway-token", + }, + payloadTemplate: { + message: "wake now", + }, + waitTimeoutMs: 2_000, + }, + runtimeConfig: {}, + permissions: {}, + }, + ]); + + await db.insert(issues).values({ + id: issueId, + companyId, + title: "Mention should not steal execution ownership", + status: "todo", + priority: "medium", + assigneeAgentId: primaryAgentId, + issueNumber: 1, + identifier: `${issuePrefix}-1`, + }); + + 
const mentionComment = await db + .insert(issueComments) + .values({ + companyId, + issueId, + authorUserId: "user-1", + body: "@Mentioned Agent please inspect this.", + }) + .returning() + .then((rows) => rows[0]); + + const mentionRun = await heartbeat.wakeup(mentionedAgentId, { + source: "automation", + triggerDetail: "system", + reason: "issue_comment_mentioned", + payload: { issueId, commentId: mentionComment.id }, + contextSnapshot: { + issueId, + taskId: issueId, + commentId: mentionComment.id, + wakeCommentId: mentionComment.id, + wakeReason: "issue_comment_mentioned", + source: "comment.mention", + }, + requestedByActorType: "user", + requestedByActorId: "user-1", + }); + + expect(mentionRun).not.toBeNull(); + await waitFor(() => gateway.getAgentPayloads().length === 1); + + const issueDuringMention = await db + .select({ + assigneeAgentId: issues.assigneeAgentId, + executionRunId: issues.executionRunId, + executionAgentNameKey: issues.executionAgentNameKey, + }) + .from(issues) + .where(eq(issues.id, issueId)) + .then((rows) => rows[0] ?? null); + + expect(issueDuringMention).toMatchObject({ + assigneeAgentId: primaryAgentId, + executionRunId: null, + executionAgentNameKey: null, + }); + + gateway.releaseFirstWait(); + await waitFor(async () => { + const run = await db + .select({ status: heartbeatRuns.status }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, mentionRun!.id)) + .then((rows) => rows[0] ?? null); + return run?.status === "succeeded"; + }, 90_000); + + const issueAfterMention = await db + .select({ + assigneeAgentId: issues.assigneeAgentId, + executionRunId: issues.executionRunId, + executionAgentNameKey: issues.executionAgentNameKey, + }) + .from(issues) + .where(eq(issues.id, issueId)) + .then((rows) => rows[0] ?? 
null); + + expect(issueAfterMention).toMatchObject({ + assigneeAgentId: primaryAgentId, + executionRunId: null, + executionAgentNameKey: null, + }); + } finally { + gateway.releaseFirstWait(); + await gateway.close(); + } + }, 120_000); it("treats the automatic run summary as fallback-only when the run already posted a comment", async () => { const gateway = await createControlledGatewayServer(); const companyId = randomUUID(); diff --git a/server/src/__tests__/heartbeat-dependency-scheduling.test.ts b/server/src/__tests__/heartbeat-dependency-scheduling.test.ts index 8dbe36cc..52106f7e 100644 --- a/server/src/__tests__/heartbeat-dependency-scheduling.test.ts +++ b/server/src/__tests__/heartbeat-dependency-scheduling.test.ts @@ -347,6 +347,198 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () = expect(blockedWakeRequestCount).toBeGreaterThanOrEqual(2); }); + it("cancels stale queued runs when issue blockers are still unresolved", async () => { + const companyId = randomUUID(); + const agentId = randomUUID(); + const blockerId = randomUUID(); + const blockedIssueId = randomUUID(); + const readyIssueId = randomUUID(); + const blockedWakeupRequestId = randomUUID(); + const readyWakeupRequestId = randomUUID(); + const blockedRunId = randomUUID(); + const readyRunId = randomUUID(); + + await db.insert(companies).values({ + id: companyId, + name: "Paperclip", + issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`, + requireBoardApprovalForNewAgents: false, + }); + await db.insert(agents).values({ + id: agentId, + companyId, + name: "QAChecker", + role: "qa", + status: "active", + adapterType: "codex_local", + adapterConfig: {}, + runtimeConfig: { + heartbeat: { + wakeOnDemand: true, + maxConcurrentRuns: 2, + }, + }, + permissions: {}, + }); + await db.insert(issues).values([ + { + id: blockerId, + companyId, + title: "Security review", + status: "blocked", + priority: "high", + }, + { + id: blockedIssueId, + companyId, 
+ title: "QA validation", + status: "blocked", + priority: "medium", + assigneeAgentId: agentId, + }, + { + id: readyIssueId, + companyId, + title: "Ready QA task", + status: "todo", + priority: "low", + assigneeAgentId: agentId, + }, + ]); + await db.insert(issueRelations).values({ + companyId, + issueId: blockerId, + relatedIssueId: blockedIssueId, + type: "blocks", + }); + await db.insert(agentWakeupRequests).values([ + { + id: blockedWakeupRequestId, + companyId, + agentId, + source: "automation", + triggerDetail: "system", + reason: "transient_failure_retry", + payload: { issueId: blockedIssueId }, + status: "queued", + }, + { + id: readyWakeupRequestId, + companyId, + agentId, + source: "assignment", + triggerDetail: "system", + reason: "issue_assigned", + payload: { issueId: readyIssueId }, + status: "queued", + }, + ]); + await db.insert(heartbeatRuns).values([ + { + id: blockedRunId, + companyId, + agentId, + invocationSource: "automation", + triggerDetail: "system", + status: "queued", + wakeupRequestId: blockedWakeupRequestId, + contextSnapshot: { + issueId: blockedIssueId, + wakeReason: "transient_failure_retry", + }, + }, + { + id: readyRunId, + companyId, + agentId, + invocationSource: "assignment", + triggerDetail: "system", + status: "queued", + wakeupRequestId: readyWakeupRequestId, + contextSnapshot: { + issueId: readyIssueId, + wakeReason: "issue_assigned", + }, + }, + ]); + await db + .update(agentWakeupRequests) + .set({ runId: blockedRunId }) + .where(eq(agentWakeupRequests.id, blockedWakeupRequestId)); + await db + .update(agentWakeupRequests) + .set({ runId: readyRunId }) + .where(eq(agentWakeupRequests.id, readyWakeupRequestId)); + await db + .update(issues) + .set({ + executionRunId: blockedRunId, + executionAgentNameKey: "qa-checker", + executionLockedAt: new Date(), + }) + .where(eq(issues.id, blockedIssueId)); + + await heartbeat.resumeQueuedRuns(); + + await waitForCondition(async () => { + const run = await db + .select({ status: 
heartbeatRuns.status }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, readyRunId)) + .then((rows) => rows[0] ?? null); + return run?.status === "succeeded"; + }); + + const [blockedRun, blockedWakeup, blockedIssue, readyRun] = await Promise.all([ + db + .select({ + status: heartbeatRuns.status, + errorCode: heartbeatRuns.errorCode, + finishedAt: heartbeatRuns.finishedAt, + resultJson: heartbeatRuns.resultJson, + }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, blockedRunId)) + .then((rows) => rows[0] ?? null), + db + .select({ + status: agentWakeupRequests.status, + error: agentWakeupRequests.error, + }) + .from(agentWakeupRequests) + .where(eq(agentWakeupRequests.id, blockedWakeupRequestId)) + .then((rows) => rows[0] ?? null), + db + .select({ + executionRunId: issues.executionRunId, + executionAgentNameKey: issues.executionAgentNameKey, + executionLockedAt: issues.executionLockedAt, + }) + .from(issues) + .where(eq(issues.id, blockedIssueId)) + .then((rows) => rows[0] ?? null), + db + .select({ status: heartbeatRuns.status }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, readyRunId)) + .then((rows) => rows[0] ?? 
null), + ]); + + expect(blockedRun?.status).toBe("cancelled"); + expect(blockedRun?.errorCode).toBe("issue_dependencies_blocked"); + expect(blockedRun?.finishedAt).toBeTruthy(); + expect(blockedRun?.resultJson).toMatchObject({ stopReason: "issue_dependencies_blocked" }); + expect(blockedWakeup?.status).toBe("skipped"); + expect(blockedWakeup?.error).toContain("dependencies are still blocked"); + expect(blockedIssue).toMatchObject({ + executionRunId: null, + executionAgentNameKey: null, + executionLockedAt: null, + }); + expect(readyRun?.status).toBe("succeeded"); + expect(mockAdapterExecute).toHaveBeenCalledTimes(1); + }); + it("suppresses normal wakeups while allowing comment interaction wakes under a pause hold", async () => { const companyId = randomUUID(); const agentId = randomUUID(); @@ -425,12 +617,39 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () = .then((rows) => rows[0] ?? null); expect(skippedWake).toMatchObject({ status: "skipped", reason: "issue_tree_hold_active" }); + const childCommentId = randomUUID(); + await db.insert(issueComments).values({ + id: childCommentId, + companyId, + issueId: childIssueId, + authorUserId: "board-user", + body: "Please respond while this hold is active.", + }); + + const forgedChildCommentWake = await heartbeat.wakeup(agentId, { + source: "on_demand", + triggerDetail: "manual", + reason: "issue_commented", + payload: { issueId: childIssueId, commentId: childCommentId }, + requestedByActorType: "agent", + requestedByActorId: agentId, + }); + expect(forgedChildCommentWake).toBeNull(); + const childCommentWake = await heartbeat.wakeup(agentId, { source: "automation", triggerDetail: "system", reason: "issue_commented", - payload: { issueId: childIssueId, commentId: randomUUID() }, - contextSnapshot: { issueId: childIssueId, wakeReason: "issue_commented" }, + payload: { issueId: childIssueId, commentId: childCommentId }, + requestedByActorType: "user", + requestedByActorId: "board-user", + 
contextSnapshot: { + issueId: childIssueId, + commentId: childCommentId, + wakeCommentId: childCommentId, + wakeReason: "issue_commented", + source: "issue.comment", + }, }); expect(childCommentWake).not.toBeNull(); @@ -494,12 +713,29 @@ describeEmbeddedPostgres("heartbeat dependency-aware queued run selection", () = releasePolicy: { strategy: "manual", note: "full_pause" }, }); + const rootCommentId = randomUUID(); + await db.insert(issueComments).values({ + id: rootCommentId, + companyId, + issueId: rootIssueId, + authorUserId: "board-user", + body: "Please respond while this hold is active.", + }); + const rootCommentWake = await heartbeat.wakeup(agentId, { source: "automation", triggerDetail: "system", reason: "issue_commented", - payload: { issueId: rootIssueId, commentId: randomUUID() }, - contextSnapshot: { issueId: rootIssueId, wakeReason: "issue_commented" }, + payload: { issueId: rootIssueId, commentId: rootCommentId }, + requestedByActorType: "user", + requestedByActorId: "board-user", + contextSnapshot: { + issueId: rootIssueId, + commentId: rootCommentId, + wakeCommentId: rootCommentId, + wakeReason: "issue_commented", + source: "issue.comment", + }, }); expect(rootCommentWake).not.toBeNull(); diff --git a/server/src/__tests__/heartbeat-issue-liveness-escalation.test.ts b/server/src/__tests__/heartbeat-issue-liveness-escalation.test.ts index 5e3f80ab..0a33094d 100644 --- a/server/src/__tests__/heartbeat-issue-liveness-escalation.test.ts +++ b/server/src/__tests__/heartbeat-issue-liveness-escalation.test.ts @@ -4,13 +4,16 @@ import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest import { activityLog, agents, - agentWakeupRequests, companies, createDb, + executionWorkspaces, heartbeatRuns, issueComments, issueRelations, + issueTreeHolds, issues, + projects, + projectWorkspaces, } from "@paperclipai/db"; import { getEmbeddedPostgresTestSupport, @@ -55,6 +58,7 @@ vi.mock("../adapters/index.ts", async () => { }); import { 
heartbeatService } from "../services/heartbeat.ts"; +import { instanceSettingsService } from "../services/instance-settings.ts"; import { runningProcesses } from "../adapters/index.ts"; const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport(); @@ -94,13 +98,23 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { } await new Promise((resolve) => setTimeout(resolve, 50)); await db.execute(sql.raw(`TRUNCATE TABLE "companies" CASCADE`)); + await instanceSettingsService(db).updateExperimental({ + enableIssueGraphLivenessAutoRecovery: false, + enableIsolatedWorkspaces: false, + }); }); afterAll(async () => { await tempDb?.cleanup(); }); - async function seedBlockedChain() { + async function enableAutoRecovery() { + await instanceSettingsService(db).updateExperimental({ + enableIssueGraphLivenessAutoRecovery: true, + }); + } + + async function seedBlockedChain(opts: { stale?: boolean } = {}) { const companyId = randomUUID(); const managerId = randomUUID(); const coderId = randomUUID(); @@ -124,7 +138,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { status: "idle", adapterType: "codex_local", adapterConfig: {}, - runtimeConfig: {}, + runtimeConfig: { heartbeat: { wakeOnDemand: false } }, permissions: {}, }, { @@ -136,11 +150,14 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { reportsTo: managerId, adapterType: "codex_local", adapterConfig: {}, - runtimeConfig: {}, + runtimeConfig: { heartbeat: { wakeOnDemand: false } }, permissions: {}, }, ]); + const issueTimestamp = opts.stale === false + ? 
new Date() + : new Date(Date.now() - 25 * 60 * 60 * 1000); await db.insert(issues).values([ { id: blockedIssueId, @@ -151,6 +168,8 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { assigneeAgentId: coderId, issueNumber: 1, identifier: `${issuePrefix}-1`, + createdAt: issueTimestamp, + updatedAt: issueTimestamp, }, { id: blockerIssueId, @@ -160,6 +179,8 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { priority: "medium", issueNumber: 2, identifier: `${issuePrefix}-2`, + createdAt: issueTimestamp, + updatedAt: issueTimestamp, }, ]); @@ -173,7 +194,91 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { return { companyId, managerId, blockedIssueId, blockerIssueId }; } - it("creates one manager escalation, preserves blockers, and wakes the assignee", async () => { + it("keeps liveness findings advisory when auto recovery is disabled", async () => { + const { companyId } = await seedBlockedChain(); + const heartbeat = heartbeatService(db); + + const result = await heartbeat.reconcileIssueGraphLiveness(); + + expect(result.findings).toBe(1); + expect(result.autoRecoveryEnabled).toBe(false); + expect(result.escalationsCreated).toBe(0); + expect(result.skippedAutoRecoveryDisabled).toBe(1); + + const escalations = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "harness_liveness_escalation"))); + expect(escalations).toHaveLength(0); + }); + + it("does not create recovery issues until the dependency path is stale for 24 hours", async () => { + await enableAutoRecovery(); + const { companyId } = await seedBlockedChain({ stale: false }); + const heartbeat = heartbeatService(db); + + const result = await heartbeat.reconcileIssueGraphLiveness(); + + expect(result.findings).toBe(1); + expect(result.escalationsCreated).toBe(0); + expect(result.skippedAutoRecoveryTooYoung).toBe(1); + + const escalations = await db + .select() + 
.from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "harness_liveness_escalation"))); + expect(escalations).toHaveLength(0); + }); + + it("suppresses liveness escalation when the source issue is under an active pause hold", async () => { + await enableAutoRecovery(); + const { companyId, blockedIssueId } = await seedBlockedChain(); + + await db.insert(issueTreeHolds).values({ + companyId, + rootIssueId: blockedIssueId, + mode: "pause", + status: "active", + reason: "pause liveness recovery subtree", + releasePolicy: { strategy: "manual" }, + }); + + const result = await heartbeatService(db).reconcileIssueGraphLiveness(); + + expect(result.findings).toBe(1); + expect(result.escalationsCreated).toBe(0); + expect(result.existingEscalations).toBe(0); + expect(result.skipped).toBe(1); + + const escalations = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "harness_liveness_escalation"))); + expect(escalations).toHaveLength(0); + }); + + it("treats an active executionRunId on the leaf blocker as a live execution path", async () => { + await enableAutoRecovery(); + const { companyId, managerId, blockedIssueId, blockerIssueId } = await seedBlockedChain(); + const runId = randomUUID(); + await db.insert(heartbeatRuns).values({ + id: runId, + companyId, + agentId: managerId, + status: "running", + contextSnapshot: { issueId: blockedIssueId }, + }); + await db.update(issues).set({ executionRunId: runId }).where(eq(issues.id, blockerIssueId)); + const heartbeat = heartbeatService(db); + + const result = await heartbeat.reconcileIssueGraphLiveness(); + + expect(result.findings).toBe(0); + expect(result.escalationsCreated).toBe(0); + }); + + it("creates one manager escalation, preserves blockers, and records owner selection", async () => { + await enableAutoRecovery(); const { companyId, managerId, blockedIssueId, blockerIssueId } = await seedBlockedChain(); const heartbeat = 
heartbeatService(db); @@ -182,7 +287,6 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { expect(first.escalationsCreated).toBe(1); expect(second.escalationsCreated).toBe(0); - expect(second.existingEscalations).toBe(1); const escalations = await db .select() @@ -195,9 +299,15 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { ); expect(escalations).toHaveLength(1); expect(escalations[0]).toMatchObject({ - parentId: blockedIssueId, + parentId: blockerIssueId, assigneeAgentId: managerId, status: expect.stringMatching(/^(todo|in_progress|done)$/), + originFingerprint: [ + "harness_liveness_leaf", + companyId, + "blocked_by_unassigned_issue", + blockerIssueId, + ].join(":"), }); const blockers = await db @@ -213,15 +323,217 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { expect(comments[0]?.body).toContain("harness-level liveness incident"); expect(comments[0]?.body).toContain(escalations[0]?.identifier ?? 
escalations[0]!.id); - const wakes = await db.select().from(agentWakeupRequests).where(eq(agentWakeupRequests.agentId, managerId)); - expect(wakes.some((wake) => wake.reason === "issue_assigned")).toBe(true); - const events = await db.select().from(activityLog).where(eq(activityLog.companyId, companyId)); - expect(events.some((event) => event.action === "issue.harness_liveness_escalation_created")).toBe(true); + const createdEvent = events.find((event) => event.action === "issue.harness_liveness_escalation_created"); + expect(createdEvent).toBeTruthy(); + expect(createdEvent?.details).toMatchObject({ + recoveryIssueId: blockerIssueId, + ownerSelection: { + selectedAgentId: managerId, + selectedReason: "root_agent", + selectedSourceIssueId: blockerIssueId, + }, + workspaceSelection: { + reuseRecoveryExecutionWorkspace: false, + inheritedExecutionWorkspaceFromIssueId: null, + projectWorkspaceSourceIssueId: blockerIssueId, + }, + }); expect(events.some((event) => event.action === "issue.blockers.updated")).toBe(true); }); + it("parents recovery under the leaf blocker without inheriting dependent or blocker execution state for manager-owned recovery", async () => { + await enableAutoRecovery(); + await instanceSettingsService(db).updateExperimental({ enableIsolatedWorkspaces: true }); + + const companyId = randomUUID(); + const managerId = randomUUID(); + const blockedIssueId = randomUUID(); + const blockerIssueId = randomUUID(); + const dependentProjectId = randomUUID(); + const blockerProjectId = randomUUID(); + const dependentProjectWorkspaceId = randomUUID(); + const blockerProjectWorkspaceId = randomUUID(); + const dependentExecutionWorkspaceId = randomUUID(); + const blockerExecutionWorkspaceId = randomUUID(); + const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`; + const issueTimestamp = new Date(Date.now() - 25 * 60 * 60 * 1000); + + await db.insert(companies).values({ + id: companyId, + name: "Paperclip", + issuePrefix, + 
requireBoardApprovalForNewAgents: false, + }); + await db.insert(agents).values({ + id: managerId, + companyId, + name: "Root Operator", + role: "operator", + status: "idle", + adapterType: "codex_local", + adapterConfig: {}, + runtimeConfig: { heartbeat: { wakeOnDemand: false } }, + permissions: {}, + }); + await db.insert(projects).values([ + { + id: dependentProjectId, + companyId, + name: "Dependent workspace project", + status: "in_progress", + }, + { + id: blockerProjectId, + companyId, + name: "Blocker workspace project", + status: "in_progress", + }, + ]); + await db.insert(projectWorkspaces).values([ + { + id: dependentProjectWorkspaceId, + companyId, + projectId: dependentProjectId, + name: "Dependent primary", + }, + { + id: blockerProjectWorkspaceId, + companyId, + projectId: blockerProjectId, + name: "Blocker primary", + }, + ]); + await db.insert(executionWorkspaces).values([ + { + id: dependentExecutionWorkspaceId, + companyId, + projectId: dependentProjectId, + projectWorkspaceId: dependentProjectWorkspaceId, + mode: "operator_branch", + strategyType: "git_worktree", + name: "Dependent branch", + status: "active", + providerType: "git_worktree", + }, + { + id: blockerExecutionWorkspaceId, + companyId, + projectId: blockerProjectId, + projectWorkspaceId: blockerProjectWorkspaceId, + mode: "operator_branch", + strategyType: "git_worktree", + name: "Blocker branch", + status: "active", + providerType: "git_worktree", + }, + ]); + await db.insert(issues).values([ + { + id: blockedIssueId, + companyId, + projectId: dependentProjectId, + projectWorkspaceId: dependentProjectWorkspaceId, + executionWorkspaceId: dependentExecutionWorkspaceId, + executionWorkspacePreference: "reuse_existing", + executionWorkspaceSettings: { mode: "operator_branch" }, + title: "Blocked dependent", + status: "blocked", + priority: "medium", + issueNumber: 1, + identifier: `${issuePrefix}-1`, + createdAt: issueTimestamp, + updatedAt: issueTimestamp, + }, + { + id: 
blockerIssueId, + companyId, + projectId: blockerProjectId, + projectWorkspaceId: blockerProjectWorkspaceId, + executionWorkspaceId: blockerExecutionWorkspaceId, + executionWorkspacePreference: "reuse_existing", + executionWorkspaceSettings: { mode: "operator_branch" }, + title: "Unassigned leaf blocker", + status: "todo", + priority: "medium", + issueNumber: 2, + identifier: `${issuePrefix}-2`, + createdAt: issueTimestamp, + updatedAt: issueTimestamp, + }, + ]); + await db.insert(issueRelations).values({ + companyId, + issueId: blockerIssueId, + relatedIssueId: blockedIssueId, + type: "blocks", + }); + + const result = await heartbeatService(db).reconcileIssueGraphLiveness(); + + expect(result.escalationsCreated).toBe(1); + const escalations = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "harness_liveness_escalation"))); + expect(escalations).toHaveLength(1); + expect(escalations[0]).toMatchObject({ + parentId: blockerIssueId, + projectId: blockerProjectId, + projectWorkspaceId: blockerProjectWorkspaceId, + executionWorkspaceId: null, + executionWorkspacePreference: null, + assigneeAgentId: managerId, + }); + }); + + it("reuses one open recovery issue for multiple dependents with the same leaf blocker", async () => { + await enableAutoRecovery(); + const { companyId, blockedIssueId, blockerIssueId } = await seedBlockedChain(); + const secondBlockedIssueId = randomUUID(); + const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`; + const issueTimestamp = new Date(Date.now() - 25 * 60 * 60 * 1000); + await db.insert(issues).values({ + id: secondBlockedIssueId, + companyId, + title: "Second blocked parent", + status: "blocked", + priority: "medium", + issueNumber: 3, + identifier: `${issuePrefix}-3`, + createdAt: issueTimestamp, + updatedAt: issueTimestamp, + }); + await db.insert(issueRelations).values({ + companyId, + issueId: blockerIssueId, + relatedIssueId: secondBlockedIssueId, 
+ type: "blocks", + }); + const heartbeat = heartbeatService(db); + + const result = await heartbeat.reconcileIssueGraphLiveness(); + + expect(result.findings).toBe(2); + expect(result.escalationsCreated).toBe(1); + expect(result.existingEscalations).toBe(1); + const escalations = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "harness_liveness_escalation"))); + expect(escalations).toHaveLength(1); + + const blockers = await db + .select({ blockedIssueId: issueRelations.relatedIssueId }) + .from(issueRelations) + .where(and(eq(issueRelations.companyId, companyId), eq(issueRelations.issueId, escalations[0]!.id))); + expect(blockers.map((row) => row.blockedIssueId).sort()).toEqual( + [blockedIssueId, secondBlockedIssueId].sort(), + ); + }); + it("creates a fresh escalation when the previous matching escalation is terminal", async () => { + await enableAutoRecovery(); const { companyId, managerId, blockedIssueId, blockerIssueId } = await seedBlockedChain(); const heartbeat = heartbeatService(db); const incidentKey = [ @@ -265,7 +577,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => { expect(openEscalations).toHaveLength(2); const freshEscalation = openEscalations.find((issue) => issue.status !== "done"); expect(freshEscalation).toMatchObject({ - parentId: blockedIssueId, + parentId: blockerIssueId, assigneeAgentId: managerId, status: expect.stringMatching(/^(todo|in_progress|done)$/), }); diff --git a/server/src/__tests__/heartbeat-process-recovery.test.ts b/server/src/__tests__/heartbeat-process-recovery.test.ts index d72a3f12..8f870ac6 100644 --- a/server/src/__tests__/heartbeat-process-recovery.test.ts +++ b/server/src/__tests__/heartbeat-process-recovery.test.ts @@ -1,6 +1,6 @@ import { randomUUID } from "node:crypto"; import { spawn, type ChildProcess } from "node:child_process"; -import { eq, or, inArray } from "drizzle-orm"; +import { and, eq, or, inArray } from 
"drizzle-orm"; import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest"; import { activityLog, @@ -17,6 +17,8 @@ import { issueComments, issueDocuments, issueRelations, + issueTreeHoldMembers, + issueTreeHolds, issues, } from "@paperclipai/db"; import { @@ -309,6 +311,8 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { await db.delete(documentRevisions); await db.delete(documents); await db.delete(issueRelations); + await db.delete(issueTreeHoldMembers); + await db.delete(issueTreeHolds); for (let attempt = 0; attempt < 5; attempt += 1) { await db.delete(issueComments); await db.delete(issueDocuments); @@ -454,11 +458,13 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { runStatus: "failed" | "timed_out" | "cancelled" | "succeeded"; retryReason?: "assignment_recovery" | "issue_continuation_needed" | null; assignToUser?: boolean; + activePauseHold?: boolean; }) { const companyId = randomUUID(); const agentId = randomUUID(); const runId = randomUUID(); const wakeupRequestId = randomUUID(); + const rootIssueId = randomUUID(); const issueId = randomUUID(); const now = new Date("2026-03-19T00:00:00.000Z"); const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`; @@ -520,22 +526,128 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { error: input.runStatus === "succeeded" ? null : "run failed before issue advanced", }); - await db.insert(issues).values({ - id: issueId, - companyId, - title: "Recover stranded assigned work", - status: input.status, + await db.insert(issues).values([ + ...(input.activePauseHold + ? [{ + id: rootIssueId, + companyId, + title: "Paused recovery root", + status: "todo", + priority: "medium", + issueNumber: 1, + identifier: `${issuePrefix}-1`, + }] + : []), + { + id: issueId, + companyId, + parentId: input.activePauseHold ? 
rootIssueId : null, + title: "Recover stranded assigned work", + status: input.status, + priority: "medium", + assigneeAgentId: input.assignToUser ? null : agentId, + assigneeUserId: input.assignToUser ? "user-1" : null, + checkoutRunId: input.status === "in_progress" ? runId : null, + executionRunId: null, + issueNumber: input.activePauseHold ? 2 : 1, + identifier: `${issuePrefix}-${input.activePauseHold ? 2 : 1}`, + startedAt: input.status === "in_progress" ? now : null, + }, + ]); + + if (input.activePauseHold) { + await db.insert(issueTreeHolds).values({ + companyId, + rootIssueId, + mode: "pause", + status: "active", + reason: "pause recovery subtree", + releasePolicy: { strategy: "manual" }, + }); + } + + return { companyId, agentId, runId, wakeupRequestId, issueId, rootIssueId }; + } + + async function expectStrandedRecoveryArtifacts(input: { + companyId: string; + agentId: string; + issueId: string; + runId: string; + previousStatus: "todo" | "in_progress"; + retryReason: "assignment_recovery" | "issue_continuation_needed"; + }) { + const recovery = await waitForValue(async () => + db.select().from(issues).where( + and( + eq(issues.companyId, input.companyId), + eq(issues.originKind, "stranded_issue_recovery"), + eq(issues.originId, input.issueId), + ), + ).then((rows) => rows[0] ?? null), + ); + if (!recovery) throw new Error("Expected stranded issue recovery issue to be created"); + + expect(recovery).toMatchObject({ + companyId: input.companyId, + parentId: input.issueId, + assigneeAgentId: input.agentId, + originKind: "stranded_issue_recovery", + originId: input.issueId, + originRunId: input.runId, priority: "medium", - assigneeAgentId: input.assignToUser ? null : agentId, - assigneeUserId: input.assignToUser ? "user-1" : null, - checkoutRunId: input.status === "in_progress" ? runId : null, - executionRunId: null, - issueNumber: 1, - identifier: `${issuePrefix}-1`, - startedAt: input.status === "in_progress" ? 
now : null, + }); + expect(recovery.title).toContain("Recover stalled issue"); + expect(recovery.description).toContain(`Previous source status: \`${input.previousStatus}\``); + expect(recovery.description).toContain(`Retry reason: \`${input.retryReason}\``); + expect(recovery.description).toContain("Fix the runtime/adapter problem"); + + const relation = await db + .select() + .from(issueRelations) + .where( + and( + eq(issueRelations.companyId, input.companyId), + eq(issueRelations.issueId, recovery.id), + eq(issueRelations.relatedIssueId, input.issueId), + eq(issueRelations.type, "blocks"), + ), + ) + .then((rows) => rows[0] ?? null); + expect(relation).toBeTruthy(); + + const wakeups = await db + .select() + .from(agentWakeupRequests) + .where(eq(agentWakeupRequests.agentId, input.agentId)); + const recoveryWakeup = wakeups.find((wakeup) => { + const payload = wakeup.payload as Record | null; + return payload?.issueId === recovery.id && + payload?.sourceIssueId === input.issueId && + payload?.strandedRunId === input.runId; + }); + expect(recoveryWakeup).toMatchObject({ + companyId: input.companyId, + reason: "issue_assigned", + source: "assignment", }); - return { companyId, agentId, runId, wakeupRequestId, issueId }; + const recoveryRun = recoveryWakeup?.runId + ? await db + .select() + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, recoveryWakeup.runId)) + .then((rows) => rows[0] ?? 
null) + : null; + expect(recoveryRun?.contextSnapshot).toMatchObject({ + issueId: recovery.id, + taskId: recovery.id, + source: "stranded_issue_recovery", + sourceIssueId: input.issueId, + strandedRunId: input.runId, + }); + + return recovery; } async function seedQueuedIssueRunFixture() { @@ -728,11 +840,28 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { it("blocks the issue when process-loss retry is exhausted and the immediate continuation recovery also fails", async () => { mockAdapterExecute.mockRejectedValueOnce(new Error("continuation recovery failed")); - const { agentId, runId, issueId } = await seedRunFixture({ + const { companyId, agentId, runId, issueId } = await seedRunFixture({ agentStatus: "idle", processPid: 999_999_999, processLossRetryCount: 1, }); + const resolvedBlockerId = randomUUID(); + const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`; + await db.insert(issues).values({ + id: resolvedBlockerId, + companyId, + title: "Already completed prerequisite", + status: "done", + priority: "medium", + issueNumber: 2, + identifier: `${issuePrefix}-2`, + }); + await db.insert(issueRelations).values({ + companyId, + issueId: resolvedBlockerId, + relatedIssueId: issueId, + type: "blocks", + }); const heartbeat = heartbeatService(db); const result = await heartbeat.reapOrphanedRuns(); @@ -759,7 +888,29 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { ); expect(blockedIssue?.status).toBe("blocked"); expect(blockedIssue?.executionRunId).toBeNull(); - expect(blockedIssue?.checkoutRunId).toBe(continuationRun?.id ?? 
null); + expect(blockedIssue?.checkoutRunId).toBeNull(); + if (!continuationRun?.id) throw new Error("Expected continuation recovery run to exist"); + + const recovery = await expectStrandedRecoveryArtifacts({ + companyId, + agentId, + issueId, + runId: continuationRun.id, + previousStatus: "in_progress", + retryReason: "issue_continuation_needed", + }); + + const blockerRelations = await db + .select() + .from(issueRelations) + .where( + and( + eq(issueRelations.companyId, companyId), + eq(issueRelations.relatedIssueId, issueId), + eq(issueRelations.type, "blocks"), + ), + ); + expect(blockerRelations.map((relation) => relation.issueId)).toEqual([recovery.id]); const comments = await waitForValue(async () => { const rows = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId)); @@ -767,6 +918,49 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { }); expect(comments).toHaveLength(1); expect(comments[0]?.body).toContain("retried continuation"); + expect(comments[0]?.body).toContain(`Recovery issue: [${recovery.identifier}]`); + }); + + it("does not block paused-tree work when immediate continuation recovery is suppressed by the hold", async () => { + const { companyId, agentId, runId, issueId } = await seedRunFixture({ + agentStatus: "idle", + processPid: 999_999_999, + processLossRetryCount: 1, + }); + await db.insert(issueTreeHolds).values({ + companyId, + rootIssueId: issueId, + mode: "pause", + status: "active", + reason: "pause immediate recovery subtree", + releasePolicy: { strategy: "manual" }, + }); + const heartbeat = heartbeatService(db); + + const result = await heartbeat.reapOrphanedRuns(); + expect(result.reaped).toBe(1); + expect(result.runIds).toEqual([runId]); + + const runs = await db + .select() + .from(heartbeatRuns) + .where(eq(heartbeatRuns.agentId, agentId)); + expect(runs).toHaveLength(1); + expect(runs[0]?.status).toBe("failed"); + + const issue = await 
db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => rows[0] ?? null); + expect(issue?.status).toBe("in_progress"); + expect(issue?.executionRunId).toBeNull(); + expect(issue?.checkoutRunId).toBe(runId); + + const recoveryIssues = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stranded_issue_recovery"))); + expect(recoveryIssues).toHaveLength(0); + + const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId)); + expect(comments).toHaveLength(0); }); it("schedules a bounded retry for codex transient upstream failures instead of blocking the issue immediately", async () => { @@ -901,7 +1095,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { }); it("blocks assigned todo work after the one automatic dispatch recovery was already used", async () => { - const { issueId } = await seedStrandedIssueFixture({ + const { companyId, agentId, issueId, runId } = await seedStrandedIssueFixture({ status: "todo", runStatus: "failed", retryReason: "assignment_recovery", @@ -916,10 +1110,20 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { const issue = await db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => rows[0] ?? 
null); expect(issue?.status).toBe("blocked"); + const recovery = await expectStrandedRecoveryArtifacts({ + companyId, + agentId, + issueId, + runId, + previousStatus: "todo", + retryReason: "assignment_recovery", + }); + const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId)); expect(comments).toHaveLength(1); expect(comments[0]?.body).toContain("retried dispatch"); expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced."); + expect(comments[0]?.body).toContain(`Recovery issue: [${recovery.identifier}]`); }); it("assigns open unassigned blockers back to their creator agent", async () => { @@ -1206,7 +1410,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { expect(wakes.some((row) => row.reason === "run_liveness_continuation")).toBe(false); }); it("blocks stranded in-progress work after the continuation retry was already used", async () => { - const { issueId } = await seedStrandedIssueFixture({ + const { companyId, agentId, issueId, runId } = await seedStrandedIssueFixture({ status: "in_progress", runStatus: "failed", retryReason: "issue_continuation_needed", @@ -1221,10 +1425,65 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { const issue = await db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => rows[0] ?? 
null); expect(issue?.status).toBe("blocked"); + const recovery = await expectStrandedRecoveryArtifacts({ + companyId, + agentId, + issueId, + runId, + previousStatus: "in_progress", + retryReason: "issue_continuation_needed", + }); + const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId)); expect(comments).toHaveLength(1); expect(comments[0]?.body).toContain("retried continuation"); expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced."); + expect(comments[0]?.body).toContain(`Recovery issue: [${recovery.identifier}]`); + }); + + it("does not escalate paused-tree recovery when the automatic continuation retry was cancelled by the hold", async () => { + const { companyId, agentId, issueId } = await seedStrandedIssueFixture({ + status: "in_progress", + runStatus: "cancelled", + retryReason: "issue_continuation_needed", + activePauseHold: true, + }); + const heartbeat = heartbeatService(db); + + const result = await heartbeat.reconcileStrandedAssignedIssues(); + expect(result.dispatchRequeued).toBe(0); + expect(result.continuationRequeued).toBe(0); + expect(result.escalated).toBe(0); + expect(result.skipped).toBe(1); + expect(result.issueIds).toEqual([]); + + const issue = await db.select().from(issues).where(eq(issues.id, issueId)).then((rows) => rows[0] ?? 
null); + expect(issue?.status).toBe("in_progress"); + expect(issue?.checkoutRunId).toBeTruthy(); + + const recoveryIssues = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stranded_issue_recovery"))); + expect(recoveryIssues).toHaveLength(0); + + const blockerRelations = await db + .select() + .from(issueRelations) + .where( + and( + eq(issueRelations.companyId, companyId), + eq(issueRelations.relatedIssueId, issueId), + eq(issueRelations.type, "blocks"), + ), + ); + expect(blockerRelations).toHaveLength(0); + + const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId)); + expect(comments).toHaveLength(0); + + const wakeups = await db.select().from(agentWakeupRequests).where(eq(agentWakeupRequests.agentId, agentId)); + expect(wakeups).toHaveLength(1); }); it("re-enqueues continuation when the latest automatic continuation succeeded without closing the issue", async () => { diff --git a/server/src/__tests__/heartbeat-start-lock.test.ts b/server/src/__tests__/heartbeat-start-lock.test.ts new file mode 100644 index 00000000..29aefdfb --- /dev/null +++ b/server/src/__tests__/heartbeat-start-lock.test.ts @@ -0,0 +1,30 @@ +import { randomUUID } from "node:crypto"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { withAgentStartLock } from "../services/agent-start-lock.ts"; + +describe("heartbeat agent start lock", () => { + afterEach(() => { + vi.useRealTimers(); + }); + + it("does not let a stale start lock freeze later queued-run starts", async () => { + vi.useFakeTimers(); + + const agentId = randomUUID(); + const firstStart = vi.fn(() => new Promise(() => undefined)); + const secondStart = vi.fn(async () => "started"); + + void withAgentStartLock(agentId, firstStart); + await Promise.resolve(); + expect(firstStart).toHaveBeenCalledTimes(1); + + const secondStartResult = withAgentStartLock(agentId, secondStart); + await Promise.resolve(); + 
expect(secondStart).not.toHaveBeenCalled(); + + await vi.advanceTimersByTimeAsync(30_000); + + await expect(secondStartResult).resolves.toBe("started"); + expect(secondStart).toHaveBeenCalledTimes(1); + }); +}); diff --git a/server/src/__tests__/instance-settings-routes.test.ts b/server/src/__tests__/instance-settings-routes.test.ts index bd67dad9..8cfd413a 100644 --- a/server/src/__tests__/instance-settings-routes.test.ts +++ b/server/src/__tests__/instance-settings-routes.test.ts @@ -58,6 +58,7 @@ describe("instance settings routes", () => { enableEnvironments: false, enableIsolatedWorkspaces: false, autoRestartDevServerWhenIdle: false, + enableIssueGraphLivenessAutoRecovery: false, }); mockInstanceSettingsService.updateGeneral.mockResolvedValue({ id: "instance-settings-1", @@ -73,6 +74,7 @@ describe("instance settings routes", () => { enableEnvironments: true, enableIsolatedWorkspaces: true, autoRestartDevServerWhenIdle: false, + enableIssueGraphLivenessAutoRecovery: false, }, }); mockInstanceSettingsService.listCompanyIds.mockResolvedValue(["company-1", "company-2"]); @@ -92,6 +94,7 @@ describe("instance settings routes", () => { enableEnvironments: false, enableIsolatedWorkspaces: false, autoRestartDevServerWhenIdle: false, + enableIssueGraphLivenessAutoRecovery: false, }); const patchRes = await request(app) @@ -103,7 +106,7 @@ describe("instance settings routes", () => { enableIsolatedWorkspaces: true, }); expect(mockLogActivity).toHaveBeenCalledTimes(2); - }); + }, 10_000); it("allows local board users to update guarded dev-server auto-restart", async () => { const app = await createApp({ @@ -118,8 +121,28 @@ describe("instance settings routes", () => { .send({ autoRestartDevServerWhenIdle: true }) .expect(200); + expect( + mockInstanceSettingsService.updateExperimental.mock.calls.some( + ([patch]) => patch?.autoRestartDevServerWhenIdle === true, + ), + ).toBe(true); + }); + + it("allows local board users to update issue graph liveness auto-recovery", async 
() => { + const app = await createApp({ + type: "board", + userId: "local-board", + source: "local_implicit", + isInstanceAdmin: true, + }); + + await request(app) + .patch("/api/instance/settings/experimental") + .send({ enableIssueGraphLivenessAutoRecovery: true }) + .expect(200); + expect(mockInstanceSettingsService.updateExperimental).toHaveBeenCalledWith({ - autoRestartDevServerWhenIdle: true, + enableIssueGraphLivenessAutoRecovery: true, }); }); diff --git a/server/src/__tests__/invite-test-resolution-route.test.ts b/server/src/__tests__/invite-test-resolution-route.test.ts index a89449b3..7ca22a99 100644 --- a/server/src/__tests__/invite-test-resolution-route.test.ts +++ b/server/src/__tests__/invite-test-resolution-route.test.ts @@ -1,6 +1,6 @@ import express from "express"; import request from "supertest"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; function createSelectChain(rows: unknown[]) { const query = { @@ -44,8 +44,6 @@ function createInvite(overrides: Record = {}) { }; } -let currentAccessModule: Awaited>> | null = null; - async function createApp( db: Record, network: { @@ -54,11 +52,9 @@ async function createApp( }, ) { const [access, middleware] = await Promise.all([ - vi.importActual("../routes/access.js"), - vi.importActual("../middleware/index.js"), + import("../routes/access.js"), + import("../middleware/index.js"), ]); - currentAccessModule = access; - access.setInviteResolutionNetworkForTest(network); const app = express(); app.use((req, _res, next) => { (req as any).actor = { type: "anon" }; @@ -71,6 +67,7 @@ async function createApp( deploymentExposure: "private", bindHost: "127.0.0.1", allowedHostnames: [], + inviteResolutionNetwork: network, }), ); app.use(middleware.errorHandler); @@ -79,43 +76,43 @@ async function createApp( describe.sequential("GET /invites/:token/test-resolution", () => { beforeEach(() => { - currentAccessModule = null; 
+ vi.clearAllMocks(); }); - afterEach(async () => { - currentAccessModule?.setInviteResolutionNetworkForTest(null); - }); + it("rejects private, local, multicast, and reserved targets before probing", async () => { + const cases = [ + ["localhost", "http://localhost:3100/api/health", "127.0.0.1"], + ["IPv4 loopback", "http://127.0.0.1:3100/api/health", "127.0.0.1"], + ["IPv6 loopback", "http://[::1]:3100/api/health", "::1"], + ["IPv4-mapped IPv6 loopback hex", "http://[::ffff:7f00:1]/api/health", "::ffff:7f00:1"], + ["IPv4-mapped IPv6 RFC1918 hex", "http://[::ffff:c0a8:101]/api/health", "::ffff:c0a8:101"], + ["RFC1918 10/8", "http://10.0.0.5/api/health", "10.0.0.5"], + ["RFC1918 172.16/12", "http://172.16.10.5/api/health", "172.16.10.5"], + ["RFC1918 192.168/16", "http://192.168.1.10/api/health", "192.168.1.10"], + ["link-local metadata", "http://169.254.169.254/latest/meta-data", "169.254.169.254"], + ["multicast", "http://224.0.0.1/probe", "224.0.0.1"], + ["NAT64 well-known prefix", "https://gateway.example.test/health", "64:ff9b::0a00:0001"], + ["NAT64 local-use prefix", "https://gateway.example.test/health", "64:ff9b:1::0a00:0001"], + ] as const; - it.each([ - ["localhost", "http://localhost:3100/api/health", "127.0.0.1"], - ["IPv4 loopback", "http://127.0.0.1:3100/api/health", "127.0.0.1"], - ["IPv6 loopback", "http://[::1]:3100/api/health", "::1"], - ["IPv4-mapped IPv6 loopback hex", "http://[::ffff:7f00:1]/api/health", "::ffff:7f00:1"], - ["IPv4-mapped IPv6 RFC1918 hex", "http://[::ffff:c0a8:101]/api/health", "::ffff:c0a8:101"], - ["RFC1918 10/8", "http://10.0.0.5/api/health", "10.0.0.5"], - ["RFC1918 172.16/12", "http://172.16.10.5/api/health", "172.16.10.5"], - ["RFC1918 192.168/16", "http://192.168.1.10/api/health", "192.168.1.10"], - ["link-local metadata", "http://169.254.169.254/latest/meta-data", "169.254.169.254"], - ["multicast", "http://224.0.0.1/probe", "224.0.0.1"], - ["NAT64 well-known prefix", "https://gateway.example.test/health", 
"64:ff9b::0a00:0001"], - ["NAT64 local-use prefix", "https://gateway.example.test/health", "64:ff9b:1::0a00:0001"], - ])("rejects %s targets before probing", async (_label, url, address) => { - const lookup = vi.fn().mockResolvedValue([{ address, family: address.includes(":") ? 6 : 4 }]); - const requestHead = vi.fn(); - const app = await createApp(createDbStub([createInvite()]), { lookup, requestHead }); + for (const [label, url, address] of cases) { + const lookup = vi.fn().mockResolvedValue([{ address, family: address.includes(":") ? 6 : 4 }]); + const requestHead = vi.fn(); + const app = await createApp(createDbStub([createInvite()]), { lookup, requestHead }); - const res = await request(app) - .get("/api/invites/pcp_invite_test/test-resolution") - .query({ url }); + const res = await request(app) + .get("/api/invites/pcp_invite_test/test-resolution") + .query({ url }); - expect(res.status).toBe(400); - expect(res.body.error).toBe( - "url resolves to a private, local, multicast, or reserved address", - ); - expect(requestHead).not.toHaveBeenCalled(); - }, 15_000); + expect(res.status, label).toBe(400); + expect(res.body.error).toBe( + "url resolves to a private, local, multicast, or reserved address", + ); + expect(requestHead).not.toHaveBeenCalled(); + } + }, 20_000); - it("rejects hostnames that resolve to private addresses", async () => { + it.sequential("rejects hostnames that resolve to private addresses", async () => { const lookup = vi.fn().mockResolvedValue([{ address: "10.1.2.3", family: 4 }]); const requestHead = vi.fn(); const app = await createApp(createDbStub([createInvite()]), { lookup, requestHead }); @@ -132,7 +129,7 @@ describe.sequential("GET /invites/:token/test-resolution", () => { expect(requestHead).not.toHaveBeenCalled(); }); - it("rejects hostnames when any resolved address is private", async () => { + it.sequential("rejects hostnames when any resolved address is private", async () => { const lookup = vi.fn().mockResolvedValue([ { 
address: "127.0.0.1", family: 4 }, { address: "93.184.216.34", family: 4 }, @@ -148,7 +145,7 @@ describe.sequential("GET /invites/:token/test-resolution", () => { expect(requestHead).not.toHaveBeenCalled(); }); - it("allows public HTTPS targets through the resolved and pinned probe path", async () => { + it.sequential("allows public HTTPS targets through the resolved and pinned probe path", async () => { const lookup = vi.fn().mockResolvedValue([{ address: "93.184.216.34", family: 4 }]); const requestHead = vi.fn().mockResolvedValue({ httpStatus: 204 }); const app = await createApp(createDbStub([createInvite()]), { lookup, requestHead }); @@ -177,7 +174,7 @@ describe.sequential("GET /invites/:token/test-resolution", () => { ); }); - it.each([ + it.sequential.each([ ["missing invite", []], ["revoked invite", [createInvite({ revokedAt: new Date("2026-03-07T00:05:00.000Z") })]], ["expired invite", [createInvite({ expiresAt: new Date("2020-03-07T00:10:00.000Z") })]], diff --git a/server/src/__tests__/issue-blocker-attention.test.ts b/server/src/__tests__/issue-blocker-attention.test.ts new file mode 100644 index 00000000..097ae60c --- /dev/null +++ b/server/src/__tests__/issue-blocker-attention.test.ts @@ -0,0 +1,280 @@ +import { randomUUID } from "node:crypto"; +import { eq } from "drizzle-orm"; +import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest"; +import { + agents, + agentWakeupRequests, + companies, + createDb, + heartbeatRuns, + issueRelations, + issues, +} from "@paperclipai/db"; +import { + getEmbeddedPostgresTestSupport, + startEmbeddedPostgresTestDatabase, +} from "./helpers/embedded-postgres.js"; +import { issueService } from "../services/issues.js"; + +const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport(); +const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? 
describe : describe.skip; + +if (!embeddedPostgresSupport.supported) { + console.warn( + `Skipping embedded Postgres issue blocker attention tests on this host: ${embeddedPostgresSupport.reason ?? "unsupported environment"}`, + ); +} + +describeEmbeddedPostgres("issue blocker attention", () => { + let db!: ReturnType; + let svc!: ReturnType; + let tempDb: Awaited> | null = null; + + beforeAll(async () => { + tempDb = await startEmbeddedPostgresTestDatabase("paperclip-issue-blocker-attention-"); + db = createDb(tempDb.connectionString); + svc = issueService(db); + }, 20_000); + + afterEach(async () => { + await db.delete(heartbeatRuns); + await db.delete(agentWakeupRequests); + await db.delete(issueRelations); + await db.delete(issues); + await db.delete(agents); + await db.delete(companies); + }); + + afterAll(async () => { + await tempDb?.cleanup(); + }); + + async function createCompany(prefix = "PBA") { + const companyId = randomUUID(); + const agentId = randomUUID(); + await db.insert(companies).values({ + id: companyId, + name: `Company ${prefix}`, + issuePrefix: prefix, + requireBoardApprovalForNewAgents: false, + }); + await db.insert(agents).values({ + id: agentId, + companyId, + name: `${prefix} Agent`, + role: "engineer", + status: "idle", + }); + return { companyId, agentId }; + } + + async function insertIssue(input: { + companyId: string; + id?: string; + identifier: string; + title: string; + status: string; + parentId?: string | null; + assigneeAgentId?: string | null; + }) { + const id = input.id ?? randomUUID(); + await db.insert(issues).values({ + id, + companyId: input.companyId, + identifier: input.identifier, + title: input.title, + status: input.status, + priority: "medium", + parentId: input.parentId ?? null, + assigneeAgentId: input.assigneeAgentId ?? 
null, + }); + return id; + } + + async function block(input: { companyId: string; blockerIssueId: string; blockedIssueId: string }) { + await db.insert(issueRelations).values({ + companyId: input.companyId, + issueId: input.blockerIssueId, + relatedIssueId: input.blockedIssueId, + type: "blocks", + }); + } + + async function activeRun(input: { companyId: string; agentId: string; issueId: string; status?: string; current?: boolean }) { + const runId = randomUUID(); + await db.insert(heartbeatRuns).values({ + id: runId, + companyId: input.companyId, + agentId: input.agentId, + status: input.status ?? "running", + contextSnapshot: { issueId: input.issueId }, + }); + if (input.current !== false) { + await db.update(issues).set({ executionRunId: runId }).where(eq(issues.id, input.issueId)); + } + return runId; + } + + it("classifies a blocked parent as covered when its child has a running execution path", async () => { + const { companyId, agentId } = await createCompany("PBC"); + const parentId = await insertIssue({ companyId, identifier: "PBC-1", title: "Parent", status: "blocked" }); + const childId = await insertIssue({ + companyId, + identifier: "PBC-2", + title: "Running child", + status: "todo", + parentId, + assigneeAgentId: agentId, + }); + await block({ companyId, blockerIssueId: childId, blockedIssueId: parentId }); + await activeRun({ companyId, agentId, issueId: childId }); + + const parent = (await svc.list(companyId, { status: "blocked" })).find((issue) => issue.id === parentId); + + expect(parent?.blockerAttention).toMatchObject({ + state: "covered", + reason: "active_child", + unresolvedBlockerCount: 1, + coveredBlockerCount: 1, + attentionBlockerCount: 0, + sampleBlockerIdentifier: "PBC-2", + }); + }); + + it("keeps mixed blockers attention-required when any path lacks active work", async () => { + const { companyId, agentId } = await createCompany("PBM"); + const parentId = await insertIssue({ companyId, identifier: "PBM-1", title: "Parent", status: 
"blocked" }); + const activeChildId = await insertIssue({ + companyId, + identifier: "PBM-2", + title: "Running child", + status: "todo", + parentId, + assigneeAgentId: agentId, + }); + const idleBlockerId = await insertIssue({ + companyId, + identifier: "PBM-3", + title: "Idle blocker", + status: "todo", + assigneeAgentId: agentId, + }); + await block({ companyId, blockerIssueId: activeChildId, blockedIssueId: parentId }); + await block({ companyId, blockerIssueId: idleBlockerId, blockedIssueId: parentId }); + await activeRun({ companyId, agentId, issueId: activeChildId }); + + const parent = (await svc.list(companyId, { status: "blocked" })).find((issue) => issue.id === parentId); + + expect(parent?.blockerAttention).toMatchObject({ + state: "needs_attention", + reason: "attention_required", + unresolvedBlockerCount: 2, + coveredBlockerCount: 1, + attentionBlockerCount: 1, + sampleBlockerIdentifier: "PBM-3", + }); + }); + + it("covers recursive blocker chains when the downstream leaf has active work", async () => { + const { companyId, agentId } = await createCompany("PBR"); + const parentId = await insertIssue({ companyId, identifier: "PBR-1", title: "Parent", status: "blocked" }); + const blockerId = await insertIssue({ companyId, identifier: "PBR-2", title: "Blocked dependency", status: "blocked" }); + const leafId = await insertIssue({ + companyId, + identifier: "PBR-3", + title: "Running leaf", + status: "todo", + assigneeAgentId: agentId, + }); + await block({ companyId, blockerIssueId: blockerId, blockedIssueId: parentId }); + await block({ companyId, blockerIssueId: leafId, blockedIssueId: blockerId }); + await activeRun({ companyId, agentId, issueId: leafId }); + + const parent = (await svc.list(companyId, { status: "blocked" })).find((issue) => issue.id === parentId); + + expect(parent?.blockerAttention).toMatchObject({ + state: "covered", + reason: "active_dependency", + unresolvedBlockerCount: 1, + coveredBlockerCount: 1, + attentionBlockerCount: 0, + 
sampleBlockerIdentifier: "PBR-3", + }); + }); + + it("does not let another company's active run cover the blocker", async () => { + const { companyId, agentId } = await createCompany("PBS"); + const other = await createCompany("PBT"); + const parentId = await insertIssue({ companyId, identifier: "PBS-1", title: "Parent", status: "blocked" }); + const blockerId = await insertIssue({ + companyId, + identifier: "PBS-2", + title: "Same-company blocker", + status: "todo", + assigneeAgentId: agentId, + }); + await block({ companyId, blockerIssueId: blockerId, blockedIssueId: parentId }); + await activeRun({ companyId: other.companyId, agentId: other.agentId, issueId: blockerId }); + + const parent = (await svc.list(companyId, { status: "blocked" })).find((issue) => issue.id === parentId); + + expect(parent?.blockerAttention).toMatchObject({ + state: "needs_attention", + reason: "attention_required", + unresolvedBlockerCount: 1, + coveredBlockerCount: 0, + attentionBlockerCount: 1, + sampleBlockerIdentifier: "PBS-2", + }); + }); + + it("does not cover a blocker from a stale run the issue no longer owns", async () => { + const { companyId, agentId } = await createCompany("PBX"); + const parentId = await insertIssue({ companyId, identifier: "PBX-1", title: "Parent", status: "blocked" }); + const blockerId = await insertIssue({ + companyId, + identifier: "PBX-2", + title: "Previously running blocker", + status: "blocked", + assigneeAgentId: agentId, + }); + await block({ companyId, blockerIssueId: blockerId, blockedIssueId: parentId }); + await activeRun({ companyId, agentId, issueId: blockerId, current: false }); + + const parent = (await svc.list(companyId, { status: "blocked" })).find((issue) => issue.id === parentId); + + expect(parent?.blockerAttention).toMatchObject({ + state: "needs_attention", + reason: "attention_required", + unresolvedBlockerCount: 1, + coveredBlockerCount: 0, + attentionBlockerCount: 1, + sampleBlockerIdentifier: "PBX-2", + }); + }); + + it("does 
not treat a scheduled retry as actively covered work", async () => { + const { companyId, agentId } = await createCompany("PBY"); + const parentId = await insertIssue({ companyId, identifier: "PBY-1", title: "Parent", status: "blocked" }); + const blockerId = await insertIssue({ + companyId, + identifier: "PBY-2", + title: "Retrying blocker", + status: "blocked", + assigneeAgentId: agentId, + }); + await block({ companyId, blockerIssueId: blockerId, blockedIssueId: parentId }); + await activeRun({ companyId, agentId, issueId: blockerId, status: "scheduled_retry" }); + + const parent = (await svc.list(companyId, { status: "blocked" })).find((issue) => issue.id === parentId); + + expect(parent?.blockerAttention).toMatchObject({ + state: "needs_attention", + reason: "attention_required", + unresolvedBlockerCount: 1, + coveredBlockerCount: 0, + attentionBlockerCount: 1, + sampleBlockerIdentifier: "PBY-2", + }); + }); +}); diff --git a/server/src/__tests__/issue-comment-reopen-routes.test.ts b/server/src/__tests__/issue-comment-reopen-routes.test.ts index 1ff5233c..7709274b 100644 --- a/server/src/__tests__/issue-comment-reopen-routes.test.ts +++ b/server/src/__tests__/issue-comment-reopen-routes.test.ts @@ -28,6 +28,7 @@ const mockHeartbeatService = vi.hoisted(() => ({ const mockAgentService = vi.hoisted(() => ({ getById: vi.fn(), + list: vi.fn(), resolveByReference: vi.fn(), })); @@ -61,80 +62,82 @@ const mockIssueThreadInteractionService = vi.hoisted(() => ({ expireRequestConfirmationsSupersededByComment: vi.fn(async () => []), expireStaleRequestConfirmationsForIssueDocument: vi.fn(async () => []), })); +const mockIssueTreeControlService = vi.hoisted(() => ({ + getActivePauseHoldGate: vi.fn(async () => null), +})); -function registerModuleMocks() { - vi.doMock("@paperclipai/shared/telemetry", () => ({ - trackAgentTaskCompleted: vi.fn(), - trackErrorHandlerCrash: vi.fn(), - })); +vi.mock("@paperclipai/shared/telemetry", () => ({ + trackAgentTaskCompleted: vi.fn(), + 
trackErrorHandlerCrash: vi.fn(), +})); - vi.doMock("../telemetry.js", () => ({ - getTelemetryClient: vi.fn(() => ({ track: vi.fn() })), - })); +vi.mock("../telemetry.js", () => ({ + getTelemetryClient: vi.fn(() => ({ track: vi.fn() })), +})); - vi.doMock("../services/access.js", () => ({ - accessService: () => mockAccessService, - })); +vi.mock("../services/access.js", () => ({ + accessService: () => mockAccessService, +})); - vi.doMock("../services/activity-log.js", () => ({ - logActivity: mockLogActivity, - })); +vi.mock("../services/activity-log.js", () => ({ + logActivity: mockLogActivity, +})); - vi.doMock("../services/agents.js", () => ({ - agentService: () => mockAgentService, - })); +vi.mock("../services/agents.js", () => ({ + agentService: () => mockAgentService, +})); - vi.doMock("../services/feedback.js", () => ({ - feedbackService: () => mockFeedbackService, - })); +vi.mock("../services/feedback.js", () => ({ + feedbackService: () => mockFeedbackService, +})); - vi.doMock("../services/heartbeat.js", () => ({ - heartbeatService: () => mockHeartbeatService, - })); +vi.mock("../services/heartbeat.js", () => ({ + heartbeatService: () => mockHeartbeatService, +})); - vi.doMock("../services/instance-settings.js", () => ({ - instanceSettingsService: () => mockInstanceSettingsService, - })); +vi.mock("../services/instance-settings.js", () => ({ + instanceSettingsService: () => mockInstanceSettingsService, +})); - vi.doMock("../services/issues.js", () => ({ - issueService: () => mockIssueService, - })); +vi.mock("../services/issues.js", () => ({ + issueService: () => mockIssueService, +})); - vi.doMock("../services/routines.js", () => ({ - routineService: () => mockRoutineService, - })); +vi.mock("../services/routines.js", () => ({ + routineService: () => mockRoutineService, +})); - vi.doMock("../services/index.js", () => ({ - accessService: () => mockAccessService, - agentService: () => mockAgentService, - documentService: () => ({}), - 
executionWorkspaceService: () => ({}), - feedbackService: () => mockFeedbackService, - goalService: () => ({}), - heartbeatService: () => mockHeartbeatService, - instanceSettingsService: () => mockInstanceSettingsService, - issueApprovalService: () => ({}), - issueReferenceService: () => ({ - deleteDocumentSource: async () => undefined, - diffIssueReferenceSummary: () => ({ - addedReferencedIssues: [], - removedReferencedIssues: [], - currentReferencedIssues: [], - }), - emptySummary: () => ({ outbound: [], inbound: [] }), - listIssueReferenceSummary: async () => ({ outbound: [], inbound: [] }), - syncComment: async () => undefined, - syncDocument: async () => undefined, - syncIssue: async () => undefined, +vi.mock("../services/index.js", () => ({ + accessService: () => mockAccessService, + agentService: () => mockAgentService, + documentService: () => ({}), + executionWorkspaceService: () => ({}), + feedbackService: () => mockFeedbackService, + goalService: () => ({}), + heartbeatService: () => mockHeartbeatService, + instanceSettingsService: () => mockInstanceSettingsService, + issueApprovalService: () => ({}), + issueReferenceService: () => ({ + deleteDocumentSource: async () => undefined, + diffIssueReferenceSummary: () => ({ + addedReferencedIssues: [], + removedReferencedIssues: [], + currentReferencedIssues: [], }), - issueService: () => mockIssueService, - issueThreadInteractionService: () => mockIssueThreadInteractionService, - logActivity: mockLogActivity, - projectService: () => ({}), - routineService: () => mockRoutineService, - workProductService: () => ({}), - })); -} + emptySummary: () => ({ outbound: [], inbound: [] }), + listIssueReferenceSummary: async () => ({ outbound: [], inbound: [] }), + syncComment: async () => undefined, + syncDocument: async () => undefined, + syncIssue: async () => undefined, + }), + issueService: () => mockIssueService, + issueThreadInteractionService: () => mockIssueThreadInteractionService, + issueTreeControlService: 
() => mockIssueTreeControlService, + logActivity: mockLogActivity, + projectService: () => ({}), + routineService: () => mockRoutineService, + workProductService: () => ({}), +})); function createApp() { const app = express(); @@ -144,8 +147,8 @@ function createApp() { async function installActor(app: express.Express, actor?: Record) { const [{ issueRoutes }, { errorHandler }] = await Promise.all([ - vi.importActual("../routes/issues.js"), - vi.importActual("../middleware/index.js"), + import("../routes/issues.js"), + import("../middleware/index.js"), ]); app.use((req, _res, next) => { (req as any).actor = actor ?? { @@ -173,7 +176,7 @@ async function normalizePolicy(input: { return normalizeIssueExecutionPolicy(input); } -function makeIssue(status: "todo" | "done" | "blocked") { +function makeIssue(status: "todo" | "done" | "blocked" | "cancelled" | "in_progress") { return { id: "11111111-1111-4111-8111-111111111111", companyId: "company-1", @@ -186,25 +189,23 @@ function makeIssue(status: "todo" | "done" | "blocked") { }; } -describe("issue comment reopen routes", () => { +function agentActor(agentId = "22222222-2222-4222-8222-222222222222") { + return { + type: "agent", + agentId, + companyId: "company-1", + source: "agent_key", + runId: "run-1", + }; +} + +async function waitForWakeup(assertion: () => void) { + await vi.waitFor(assertion); +} + +describe.sequential("issue comment reopen routes", () => { beforeEach(() => { - vi.resetModules(); - vi.doUnmock("@paperclipai/shared/telemetry"); - vi.doUnmock("../telemetry.js"); - vi.doUnmock("../services/access.js"); - vi.doUnmock("../services/activity-log.js"); - vi.doUnmock("../services/agents.js"); - vi.doUnmock("../services/feedback.js"); - vi.doUnmock("../services/heartbeat.js"); - vi.doUnmock("../services/index.js"); - vi.doUnmock("../services/instance-settings.js"); - vi.doUnmock("../services/issues.js"); - vi.doUnmock("../services/routines.js"); - vi.doUnmock("../routes/issues.js"); - 
vi.doUnmock("../routes/authz.js"); - vi.doUnmock("../middleware/index.js"); - registerModuleMocks(); - vi.resetAllMocks(); + vi.clearAllMocks(); mockIssueService.getById.mockReset(); mockIssueService.assertCheckoutOwner.mockReset(); mockIssueService.update.mockReset(); @@ -221,6 +222,7 @@ describe("issue comment reopen routes", () => { mockHeartbeatService.getActiveRunForAgent.mockReset(); mockHeartbeatService.cancelRun.mockReset(); mockAgentService.getById.mockReset(); + mockAgentService.list.mockReset(); mockAgentService.resolveByReference.mockReset(); mockLogActivity.mockReset(); mockFeedbackService.listIssueVotesForUser.mockReset(); @@ -228,6 +230,7 @@ describe("issue comment reopen routes", () => { mockInstanceSettingsService.get.mockReset(); mockInstanceSettingsService.listCompanyIds.mockReset(); mockRoutineService.syncRunStatusForIssue.mockReset(); + mockIssueTreeControlService.getActivePauseHoldGate.mockReset(); mockTxInsertValues.mockReset(); mockTxInsert.mockReset(); mockDb.transaction.mockReset(); @@ -255,6 +258,7 @@ describe("issue comment reopen routes", () => { }); mockInstanceSettingsService.listCompanyIds.mockResolvedValue(["company-1"]); mockRoutineService.syncRunStatusForIssue.mockResolvedValue(undefined); + mockIssueTreeControlService.getActivePauseHoldGate.mockResolvedValue(null); mockIssueService.addComment.mockResolvedValue({ id: "comment-1", issueId: "11111111-1111-4111-8111-111111111111", @@ -280,12 +284,36 @@ describe("issue comment reopen routes", () => { mockAccessService.canUser.mockResolvedValue(false); mockAccessService.hasPermission.mockResolvedValue(false); mockAgentService.getById.mockResolvedValue(null); - mockAgentService.resolveByReference.mockImplementation(async (_companyId: string, reference: string) => ({ - ambiguous: false, - agent: { - id: reference, + mockAgentService.list.mockResolvedValue([ + { + id: "22222222-2222-4222-8222-222222222222", + reportsTo: null, + permissions: { canCreateAgents: false }, }, - })); + { + id: 
"44444444-4444-4444-8444-444444444444", + reportsTo: null, + permissions: { canCreateAgents: false }, + }, + ]); + mockAgentService.resolveByReference.mockImplementation(async (_companyId: string, reference: string) => { + if (reference === "ambiguous-codex") { + return { ambiguous: true, agent: null }; + } + if (reference === "missing-codex") { + return { ambiguous: false, agent: null }; + } + if (reference === "codexcoder") { + return { + ambiguous: false, + agent: { id: "33333333-3333-4333-8333-333333333333" }, + }; + } + return { + ambiguous: false, + agent: { id: reference }, + }; + }); }); it("treats reopen=true as a no-op when the issue is already open", async () => { @@ -350,10 +378,6 @@ describe("issue comment reopen routes", () => { ...makeIssue("todo"), ...patch, })); - mockAgentService.resolveByReference.mockResolvedValue({ - ambiguous: false, - agent: { id: "33333333-3333-4333-8333-333333333333" }, - }); const res = await request(await installActor(createApp())) .patch("/api/issues/11111111-1111-4111-8111-111111111111") @@ -371,14 +395,10 @@ describe("issue comment reopen routes", () => { it("rejects ambiguous assignee shortnames", async () => { mockIssueService.getById.mockResolvedValue(makeIssue("todo")); - mockAgentService.resolveByReference.mockResolvedValue({ - ambiguous: true, - agent: null, - }); const res = await request(await installActor(createApp())) .patch("/api/issues/11111111-1111-4111-8111-111111111111") - .send({ assigneeAgentId: "codexcoder" }); + .send({ assigneeAgentId: "ambiguous-codex" }); expect(res.status).toBe(409); expect(res.body.error).toContain("ambiguous"); @@ -387,14 +407,10 @@ describe("issue comment reopen routes", () => { it("rejects missing assignee shortnames", async () => { mockIssueService.getById.mockResolvedValue(makeIssue("todo")); - mockAgentService.resolveByReference.mockResolvedValue({ - ambiguous: false, - agent: null, - }); const res = await request(await installActor(createApp())) 
.patch("/api/issues/11111111-1111-4111-8111-111111111111") - .send({ assigneeAgentId: "codexcoder" }); + .send({ assigneeAgentId: "missing-codex" }); expect(res.status).toBe(404); expect(res.body.error).toBe("Agent not found"); @@ -450,7 +466,7 @@ describe("issue comment reopen routes", () => { "11111111-1111-4111-8111-111111111111", { status: "todo" }, ); - expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( "22222222-2222-4222-8222-222222222222", expect.objectContaining({ reason: "issue_reopened_via_comment", @@ -458,7 +474,38 @@ describe("issue comment reopen routes", () => { reopenedFrom: "done", }), }), + )); + }); + + it("does not implicitly reopen closed issues via POST comments for agent-authored comments", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("done")); + mockIssueService.addComment.mockResolvedValue({ + id: "comment-1", + issueId: "11111111-1111-4111-8111-111111111111", + companyId: "company-1", + body: "hello", + createdAt: new Date(), + updatedAt: new Date(), + authorAgentId: "33333333-3333-4333-8333-333333333333", + authorUserId: null, + }); + + const res = await request(await installActor(createApp(), { + type: "agent", + agentId: "33333333-3333-4333-8333-333333333333", + companyId: "company-1", + source: "agent_key", + runId: "77777777-7777-4777-8777-777777777777", + })) + .post("/api/issues/11111111-1111-4111-8111-111111111111/comments") + .send({ body: "hello" }); + + expect(res.status).toBe(201); + expect(mockIssueService.update).not.toHaveBeenCalledWith( + "11111111-1111-4111-8111-111111111111", + { status: "todo" }, ); + expect(mockHeartbeatService.wakeup).not.toHaveBeenCalled(); }); it("moves assigned blocked issues back to todo via POST comments", async () => { @@ -477,7 +524,7 @@ describe("issue comment reopen routes", () => { "11111111-1111-4111-8111-111111111111", { status: "todo" }, ); - 
expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( "22222222-2222-4222-8222-222222222222", expect.objectContaining({ reason: "issue_reopened_via_comment", @@ -493,7 +540,7 @@ describe("issue comment reopen routes", () => { reopenedFrom: "blocked", }), }), - ); + )); }); it("does not move dependency-blocked issues to todo via POST comments", async () => { @@ -513,7 +560,7 @@ describe("issue comment reopen routes", () => { expect(res.status).toBe(201); expect(mockIssueService.update).not.toHaveBeenCalled(); - expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( "22222222-2222-4222-8222-222222222222", expect.objectContaining({ reason: "issue_commented", @@ -527,7 +574,7 @@ describe("issue comment reopen routes", () => { wakeReason: "issue_commented", }), }), - ); + )); }); it("does not implicitly reopen closed issues via POST comments when no agent is assigned", async () => { @@ -565,7 +612,7 @@ describe("issue comment reopen routes", () => { actorUserId: "local-board", }), ); - expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( "22222222-2222-4222-8222-222222222222", expect.objectContaining({ reason: "issue_reopened_via_comment", @@ -575,7 +622,42 @@ describe("issue comment reopen routes", () => { mutation: "comment", }), }), + )); + }); + + it("does not implicitly reopen closed issues via the PATCH comment path for agent-authored comments", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("done")); + mockIssueService.addComment.mockResolvedValue({ + id: "comment-1", + issueId: "11111111-1111-4111-8111-111111111111", + companyId: "company-1", + body: "hello", + createdAt: new Date(), + updatedAt: new Date(), + authorAgentId: "33333333-3333-4333-8333-333333333333", + 
authorUserId: null, + }); + mockIssueService.update.mockImplementation(async (_id: string, patch: Record) => ({ + ...makeIssue("done"), + ...patch, + })); + + const res = await request(await installActor(createApp(), { + type: "agent", + agentId: "33333333-3333-4333-8333-333333333333", + companyId: "company-1", + source: "agent_key", + runId: "88888888-8888-4888-8888-888888888888", + })) + .patch("/api/issues/11111111-1111-4111-8111-111111111111") + .send({ comment: "hello" }); + + expect(res.status).toBe(200); + expect(mockIssueService.update).not.toHaveBeenCalledWith( + "11111111-1111-4111-8111-111111111111", + expect.objectContaining({ status: "todo" }), ); + expect(mockHeartbeatService.wakeup).not.toHaveBeenCalled(); }); it("does not move dependency-blocked issues to todo via the PATCH comment path", async () => { @@ -609,7 +691,7 @@ describe("issue comment reopen routes", () => { "11111111-1111-4111-8111-111111111111", expect.objectContaining({ status: "todo" }), ); - expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( "22222222-2222-4222-8222-222222222222", expect.objectContaining({ reason: "issue_commented", @@ -618,7 +700,7 @@ describe("issue comment reopen routes", () => { mutation: "comment", }), }), - ); + )); }); it("wakes the assignee when an assigned blocked issue moves back to todo", async () => { @@ -630,6 +712,34 @@ describe("issue comment reopen routes", () => { updatedAt: new Date(), })); + const res = await request(await installActor(createApp())) + .patch("/api/issues/11111111-1111-4111-8111-111111111111") + .send({ status: "todo" }); + + expect(res.status).toBe(200); + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + "22222222-2222-4222-8222-222222222222", + expect.objectContaining({ + source: "automation", + triggerDetail: "system", + reason: "issue_status_changed", + payload: expect.objectContaining({ + issueId: 
"11111111-1111-4111-8111-111111111111", + mutation: "update", + }), + }), + )); + }); + + it("wakes the assignee when an assigned done issue moves back to todo", async () => { + const issue = makeIssue("done"); + mockIssueService.getById.mockResolvedValue(issue); + mockIssueService.update.mockImplementation(async (_id: string, patch: Record) => ({ + ...issue, + ...patch, + updatedAt: new Date(), + })); + const res = await request(await installActor(createApp())) .patch("/api/issues/11111111-1111-4111-8111-111111111111") .send({ status: "todo" }); @@ -645,9 +755,166 @@ describe("issue comment reopen routes", () => { issueId: "11111111-1111-4111-8111-111111111111", mutation: "update", }), + contextSnapshot: expect.objectContaining({ + issueId: "11111111-1111-4111-8111-111111111111", + source: "issue.status_change", + }), }), ); }); + + it("explicit same-agent resume works through the PATCH comment path", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("done")); + mockIssueService.update.mockImplementation(async (_id: string, patch: Record) => ({ + ...makeIssue("done"), + ...patch, + })); + + const res = await request(await installActor(createApp(), agentActor())) + .patch("/api/issues/11111111-1111-4111-8111-111111111111") + .send({ comment: "please validate the follow-up", resume: true }); + + expect(res.status).toBe(200); + expect(mockIssueService.update).toHaveBeenCalledWith( + "11111111-1111-4111-8111-111111111111", + expect.objectContaining({ + status: "todo", + actorAgentId: "22222222-2222-4222-8222-222222222222", + actorUserId: null, + }), + ); + expect(mockLogActivity).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + action: "issue.comment_added", + details: expect.objectContaining({ + commentId: "comment-1", + resumeIntent: true, + followUpRequested: true, + }), + }), + ); + expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + "22222222-2222-4222-8222-222222222222", + expect.objectContaining({ + reason: 
"issue_reopened_via_comment", + payload: expect.objectContaining({ + commentId: "comment-1", + reopenedFrom: "done", + resumeIntent: true, + followUpRequested: true, + }), + }), + ); + }); + + it("keeps generic same-agent comments on closed issues inert", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("done")); + + const res = await request(await installActor(createApp(), agentActor())) + .post("/api/issues/11111111-1111-4111-8111-111111111111/comments") + .send({ body: "follow-up note without intent" }); + + expect(res.status).toBe(201); + expect(mockIssueService.update).not.toHaveBeenCalled(); + expect(mockHeartbeatService.wakeup).not.toHaveBeenCalled(); + }); + + it("explicit same-agent resume comments reopen closed issues and mark the wake payload", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("done")); + mockIssueService.update.mockImplementation(async (_id: string, patch: Record) => ({ + ...makeIssue("done"), + ...patch, + })); + + const res = await request(await installActor(createApp(), agentActor())) + .post("/api/issues/11111111-1111-4111-8111-111111111111/comments") + .send({ body: "please validate the follow-up", resume: true }); + + expect(res.status).toBe(201); + expect(mockIssueService.update).toHaveBeenCalledWith( + "11111111-1111-4111-8111-111111111111", + { status: "todo" }, + ); + expect(mockLogActivity).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + action: "issue.comment_added", + details: expect.objectContaining({ + commentId: "comment-1", + resumeIntent: true, + followUpRequested: true, + }), + }), + ); + expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + "22222222-2222-4222-8222-222222222222", + expect.objectContaining({ + reason: "issue_reopened_via_comment", + payload: expect.objectContaining({ + commentId: "comment-1", + reopenedFrom: "done", + resumeIntent: true, + followUpRequested: true, + }), + contextSnapshot: expect.objectContaining({ + wakeReason: 
"issue_reopened_via_comment", + resumeIntent: true, + followUpRequested: true, + }), + }), + ); + }); + + it("rejects explicit agent resume intent from a non-assignee", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("done")); + + const res = await request(await installActor(createApp(), agentActor("44444444-4444-4444-8444-444444444444"))) + .post("/api/issues/11111111-1111-4111-8111-111111111111/comments") + .send({ body: "restart someone else's work", resume: true }); + + expect(res.status).toBe(403); + expect(res.body.error).toBe("Agent cannot request follow-up for another agent's issue"); + expect(mockIssueService.update).not.toHaveBeenCalled(); + expect(mockIssueService.addComment).not.toHaveBeenCalled(); + expect(mockHeartbeatService.wakeup).not.toHaveBeenCalled(); + }); + + it("rejects explicit resume intent under an active pause hold", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("done")); + mockIssueTreeControlService.getActivePauseHoldGate.mockResolvedValue({ + holdId: "hold-1", + rootIssueId: "root-1", + issueId: "11111111-1111-4111-8111-111111111111", + isRoot: false, + mode: "pause", + reason: "reviewing", + releasePolicy: null, + }); + + const res = await request(await installActor(createApp(), agentActor())) + .post("/api/issues/11111111-1111-4111-8111-111111111111/comments") + .send({ body: "please resume", resume: true }); + + expect(res.status).toBe(409); + expect(res.body.error).toBe("Issue follow-up blocked by active subtree pause hold"); + expect(mockIssueService.update).not.toHaveBeenCalled(); + expect(mockIssueService.addComment).not.toHaveBeenCalled(); + }); + + it("rejects explicit resume intent on cancelled issues", async () => { + mockIssueService.getById.mockResolvedValue(makeIssue("cancelled")); + + const res = await request(await installActor(createApp(), agentActor())) + .post("/api/issues/11111111-1111-4111-8111-111111111111/comments") + .send({ body: "please resume", resume: true }); + + 
expect(res.status).toBe(409); + expect(res.body.error).toBe("Cancelled issues must be restored through the dedicated restore flow"); + expect(mockIssueService.update).not.toHaveBeenCalled(); + expect(mockIssueService.addComment).not.toHaveBeenCalled(); + }); + it("interrupts an active run before a combined comment update", async () => { const issue = { ...makeIssue("todo"), @@ -818,7 +1085,7 @@ describe("issue comment reopen routes", () => { instructions: "Please verify the fix against the reproduction steps and note any residual risk.", }, }); - expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( "33333333-3333-4333-8333-333333333333", expect.objectContaining({ reason: "execution_review_requested", @@ -834,7 +1101,7 @@ describe("issue comment reopen routes", () => { }), }), }), - ); + )); }); it("wakes the return assignee with execution_changes_requested", async () => { @@ -886,7 +1153,7 @@ describe("issue comment reopen routes", () => { }); expect(res.status).toBe(200); - expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( + await waitForWakeup(() => expect(mockHeartbeatService.wakeup).toHaveBeenCalledWith( "22222222-2222-4222-8222-222222222222", expect.objectContaining({ reason: "execution_changes_requested", @@ -900,6 +1167,6 @@ describe("issue comment reopen routes", () => { }), }), }), - ); + )); }); }); diff --git a/server/src/__tests__/issue-liveness.test.ts b/server/src/__tests__/issue-liveness.test.ts index c6752545..bda50dfc 100644 --- a/server/src/__tests__/issue-liveness.test.ts +++ b/server/src/__tests__/issue-liveness.test.ts @@ -67,6 +67,7 @@ describe("issue graph liveness classifier", () => { issueId: blockedId, identifier: "PAP-1703", state: "blocked_by_unassigned_issue", + recoveryIssueId: blockerId, recommendedOwnerAgentId: managerId, dependencyPath: [ expect.objectContaining({ issueId: blockedId }), @@ -76,6 +77,57 @@ describe("issue graph liveness 
classifier", () => { }); }); + it("does not use free-form executive role or name matching for recovery ownership", () => { + const rootAgentId = "root-agent"; + const spoofedExecutiveId = "spoofed-executive"; + + const findings = classifyIssueGraphLiveness({ + issues: [ + issue({ + assigneeAgentId: null, + createdByAgentId: null, + }), + issue({ + id: blockerId, + identifier: "PAP-1704", + title: "Missing unblock work", + status: "todo", + assigneeAgentId: null, + createdByAgentId: null, + }), + ], + relations: blocks, + agents: [ + agent({ + id: spoofedExecutiveId, + name: "Chief Executive Recovery", + role: "cto", + title: "CEO", + reportsTo: rootAgentId, + }), + agent({ + id: rootAgentId, + name: "Root Operator", + role: "operator", + title: null, + reportsTo: null, + }), + ], + }); + + expect(findings).toHaveLength(1); + expect(findings[0]?.recommendedOwnerAgentId).toBe(rootAgentId); + expect(findings[0]?.recommendedOwnerCandidates[0]).toMatchObject({ + agentId: rootAgentId, + reason: "root_agent", + sourceIssueId: blockerId, + }); + expect(findings[0]?.recommendedOwnerCandidateAgentIds).toEqual([ + rootAgentId, + spoofedExecutiveId, + ]); + }); + it("does not flag a live blocked chain with an active assignee and wake path", () => { const findings = classifyIssueGraphLiveness({ issues: [ diff --git a/server/src/__tests__/issue-tree-control-routes.test.ts b/server/src/__tests__/issue-tree-control-routes.test.ts index efdf6b9b..08dc0d41 100644 --- a/server/src/__tests__/issue-tree-control-routes.test.ts +++ b/server/src/__tests__/issue-tree-control-routes.test.ts @@ -195,6 +195,61 @@ describe("issue tree control routes", () => { ); }); + it("still marks affected issues cancelled when run interruption fails", async () => { + const app = await createApp({ + type: "board", + userId: "user-1", + companyIds: ["company-2"], + source: "session", + isInstanceAdmin: false, + }); + mockTreeControlService.createHold.mockResolvedValue({ + hold: { + id: 
"33333333-3333-4333-8333-333333333333", + mode: "cancel", + reason: "cancel subtree", + }, + preview: { + mode: "cancel", + totals: { affectedIssues: 1 }, + warnings: [], + activeRuns: [ + { + id: "44444444-4444-4444-8444-444444444444", + issueId: "11111111-1111-4111-8111-111111111111", + }, + ], + }, + }); + mockTreeControlService.cancelIssueStatusesForHold.mockResolvedValue({ + updatedIssueIds: ["11111111-1111-4111-8111-111111111111"], + updatedIssues: [], + }); + mockHeartbeatService.cancelRun.mockRejectedValue(new Error("adapter process did not exit")); + + const res = await request(app) + .post("/api/issues/11111111-1111-4111-8111-111111111111/tree-holds") + .send({ mode: "cancel", reason: "cancel subtree" }); + + expect(res.status).toBe(201); + expect(mockHeartbeatService.cancelRun).toHaveBeenCalledWith("44444444-4444-4444-8444-444444444444"); + expect(mockTreeControlService.cancelIssueStatusesForHold).toHaveBeenCalledWith( + "company-2", + "11111111-1111-4111-8111-111111111111", + "33333333-3333-4333-8333-333333333333", + ); + expect(mockLogActivity).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + action: "issue.tree_hold_run_interrupt_failed", + entityId: "44444444-4444-4444-8444-444444444444", + details: expect.objectContaining({ + error: "adapter process did not exit", + }), + }), + ); + }); + it("restores affected issues and can request explicit wakeups", async () => { const app = await createApp({ type: "board", diff --git a/server/src/__tests__/issue-tree-control-service.test.ts b/server/src/__tests__/issue-tree-control-service.test.ts index d9db9472..60953c7c 100644 --- a/server/src/__tests__/issue-tree-control-service.test.ts +++ b/server/src/__tests__/issue-tree-control-service.test.ts @@ -3,9 +3,11 @@ import { eq, inArray } from "drizzle-orm"; import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest"; import { agents, + agentWakeupRequests, companies, createDb, heartbeatRuns, + issueComments, 
issueTreeHoldMembers, issueTreeHolds, issues, @@ -38,8 +40,10 @@ describeEmbeddedPostgres("issueTreeControlService", () => { afterEach(async () => { await db.delete(issueTreeHoldMembers); await db.delete(issueTreeHolds); + await db.delete(issueComments); await db.delete(issues); await db.delete(heartbeatRuns); + await db.delete(agentWakeupRequests); await db.delete(agents); await db.delete(companies); }); @@ -340,6 +344,12 @@ describeEmbeddedPostgres("issueTreeControlService", () => { const childIssueId = randomUUID(); const rootRunId = randomUUID(); const childRunId = randomUUID(); + const forgedRunId = randomUUID(); + const rootWakeupRequestId = randomUUID(); + const childWakeupRequestId = randomUUID(); + const forgedWakeupRequestId = randomUUID(); + const rootCommentId = randomUUID(); + const childCommentId = randomUUID(); await db.insert(companies).values({ id: companyId, @@ -377,6 +387,63 @@ describeEmbeddedPostgres("issueTreeControlService", () => { assigneeAgentId: agentId, }, ]); + await db.insert(issueComments).values([ + { + id: rootCommentId, + companyId, + issueId: rootIssueId, + authorUserId: "board-user", + body: "Please answer this root issue question.", + }, + { + id: childCommentId, + companyId, + issueId: childIssueId, + authorUserId: "board-user", + body: "Please answer this child issue question.", + }, + ]); + await db.insert(agentWakeupRequests).values([ + { + id: rootWakeupRequestId, + companyId, + agentId, + source: "automation", + triggerDetail: "system", + reason: "issue_commented", + payload: { issueId: rootIssueId, commentId: rootCommentId }, + status: "queued", + requestedByActorType: "user", + requestedByActorId: "board-user", + runId: rootRunId, + }, + { + id: forgedWakeupRequestId, + companyId, + agentId, + source: "on_demand", + triggerDetail: "manual", + reason: "issue_commented", + payload: { issueId: childIssueId, commentId: childCommentId }, + status: "queued", + requestedByActorType: "agent", + requestedByActorId: agentId, + 
runId: forgedRunId, + }, + { + id: childWakeupRequestId, + companyId, + agentId, + source: "automation", + triggerDetail: "system", + reason: "issue_commented", + payload: { issueId: childIssueId, commentId: childCommentId }, + status: "queued", + requestedByActorType: "user", + requestedByActorId: "board-user", + runId: childRunId, + }, + ]); await db.insert(heartbeatRuns).values([ { id: rootRunId, @@ -385,7 +452,29 @@ describeEmbeddedPostgres("issueTreeControlService", () => { invocationSource: "automation", triggerDetail: "system", status: "queued", - contextSnapshot: { issueId: rootIssueId, wakeReason: "issue_commented", commentId: randomUUID() }, + wakeupRequestId: rootWakeupRequestId, + contextSnapshot: { + issueId: rootIssueId, + wakeReason: "issue_commented", + commentId: rootCommentId, + wakeCommentId: rootCommentId, + source: "issue.comment", + }, + }, + { + id: forgedRunId, + companyId, + agentId, + invocationSource: "on_demand", + triggerDetail: "manual", + status: "queued", + wakeupRequestId: forgedWakeupRequestId, + contextSnapshot: { + issueId: childIssueId, + wakeReason: "issue_commented", + commentId: childCommentId, + wakeCommentId: childCommentId, + }, }, { id: childRunId, @@ -394,7 +483,14 @@ describeEmbeddedPostgres("issueTreeControlService", () => { invocationSource: "automation", triggerDetail: "system", status: "queued", - contextSnapshot: { issueId: childIssueId, wakeReason: "issue_commented", commentId: randomUUID() }, + wakeupRequestId: childWakeupRequestId, + contextSnapshot: { + issueId: childIssueId, + wakeReason: "issue_commented", + commentId: childCommentId, + wakeCommentId: childCommentId, + source: "issue.comment", + }, }, ]); @@ -413,6 +509,13 @@ describeEmbeddedPostgres("issueTreeControlService", () => { mode: "pause", }), }); + await expect(issueSvc.checkout(childIssueId, agentId, ["todo"], forgedRunId)).rejects.toMatchObject({ + status: 409, + details: expect.objectContaining({ + rootIssueId, + mode: "pause", + }), + }); const 
checkedOutChild = await issueSvc.checkout(childIssueId, agentId, ["todo"], childRunId); expect(checkedOutChild.status).toBe("in_progress"); diff --git a/server/src/__tests__/issues-goal-context-routes.test.ts b/server/src/__tests__/issues-goal-context-routes.test.ts index 42285401..5247ec74 100644 --- a/server/src/__tests__/issues-goal-context-routes.test.ts +++ b/server/src/__tests__/issues-goal-context-routes.test.ts @@ -9,6 +9,7 @@ const mockIssueService = vi.hoisted(() => ({ findMentionedProjectIds: vi.fn(), getCommentCursor: vi.fn(), getComment: vi.fn(), + listBlockerAttention: vi.fn(), listAttachments: vi.fn(), })); @@ -166,6 +167,7 @@ describe("issue goal context routes", () => { latestCommentAt: null, }); mockIssueService.getComment.mockResolvedValue(null); + mockIssueService.listBlockerAttention.mockResolvedValue(new Map()); mockIssueService.listAttachments.mockResolvedValue([]); mockDocumentsService.getIssueDocumentPayload.mockResolvedValue({}); mockDocumentsService.getIssueDocumentByKey.mockResolvedValue(null); diff --git a/server/src/__tests__/issues-service.test.ts b/server/src/__tests__/issues-service.test.ts index 05ea80c4..c8be8ff1 100644 --- a/server/src/__tests__/issues-service.test.ts +++ b/server/src/__tests__/issues-service.test.ts @@ -1401,6 +1401,49 @@ describeEmbeddedPostgres("issueService blockers and dependency wake readiness", expect(blockedRelations.blockedBy.map((relation) => relation.id)).toEqual([blockerId]); }); + it("adds terminal blockers to immediate blocked-by summaries", async () => { + const companyId = randomUUID(); + await db.insert(companies).values({ + id: companyId, + name: "Paperclip", + issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`, + requireBoardApprovalForNewAgents: false, + }); + + const issueA = randomUUID(); + const issueB = randomUUID(); + const issueC = randomUUID(); + const issueD = randomUUID(); + await db.insert(issues).values([ + { id: issueA, companyId, identifier: "PAP-1", title: 
"Issue A", status: "blocked", priority: "medium" }, + { id: issueB, companyId, identifier: "PAP-2", title: "Issue B", status: "blocked", priority: "medium" }, + { id: issueC, companyId, identifier: "PAP-3", title: "Issue C", status: "blocked", priority: "medium" }, + { id: issueD, companyId, identifier: "PAP-4", title: "Issue D", status: "todo", priority: "high" }, + ]); + + await svc.update(issueC, { blockedByIssueIds: [issueD] }); + await svc.update(issueB, { blockedByIssueIds: [issueC] }); + await svc.update(issueA, { blockedByIssueIds: [issueB] }); + + const relations = await svc.getRelationSummaries(issueA); + + expect(relations.blockedBy).toHaveLength(1); + expect(relations.blockedBy[0]).toMatchObject({ + id: issueB, + identifier: "PAP-2", + title: "Issue B", + terminalBlockers: [ + expect.objectContaining({ + id: issueD, + identifier: "PAP-4", + title: "Issue D", + status: "todo", + priority: "high", + }), + ], + }); + }); + it("rejects blocking cycles", async () => { const companyId = randomUUID(); await db.insert(companies).values({ diff --git a/server/src/__tests__/recovery-classifiers.test.ts b/server/src/__tests__/recovery-classifiers.test.ts new file mode 100644 index 00000000..9365ece7 --- /dev/null +++ b/server/src/__tests__/recovery-classifiers.test.ts @@ -0,0 +1,146 @@ +import { describe, expect, it } from "vitest"; +import { classifyIssueGraphLiveness as classifyIssueGraphLivenessCompat } from "../services/issue-liveness.ts"; +import { decideRunLivenessContinuation as decideRunLivenessContinuationCompat } from "../services/run-continuations.ts"; +import { + RECOVERY_KEY_PREFIXES, + RECOVERY_ORIGIN_KINDS, + RECOVERY_REASON_KINDS, + buildIssueGraphLivenessIncidentKey, + buildIssueGraphLivenessLeafKey, + buildRunLivenessContinuationIdempotencyKey, + classifyIssueGraphLiveness, + decideRunLivenessContinuation, + parseIssueGraphLivenessIncidentKey, +} from "../services/recovery/index.ts"; + +const companyId = "company-1"; +const agentId = "agent-1"; 
+const managerId = "manager-1"; +const issueId = "issue-1"; +const blockerId = "blocker-1"; +const runId = "run-1"; + +describe("recovery classifier boundary", () => { + it("keeps issue graph liveness classifier parity with the compatibility export", () => { + const input = { + issues: [ + { + id: issueId, + companyId, + identifier: "PAP-2073", + title: "Centralize recovery classifiers", + status: "blocked", + assigneeAgentId: agentId, + assigneeUserId: null, + createdByAgentId: null, + createdByUserId: null, + executionState: null, + }, + { + id: blockerId, + companyId, + identifier: "PAP-2074", + title: "Move recovery side effects", + status: "todo", + assigneeAgentId: null, + assigneeUserId: null, + createdByAgentId: null, + createdByUserId: null, + executionState: null, + }, + ], + relations: [{ companyId, blockerIssueId: blockerId, blockedIssueId: issueId }], + agents: [ + { + id: agentId, + companyId, + name: "Coder", + role: "engineer", + status: "idle", + reportsTo: managerId, + }, + { + id: managerId, + companyId, + name: "CTO", + role: "cto", + status: "idle", + reportsTo: null, + }, + ], + }; + + expect(classifyIssueGraphLiveness(input)).toEqual(classifyIssueGraphLivenessCompat(input)); + }); + + it("keeps run liveness continuation decision parity with the compatibility export", () => { + const input = { + run: { + id: runId, + companyId, + agentId, + continuationAttempt: 0, + } as never, + issue: { + id: issueId, + companyId, + identifier: "PAP-2073", + title: "Centralize recovery classifiers", + status: "in_progress", + assigneeAgentId: agentId, + executionState: null, + projectId: null, + } as never, + agent: { + id: agentId, + companyId, + status: "idle", + } as never, + livenessState: "plan_only" as const, + livenessReason: "Planned without acting", + nextAction: "Take the first concrete action.", + budgetBlocked: false, + idempotentWakeExists: false, + }; + + 
expect(decideRunLivenessContinuation(input)).toEqual(decideRunLivenessContinuationCompat(input)); + }); + + it("keeps recovery origin and idempotency keys stable", () => { + expect(RECOVERY_ORIGIN_KINDS).toMatchObject({ + issueGraphLivenessEscalation: "harness_liveness_escalation", + strandedIssueRecovery: "stranded_issue_recovery", + staleActiveRunEvaluation: "stale_active_run_evaluation", + }); + expect(RECOVERY_REASON_KINDS.runLivenessContinuation).toBe("run_liveness_continuation"); + expect(RECOVERY_KEY_PREFIXES.issueGraphLivenessIncident).toBe("harness_liveness"); + expect(RECOVERY_KEY_PREFIXES.issueGraphLivenessLeaf).toBe("harness_liveness_leaf"); + + const incidentKey = buildIssueGraphLivenessIncidentKey({ + companyId, + issueId, + state: "blocked_by_unassigned_issue", + blockerIssueId: blockerId, + }); + expect(incidentKey).toBe( + "harness_liveness:company-1:issue-1:blocked_by_unassigned_issue:blocker-1", + ); + expect(parseIssueGraphLivenessIncidentKey(incidentKey)).toEqual({ + companyId, + issueId, + state: "blocked_by_unassigned_issue", + leafIssueId: blockerId, + }); + expect(buildIssueGraphLivenessLeafKey({ + companyId, + state: "blocked_by_unassigned_issue", + leafIssueId: blockerId, + })).toBe("harness_liveness_leaf:company-1:blocked_by_unassigned_issue:blocker-1"); + expect(buildRunLivenessContinuationIdempotencyKey({ + issueId, + sourceRunId: runId, + livenessState: "plan_only", + nextAttempt: 1, + })).toBe("run_liveness_continuation:issue-1:run-1:plan_only:1"); + }); +}); diff --git a/server/src/__tests__/redaction.test.ts b/server/src/__tests__/redaction.test.ts index 21a1a8b9..e22eb597 100644 --- a/server/src/__tests__/redaction.test.ts +++ b/server/src/__tests__/redaction.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { REDACTED_EVENT_VALUE, redactEventPayload, sanitizeRecord } from "../redaction.js"; +import { REDACTED_EVENT_VALUE, redactEventPayload, redactSensitiveText, sanitizeRecord } from 
"../redaction.js"; describe("redaction", () => { it("redacts sensitive keys and nested secret values", () => { @@ -63,4 +63,25 @@ describe("redaction", () => { safe: "value", }); }); + + it("redacts common secret shapes from unstructured text", () => { + const jwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"; + const githubToken = "ghp_1234567890abcdefghijklmnopqrstuvwxyz"; + const input = [ + "Authorization: Bearer live-bearer-token-value", + `payload {"apiKey":"json-secret-value"}`, + `escaped {\\"apiKey\\":\\"escaped-json-secret\\"}`, + `GITHUB_TOKEN=${githubToken}`, + `session=${jwt}`, + ].join("\n"); + + const result = redactSensitiveText(input); + + expect(result).toContain(REDACTED_EVENT_VALUE); + expect(result).not.toContain("live-bearer-token-value"); + expect(result).not.toContain("json-secret-value"); + expect(result).not.toContain("escaped-json-secret"); + expect(result).not.toContain(githubToken); + expect(result).not.toContain(jwt); + }); }); diff --git a/server/src/__tests__/run-liveness.test.ts b/server/src/__tests__/run-liveness.test.ts index 08af19b7..2feeb076 100644 --- a/server/src/__tests__/run-liveness.test.ts +++ b/server/src/__tests__/run-liveness.test.ts @@ -27,6 +27,7 @@ describe("run liveness classifier", () => { }); expect(classification.livenessState).toBe("plan_only"); + expect(classification.actionability).toBe("runnable"); expect(classification.nextAction).toContain("inspect the repo"); }); @@ -34,6 +35,7 @@ describe("run liveness classifier", () => { const classification = classifyRunLiveness(baseInput); expect(classification.livenessState).toBe("empty_response"); + expect(classification.actionability).toBe("unknown"); }); it("treats issue comments, documents, products, and actions as progress", () => { @@ -128,5 +130,81 @@ describe("run liveness classifier", () => { }); expect(classification.livenessState).toBe("blocked"); + 
expect(classification.actionability).toBe("blocked_external"); + }); + + it("treats PAP-2000-style validation output as runnable follow-up, not an external blocker", () => { + const classification = classifyRunLiveness({ + ...baseInput, + resultJson: { + summary: "PAP-1949 remains blocked until PAP-2000 is resolved.", + }, + issueCommentBodies: [ + [ + "Validation is ready for the next pass.", + "", + "- Blocked chain context: PAP-1949 -> PAP-1999 -> PAP-2000", + "- Next action: run npm test and report the row counts.", + ].join("\n"), + ], + }); + + expect(classification.livenessState).toBe("plan_only"); + expect(classification.actionability).toBe("runnable"); + expect(classification.nextAction).toBe("run npm test and report the row counts."); + }); + + it("prefers durable comments over raw transcript next-action noise", () => { + const classification = classifyRunLiveness({ + ...baseInput, + issueCommentBodies: ["Next action: run pnpm test -- --runInBand."], + stdoutExcerpt: [ + "tool_call: write", + "command: rm -rf production-data", + "Next action: deploy to production", + ].join("\n"), + }); + + expect(classification.actionability).toBe("runnable"); + expect(classification.nextAction).toBe("run pnpm test -- --runInBand."); + }); + + it("keeps approval requests out of automatic continuation", () => { + const classification = classifyRunLiveness({ + ...baseInput, + resultJson: { + summary: "Next action: wait for board approval before continuing.", + }, + }); + + expect(classification.livenessState).toBe("blocked"); + expect(classification.actionability).toBe("approval_required"); + expect(classification.nextAction).toBe("wait for board approval before continuing."); + }); + + it("routes production-sensitive next actions to manager review", () => { + const classification = classifyRunLiveness({ + ...baseInput, + resultJson: { + summary: "Next action: deploy to production and verify live traffic.", + }, + }); + + 
expect(classification.livenessState).toBe("needs_followup"); + expect(classification.actionability).toBe("manager_review"); + expect(classification.nextAction).toBe("deploy to production and verify live traffic."); + }); + + it("marks unclear useful output as unknown actionability", () => { + const classification = classifyRunLiveness({ + ...baseInput, + resultJson: { + summary: "Observed mixed output and left notes for a later pass.", + }, + }); + + expect(classification.livenessState).toBe("needs_followup"); + expect(classification.actionability).toBe("unknown"); + expect(classification.nextAction).toBeNull(); }); }); diff --git a/server/src/index.ts b/server/src/index.ts index 0ee9df43..67eec273 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -698,6 +698,12 @@ export async function startServer(): Promise { logger.warn({ ...reconciled }, "startup issue-graph liveness reconciliation created escalations"); } }) + .then(async () => { + const scanned = await heartbeat.scanSilentActiveRuns(); + if (scanned.created > 0 || scanned.escalated > 0) { + logger.warn({ ...scanned }, "startup active-run output watchdog created review work"); + } + }) .catch((err) => { logger.error({ err }, "startup heartbeat recovery failed"); }); @@ -750,6 +756,12 @@ export async function startServer(): Promise { logger.warn({ ...reconciled }, "periodic issue-graph liveness reconciliation created escalations"); } }) + .then(async () => { + const scanned = await heartbeat.scanSilentActiveRuns(); + if (scanned.created > 0 || scanned.escalated > 0) { + logger.warn({ ...scanned }, "periodic active-run output watchdog created review work"); + } + }) .catch((err) => { logger.error({ err }, "periodic heartbeat recovery failed"); }); diff --git a/server/src/redaction.ts b/server/src/redaction.ts index 2c562ae5..75ebd56e 100644 --- a/server/src/redaction.ts +++ b/server/src/redaction.ts @@ -1,6 +1,16 @@ const SECRET_PAYLOAD_KEY_RE = 
/(api[-_]?key|access[-_]?token|auth(?:_?token)?|authorization|bearer|secret|passwd|password|credential|jwt|private[-_]?key|cookie|connectionstring)/i; const JWT_VALUE_RE = /^[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+(?:\.[A-Za-z0-9_-]+)?$/; +const JWT_TEXT_RE = /\b[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}(?:\.[A-Za-z0-9_-]{8,})?\b/g; +const OPENAI_KEY_TEXT_RE = /\bsk-[A-Za-z0-9_-]{12,}\b/g; +const GITHUB_TOKEN_TEXT_RE = /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/g; +const AUTHORIZATION_BEARER_TEXT_RE = /(\bAuthorization\s*:\s*Bearer\s+)[^\s"'`]+/gi; +const ENV_SECRET_ASSIGNMENT_TEXT_RE = + /(\b[A-Za-z0-9_]*(?:TOKEN|KEY|SECRET|PASSWORD|PASSWD|AUTHORIZATION|JWT)[A-Za-z0-9_]*\s*=\s*)[^\s"'`]+/gi; +const JSON_SECRET_FIELD_TEXT_RE = + /((?:"|')?(?:api[-_]?key|access[-_]?token|auth(?:_?token)?|authorization|bearer|secret|passwd|password|credential|jwt|private[-_]?key|cookie|connectionstring)(?:"|')?\s*:\s*(?:"|'))[^"'`\r\n]+((?:"|'))/gi; +const ESCAPED_JSON_SECRET_FIELD_TEXT_RE = + /((?:\\")?(?:api[-_]?key|access[-_]?token|auth(?:_?token)?|authorization|bearer|secret|passwd|password|credential|jwt|private[-_]?key|cookie|connectionstring)(?:\\")?\s*:\s*(?:\\"))[^\\\r\n]+((?:\\"))/gi; export const REDACTED_EVENT_VALUE = "***REDACTED***"; function isPlainObject(value: unknown): value is Record { @@ -57,3 +67,14 @@ export function redactEventPayload(payload: Record | null): Rec if (!isPlainObject(payload)) return payload; return sanitizeRecord(payload); } + +export function redactSensitiveText(input: string): string { + return input + .replace(AUTHORIZATION_BEARER_TEXT_RE, `$1${REDACTED_EVENT_VALUE}`) + .replace(JSON_SECRET_FIELD_TEXT_RE, `$1${REDACTED_EVENT_VALUE}$2`) + .replace(ESCAPED_JSON_SECRET_FIELD_TEXT_RE, `$1${REDACTED_EVENT_VALUE}$2`) + .replace(ENV_SECRET_ASSIGNMENT_TEXT_RE, `$1${REDACTED_EVENT_VALUE}`) + .replace(OPENAI_KEY_TEXT_RE, REDACTED_EVENT_VALUE) + .replace(GITHUB_TOKEN_TEXT_RE, REDACTED_EVENT_VALUE) + .replace(JWT_TEXT_RE, REDACTED_EVENT_VALUE); 
+} diff --git a/server/src/routes/access.ts b/server/src/routes/access.ts index 86c93c6d..3df49fd6 100644 --- a/server/src/routes/access.ts +++ b/server/src/routes/access.ts @@ -2270,11 +2270,14 @@ export function setInviteResolutionNetworkForTest( : defaultInviteResolutionNetwork; } -async function lookupInviteResolutionHostname(hostname: string) { +async function lookupInviteResolutionHostname( + hostname: string, + network: InviteResolutionNetwork = inviteResolutionNetwork +) { let timeout: ReturnType | null = null; try { return await Promise.race([ - inviteResolutionNetwork.lookup(hostname), + network.lookup(hostname), new Promise((_, reject) => { timeout = setTimeout( () => @@ -2296,7 +2299,8 @@ async function lookupInviteResolutionHostname(hostname: string) { } async function resolveInviteResolutionTarget( - url: URL + url: URL, + network: InviteResolutionNetwork = inviteResolutionNetwork ): Promise { const hostname = hostnameForResolution(url); if (parseIpv4Address(hostname)) { @@ -2328,7 +2332,7 @@ async function resolveInviteResolutionTarget( tlsServername: undefined, }; } - const results = await lookupInviteResolutionHostname(hostname); + const results = await lookupInviteResolutionHostname(hostname, network); if (results.length === 0) { throw badRequest("url hostname did not resolve to any addresses"); } @@ -2354,11 +2358,12 @@ async function resolveInviteResolutionTarget( async function probeInviteResolutionTarget( target: ResolvedInviteResolutionTarget, - timeoutMs: number + timeoutMs: number, + network: InviteResolutionNetwork = inviteResolutionNetwork ): Promise { const startedAt = Date.now(); try { - const response = await inviteResolutionNetwork.requestHead(target, timeoutMs); + const response = await network.requestHead(target, timeoutMs); const durationMs = Date.now() - startedAt; if ( response.httpStatus !== null && @@ -2421,12 +2426,16 @@ export function accessRoutes( deploymentExposure: DeploymentExposure; bindHost: string; allowedHostnames: 
string[]; + inviteResolutionNetwork?: Partial; } ) { const router = Router(); const access = accessService(db); const boardAuth = boardAuthService(db); const agents = agentService(db); + const routeInviteResolutionNetwork = opts.inviteResolutionNetwork + ? { ...defaultInviteResolutionNetwork, ...opts.inviteResolutionNetwork } + : inviteResolutionNetwork; async function assertInstanceAdmin(req: Request) { if (req.actor.type !== "board") throw unauthorized(); @@ -3175,8 +3184,8 @@ export function accessRoutes( const timeoutMs = Number.isFinite(parsedTimeoutMs) ? Math.max(1000, Math.min(15000, Math.floor(parsedTimeoutMs))) : 5000; - const resolvedTarget = await resolveInviteResolutionTarget(target); - const probe = await probeInviteResolutionTarget(resolvedTarget, timeoutMs); + const resolvedTarget = await resolveInviteResolutionTarget(target, routeInviteResolutionNetwork); + const probe = await probeInviteResolutionTarget(resolvedTarget, timeoutMs, routeInviteResolutionNetwork); res.json({ inviteId: invite.id, testResolutionPath: `/api/invites/${token}/test-resolution`, diff --git a/server/src/routes/agents.ts b/server/src/routes/agents.ts index 229b5967..766d0614 100644 --- a/server/src/routes/agents.ts +++ b/server/src/routes/agents.ts @@ -81,6 +81,7 @@ import { } from "../services/default-agent-instructions.js"; import { getTelemetryClient } from "../telemetry.js"; import { assertEnvironmentSelectionForCompany } from "./environment-selection.js"; +import { recoveryService } from "../services/recovery/service.js"; const RUN_LOG_DEFAULT_LIMIT_BYTES = 256_000; const RUN_LOG_MAX_LIMIT_BYTES = 1024 * 1024; @@ -91,6 +92,12 @@ function readRunLogLimitBytes(value: unknown) { return Math.max(1, Math.min(RUN_LOG_MAX_LIMIT_BYTES, Math.trunc(parsed))); } +function readLiveRunsQueryInt(value: unknown, max: number, fallback = 0) { + const parsed = Number(value); + if (!Number.isFinite(parsed)) return fallback; + return Math.max(0, Math.min(max, Math.trunc(parsed))); +} + export 
function agentRoutes( db: Db, options: { pluginWorkerManager?: PluginWorkerManager } = {}, @@ -142,6 +149,7 @@ export function agentRoutes( const heartbeat = heartbeatService(db, { pluginWorkerManager: options.pluginWorkerManager, }); + const recovery = recoveryService(db, { enqueueWakeup: heartbeat.wakeup }); const issueApprovalsSvc = issueApprovalService(db); const secretsSvc = secretService(db); const instructions = agentInstructionsService(); @@ -2532,11 +2540,12 @@ export function agentRoutes( const companyId = req.params.companyId as string; assertCompanyAccess(req, companyId); - const minCountParam = req.query.minCount as string | undefined; - const minCount = minCountParam ? Math.max(0, Math.min(20, parseInt(minCountParam, 10) || 0)) : 0; + const minCount = readLiveRunsQueryInt(req.query.minCount, 50); + const limit = readLiveRunsQueryInt(req.query.limit, 50); const columns = { id: heartbeatRuns.id, + companyId: heartbeatRuns.companyId, status: heartbeatRuns.status, invocationSource: heartbeatRuns.invocationSource, triggerDetail: heartbeatRuns.triggerDetail, @@ -2546,15 +2555,21 @@ export function agentRoutes( agentId: heartbeatRuns.agentId, agentName: agentsTable.name, adapterType: agentsTable.adapterType, + logBytes: heartbeatRuns.logBytes, livenessState: heartbeatRuns.livenessState, livenessReason: heartbeatRuns.livenessReason, continuationAttempt: heartbeatRuns.continuationAttempt, lastUsefulActionAt: heartbeatRuns.lastUsefulActionAt, nextAction: heartbeatRuns.nextAction, + lastOutputAt: heartbeatRuns.lastOutputAt, + lastOutputSeq: heartbeatRuns.lastOutputSeq, + lastOutputStream: heartbeatRuns.lastOutputStream, + lastOutputBytes: heartbeatRuns.lastOutputBytes, + processStartedAt: heartbeatRuns.processStartedAt, issueId: sql`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("issueId"), }; - const liveRuns = await db + const liveRunsQuery = db .select(columns) .from(heartbeatRuns) .innerJoin(agentsTable, eq(heartbeatRuns.agentId, agentsTable.id)) @@ 
-2566,7 +2581,10 @@ export function agentRoutes( ) .orderBy(desc(heartbeatRuns.createdAt)); - if (minCount > 0 && liveRuns.length < minCount) { + const liveRuns = limit > 0 ? await liveRunsQuery.limit(limit) : await liveRunsQuery; + const targetRunCount = limit > 0 ? Math.min(minCount, limit) : minCount; + + if (targetRunCount > 0 && liveRuns.length < targetRunCount) { const activeIds = liveRuns.map((r) => r.id); const recentRuns = await db .select(columns) @@ -2580,13 +2598,20 @@ export function agentRoutes( ), ) .orderBy(desc(heartbeatRuns.createdAt)) - .limit(minCount - liveRuns.length); + .limit(targetRunCount - liveRuns.length); - res.json([...liveRuns, ...recentRuns]); + const rows = [...liveRuns, ...recentRuns]; + res.json(await Promise.all(rows.map(async (run) => ({ + ...run, + outputSilence: await heartbeat.buildRunOutputSilence(run), + })))); return; } - res.json(liveRuns); + res.json(await Promise.all(liveRuns.map(async (run) => ({ + ...run, + outputSilence: await heartbeat.buildRunOutputSilence(run), + })))); }); router.get("/heartbeat-runs/:runId", async (req, res) => { @@ -2600,7 +2625,7 @@ export function agentRoutes( const retryExhaustedReason = await heartbeat.getRetryExhaustedReason(runId); res.json( redactCurrentUserValue( - { ...run, retryExhaustedReason }, + { ...run, retryExhaustedReason, outputSilence: await heartbeat.buildRunOutputSilence(run) }, await getCurrentUserRedactionOptions(), ), ); @@ -2630,6 +2655,42 @@ export function agentRoutes( res.json(run); }); + router.post("/heartbeat-runs/:runId/watchdog-decisions", async (req, res) => { + const runId = req.params.runId as string; + const existing = await heartbeat.getRun(runId); + if (!existing) { + res.status(404).json({ error: "Heartbeat run not found" }); + return; + } + assertCompanyAccess(req, existing.companyId); + const decision = typeof req.body?.decision === "string" ? 
req.body.decision : ""; + if (!["snooze", "continue", "dismissed_false_positive"].includes(decision)) { + res.status(400).json({ error: "Unsupported watchdog decision" }); + return; + } + const evaluationIssueId = typeof req.body?.evaluationIssueId === "string" ? req.body.evaluationIssueId : null; + const reason = typeof req.body?.reason === "string" ? req.body.reason.slice(0, 4000) : null; + const snoozedUntil = decision === "snooze" + ? new Date(String(req.body?.snoozedUntil ?? "")) + : null; + if (decision === "snooze" && (!snoozedUntil || Number.isNaN(snoozedUntil.getTime()) || snoozedUntil <= new Date())) { + res.status(400).json({ error: "snoozedUntil must be a future ISO datetime" }); + return; + } + + const row = await recovery.recordWatchdogDecision({ + runId: existing.id, + actor: req.actor, + decision: decision as "snooze" | "continue" | "dismissed_false_positive", + evaluationIssueId, + reason, + snoozedUntil, + createdByRunId: req.actor.runId ?? null, + }); + + res.json(row); + }); + router.get("/heartbeat-runs/:runId/events", async (req, res) => { const runId = req.params.runId as string; const run = await heartbeat.getRun(runId); @@ -2730,11 +2791,17 @@ export function agentRoutes( agentId: heartbeatRuns.agentId, agentName: agentsTable.name, adapterType: agentsTable.adapterType, + logBytes: heartbeatRuns.logBytes, livenessState: heartbeatRuns.livenessState, livenessReason: heartbeatRuns.livenessReason, continuationAttempt: heartbeatRuns.continuationAttempt, lastUsefulActionAt: heartbeatRuns.lastUsefulActionAt, nextAction: heartbeatRuns.nextAction, + lastOutputAt: heartbeatRuns.lastOutputAt, + lastOutputSeq: heartbeatRuns.lastOutputSeq, + lastOutputStream: heartbeatRuns.lastOutputStream, + lastOutputBytes: heartbeatRuns.lastOutputBytes, + processStartedAt: heartbeatRuns.processStartedAt, }) .from(heartbeatRuns) .innerJoin(agentsTable, eq(heartbeatRuns.agentId, agentsTable.id)) @@ -2747,7 +2814,10 @@ export function agentRoutes( ) 
.orderBy(desc(heartbeatRuns.createdAt)); - res.json(liveRuns); + res.json(await Promise.all(liveRuns.map(async (run) => ({ + ...run, + outputSilence: await heartbeat.buildRunOutputSilence({ ...run, companyId: issue.companyId }), + })))); }); router.get("/issues/:issueId/active-run", async (req, res) => { @@ -2795,6 +2865,7 @@ export function agentRoutes( agentId: agent.id, agentName: agent.name, adapterType: agent.adapterType, + outputSilence: await heartbeat.buildRunOutputSilence({ ...run, companyId: issue.companyId }), }); }); diff --git a/server/src/routes/issue-tree-control.ts b/server/src/routes/issue-tree-control.ts index 1a0ff062..917c4454 100644 --- a/server/src/routes/issue-tree-control.ts +++ b/server/src/routes/issue-tree-control.ts @@ -10,6 +10,26 @@ import { validate } from "../middleware/validate.js"; import { heartbeatService, issueService, issueTreeControlService, logActivity } from "../services/index.js"; import { assertBoard, assertCompanyAccess, getActorInfo } from "./authz.js"; +const TREE_RUN_CANCELLATION_RESPONSE_WAIT_MS = 1_000; + +function errorToMessage(error: unknown) { + return error instanceof Error ? 
error.message : String(error); +} + +async function waitForRunCancellationTasks(tasks: Promise[]) { + let timeout: ReturnType | null = null; + try { + await Promise.race([ + Promise.all(tasks), + new Promise((resolve) => { + timeout = setTimeout(resolve, TREE_RUN_CANCELLATION_RESPONSE_WAIT_MS); + }), + ]); + } finally { + if (timeout) clearTimeout(timeout); + } +} + export function issueTreeControlRoutes(db: Db) { const router = Router(); const issuesSvc = issueService(db); @@ -91,25 +111,48 @@ export function issueTreeControlRoutes(db: Db) { }, }); + const runCancellationTasks: Promise[] = []; if (result.hold.mode === "pause" || result.hold.mode === "cancel") { const interruptedRunIds = [...new Set(result.preview.activeRuns.map((run) => run.id))]; - for (const runId of interruptedRunIds) { - await heartbeat.cancelRun(runId); - await logActivity(db, { - companyId: root.companyId, - actorType: actor.actorType, - actorId: actor.actorId, - agentId: actor.agentId, - runId: actor.runId, - action: "issue.tree_hold_run_interrupted", - entityType: "heartbeat_run", - entityId: runId, - details: { - holdId: result.hold.id, - rootIssueId: root.id, - reason: result.hold.mode === "pause" ? "active_subtree_pause_hold" : "subtree_cancel_operation", - }, - }); + for (const heartbeatRunId of interruptedRunIds) { + const cancellationTask = (async () => { + try { + await heartbeat.cancelRun(heartbeatRunId); + await logActivity(db, { + companyId: root.companyId, + actorType: actor.actorType, + actorId: actor.actorId, + agentId: actor.agentId, + runId: actor.runId, + action: "issue.tree_hold_run_interrupted", + entityType: "heartbeat_run", + entityId: heartbeatRunId, + details: { + holdId: result.hold.id, + rootIssueId: root.id, + reason: result.hold.mode === "pause" ? 
"active_subtree_pause_hold" : "subtree_cancel_operation", + }, + }); + } catch (error) { + await Promise.resolve(logActivity(db, { + companyId: root.companyId, + actorType: actor.actorType, + actorId: actor.actorId, + agentId: actor.agentId, + runId: actor.runId, + action: "issue.tree_hold_run_interrupt_failed", + entityType: "heartbeat_run", + entityId: heartbeatRunId, + details: { + holdId: result.hold.id, + rootIssueId: root.id, + reason: result.hold.mode === "pause" ? "active_subtree_pause_hold" : "subtree_cancel_operation", + error: errorToMessage(error), + }, + })).catch(() => null); + } + })(); + runCancellationTasks.push(cancellationTask); } const cancelledWakeups = await treeControlSvc.cancelUnclaimedWakeupsForTree( @@ -158,6 +201,10 @@ export function issueTreeControlRoutes(db: Db) { }); } + if (runCancellationTasks.length > 0) { + await waitForRunCancellationTasks(runCancellationTasks); + } + if (result.hold.mode === "restore") { let statusUpdate; try { diff --git a/server/src/routes/issues.ts b/server/src/routes/issues.ts index 303c8c2f..15432c1a 100644 --- a/server/src/routes/issues.ts +++ b/server/src/routes/issues.ts @@ -35,6 +35,7 @@ import { trackAgentTaskCompleted } from "@paperclipai/shared/telemetry"; import { getTelemetryClient } from "../telemetry.js"; import type { StorageService } from "../storage/types.js"; import { validate } from "../middleware/validate.js"; +import * as serviceIndex from "../services/index.js"; import { accessService, agentService, @@ -184,18 +185,24 @@ function isClosedIssueStatus(status: string | null | undefined): status is "done return status === "done" || status === "cancelled"; } -function shouldImplicitlyMoveCommentedIssueToTodoForAgent(input: { +function shouldImplicitlyMoveCommentedIssueToTodo(input: { issueStatus: string | null | undefined; assigneeAgentId: string | null | undefined; actorType: "agent" | "user"; actorId: string; }) { + // Only human comments should implicitly reopen finished work. 
+ // Agent-authored comments remain communicative unless reopen was explicit. + if (input.actorType !== "user") return false; if (!isClosedIssueStatus(input.issueStatus) && input.issueStatus !== "blocked") return false; if (typeof input.assigneeAgentId !== "string" || input.assigneeAgentId.length === 0) return false; - if (input.actorType === "agent" && input.actorId === input.assigneeAgentId) return false; return true; } +function isExplicitResumeCapableStatus(status: string | null | undefined) { + return status === "done" || status === "blocked" || status === "todo" || status === "in_progress"; +} + function queueResolvedInteractionContinuationWakeup(input: { heartbeat: ReturnType; issue: { id: string; assigneeAgentId: string | null; status: string }; @@ -409,6 +416,15 @@ export function issueRoutes( const routinesSvc = routineService(db, { pluginWorkerManager: opts.pluginWorkerManager, }); + const issueTreeControlFactory = Object.prototype.hasOwnProperty.call( + serviceIndex, + "issueTreeControlService", + ) + ? serviceIndex.issueTreeControlService + : undefined; + const treeControlSvc = issueTreeControlFactory?.(db) ?? 
{ + getActivePauseHoldGate: async () => null, + }; const feedbackExportService = opts?.feedbackExportService; const environmentsSvc = environmentService(db); const upload = multer({ @@ -627,6 +643,90 @@ export function issueRoutes( return true; } + async function assertExplicitResumeIntentAllowed( + req: Request, + res: Response, + issue: { id: string; companyId: string; status: string; assigneeAgentId: string | null }, + ) { + if (issue.status === "cancelled") { + res.status(409).json({ + error: "Cancelled issues must be restored through the dedicated restore flow", + details: { + issueId: issue.id, + status: issue.status, + securityPrinciples: ["Complete Mediation", "Fail Securely"], + }, + }); + return false; + } + + if (!isExplicitResumeCapableStatus(issue.status)) { + res.status(409).json({ + error: "Issue is not resumable through comment follow-up intent", + details: { issueId: issue.id, status: issue.status }, + }); + return false; + } + + const activePauseHold = await treeControlSvc.getActivePauseHoldGate(issue.companyId, issue.id); + if (activePauseHold) { + res.status(409).json({ + error: "Issue follow-up blocked by active subtree pause hold", + details: { + issueId: issue.id, + holdId: activePauseHold.holdId, + rootIssueId: activePauseHold.rootIssueId, + mode: activePauseHold.mode, + securityPrinciples: ["Complete Mediation", "Fail Securely", "Secure Defaults"], + }, + }); + return false; + } + + if (issue.status === "blocked") { + const readiness = await svc.getDependencyReadiness(issue.id); + if (readiness.unresolvedBlockerCount > 0) { + res.status(409).json({ + error: "Issue follow-up blocked by unresolved blockers", + details: { + issueId: issue.id, + unresolvedBlockerIssueIds: readiness.unresolvedBlockerIssueIds, + }, + }); + return false; + } + } + + if (req.actor.type !== "agent") return true; + + const actorAgentId = req.actor.agentId; + if (!actorAgentId) { + res.status(403).json({ error: "Agent authentication required" }); + return false; + } + 
if (!issue.assigneeAgentId) { + res.status(409).json({ + error: "Issue follow-up requires an assigned agent", + details: { issueId: issue.id, actorAgentId }, + }); + return false; + } + if (issue.assigneeAgentId === actorAgentId) return true; + if (await hasActiveCheckoutManagementOverride(actorAgentId, issue.companyId, issue.assigneeAgentId)) { + return true; + } + + res.status(403).json({ + error: "Agent cannot request follow-up for another agent's issue", + details: { + issueId: issue.id, + assigneeAgentId: issue.assigneeAgentId, + actorAgentId, + }, + }); + return false; + } + async function resolveActiveIssueRun(issue: { id: string; assigneeAgentId: string | null; @@ -932,6 +1032,7 @@ export function issueRoutes( commentCursor, wakeComment, relations, + blockerAttention, attachments, continuationSummary, currentExecutionWorkspace, @@ -942,6 +1043,7 @@ export function issueRoutes( svc.getCommentCursor(issue.id), wakeCommentId ? svc.getComment(wakeCommentId) : null, svc.getRelationSummaries(issue.id), + svc.listBlockerAttention(issue.companyId, [issue]).then((map) => map.get(issue.id) ?? null), svc.listAttachments(issue.id), documentsSvc.getIssueDocumentByKey(issue.id, ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY), currentExecutionWorkspacePromise, @@ -954,6 +1056,7 @@ export function issueRoutes( title: issue.title, description: issue.description, status: issue.status, + ...(blockerAttention ? { blockerAttention } : {}), priority: issue.priority, projectId: issue.projectId, goalId: goal?.id ?? 
issue.goalId, @@ -1023,12 +1126,13 @@ export function issueRoutes( return; } assertCompanyAccess(req, issue.companyId); - const [{ project, goal }, ancestors, mentionedProjectIds, documentPayload, relations, referenceSummary] = await Promise.all([ + const [{ project, goal }, ancestors, mentionedProjectIds, documentPayload, relations, blockerAttention, referenceSummary] = await Promise.all([ resolveIssueProjectAndGoal(issue), svc.getAncestors(issue.id), svc.findMentionedProjectIds(issue.id, { includeCommentBodies: false }), documentsSvc.getIssueDocumentPayload(issue), svc.getRelationSummaries(issue.id), + svc.listBlockerAttention(issue.companyId, [issue]).then((map) => map.get(issue.id) ?? null), issueReferencesSvc.listIssueReferenceSummary(issue.id), ]); const mentionedProjects = mentionedProjectIds.length > 0 @@ -1042,6 +1146,7 @@ export function issueRoutes( ...issue, goalId: goal?.id ?? issue.goalId, ancestors, + ...(blockerAttention ? { blockerAttention } : {}), blockedBy: relations.blockedBy, blocks: relations.blocks, relatedWork: referenceSummary, @@ -1800,17 +1905,27 @@ export function issueRoutes( comment: commentBody, reviewRequest, reopen: reopenRequested, + resume: resumeRequested, interrupt: interruptRequested, hiddenAt: hiddenAtRaw, ...updateFields } = req.body; + if (resumeRequested === true && !commentBody) { + res.status(400).json({ error: "Follow-up intent requires a comment" }); + return; + } + if (resumeRequested === true && !(await assertExplicitResumeIntentAllowed(req, res, existing))) return; + if (resumeRequested !== true && reopenRequested === true && req.actor.type === "agent") { + if (!(await assertExplicitResumeIntentAllowed(req, res, existing))) return; + } await assertIssueEnvironmentSelection(existing.companyId, updateFields.executionWorkspaceSettings?.environmentId); const requestedAssigneeAgentId = normalizedAssigneeAgentId === undefined ? 
existing.assigneeAgentId : normalizedAssigneeAgentId; + const explicitMoveToTodoRequested = reopenRequested || resumeRequested === true; const effectiveMoveToTodoRequested = - reopenRequested || + explicitMoveToTodoRequested || (!!commentBody && - shouldImplicitlyMoveCommentedIssueToTodoForAgent({ + shouldImplicitlyMoveCommentedIssueToTodo({ issueStatus: existing.status, assigneeAgentId: requestedAssigneeAgentId, actorType: actor.actorType, @@ -1823,6 +1938,10 @@ export function issueRoutes( isBlocked && effectiveMoveToTodoRequested ? (await svc.getDependencyReadiness(existing.id)).unresolvedBlockerCount > 0 : false; + if (resumeRequested === true && isBlocked && hasUnresolvedFirstClassBlockers) { + res.status(409).json({ error: "Issue follow-up blocked by unresolved blockers" }); + return; + } let interruptedRunId: string | null = null; const closedExecutionWorkspace = await getClosedIssueExecutionWorkspace(existing); const isAgentWorkUpdate = @@ -2078,6 +2197,7 @@ export function issueRoutes( ...updateFields, identifier: issue.identifier, ...(commentBody ? { source: "comment" } : {}), + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(reopened ? { reopened: true, reopenedFrom: reopenFromStatus } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), _previous: hasFieldChanges ? previous : undefined, @@ -2220,6 +2340,7 @@ export function issueRoutes( bodySnippet: comment.body.slice(0, 120), identifier: issue.identifier, issueTitle: issue.title, + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(reopened ? { reopened: true, reopenedFrom: reopenFromStatus, source: "comment" } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), ...(hasFieldChanges ? 
{ updated: true } : {}), @@ -2266,6 +2387,10 @@ export function issueRoutes( existing.status === "blocked" && issue.status === "todo" && (req.body.status !== undefined || reopened); + const statusChangedFromClosedToTodo = + isClosedIssueStatus(existing.status) && + issue.status === "todo" && + req.body.status !== undefined; const previousExecutionState = parseIssueExecutionState(existing.executionState); const nextExecutionState = parseIssueExecutionState(issue.executionState); const executionStageWakeup = buildExecutionStageWakeup({ @@ -2300,6 +2425,7 @@ export function issueRoutes( issueId: issue.id, ...(comment ? { commentId: comment.id } : {}), mutation: "update", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, requestedByActorType: actor.actorType, @@ -2314,12 +2440,17 @@ export function issueRoutes( } : {}), source: "issue.update", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, }); } - if (!assigneeChanged && (statusChangedFromBacklog || statusChangedFromBlockedToTodo) && issue.assigneeAgentId) { + if ( + !assigneeChanged && + (statusChangedFromBacklog || statusChangedFromBlockedToTodo || statusChangedFromClosedToTodo) && + issue.assigneeAgentId + ) { addWakeup(issue.assigneeAgentId, { source: "automation", triggerDetail: "system", @@ -2327,6 +2458,7 @@ export function issueRoutes( payload: { issueId: issue.id, mutation: "update", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, requestedByActorType: actor.actorType, @@ -2334,6 +2466,7 @@ export function issueRoutes( contextSnapshot: { issueId: issue.id, source: "issue.status_change", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? 
{ interruptedRunId } : {}), }, }); @@ -2355,6 +2488,7 @@ export function issueRoutes( commentId: comment.id, mutation: "comment", ...(reopened ? { reopenedFrom: reopenFromStatus } : {}), + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, requestedByActorType: actor.actorType, @@ -2367,6 +2501,7 @@ export function issueRoutes( source: reopened ? "issue.comment.reopen" : "issue.comment", wakeReason: reopened ? "issue_reopened_via_comment" : "issue_commented", ...(reopened ? { reopenedFrom: reopenFromStatus } : {}), + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, }); @@ -3143,12 +3278,18 @@ export function issueRoutes( const actor = getActorInfo(req); const reopenRequested = req.body.reopen === true; + const resumeRequested = req.body.resume === true; const interruptRequested = req.body.interrupt === true; + if (resumeRequested === true && !(await assertExplicitResumeIntentAllowed(req, res, issue))) return; + if (resumeRequested !== true && reopenRequested === true && req.actor.type === "agent") { + if (!(await assertExplicitResumeIntentAllowed(req, res, issue))) return; + } const isClosed = isClosedIssueStatus(issue.status); const isBlocked = issue.status === "blocked"; + const explicitMoveToTodoRequested = reopenRequested || resumeRequested === true; const effectiveMoveToTodoRequested = - reopenRequested || - shouldImplicitlyMoveCommentedIssueToTodoForAgent({ + explicitMoveToTodoRequested || + shouldImplicitlyMoveCommentedIssueToTodo({ issueStatus: issue.status, assigneeAgentId: issue.assigneeAgentId, actorType: actor.actorType, @@ -3158,6 +3299,10 @@ export function issueRoutes( isBlocked && effectiveMoveToTodoRequested ? 
(await svc.getDependencyReadiness(issue.id)).unresolvedBlockerCount > 0 : false; + if (resumeRequested === true && isBlocked && hasUnresolvedFirstClassBlockers) { + res.status(409).json({ error: "Issue follow-up blocked by unresolved blockers" }); + return; + } let reopened = false; let reopenFromStatus: string | null = null; let interruptedRunId: string | null = null; @@ -3188,6 +3333,7 @@ export function issueRoutes( reopened: true, reopenedFrom: reopenFromStatus, source: "comment", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), identifier: currentIssue.identifier, }, }); @@ -3250,6 +3396,7 @@ export function issueRoutes( bodySnippet: comment.body.slice(0, 120), identifier: currentIssue.identifier, issueTitle: currentIssue.title, + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(reopened ? { reopened: true, reopenedFrom: reopenFromStatus, source: "comment" } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), ...summarizeIssueReferenceActivityDetails({ @@ -3293,6 +3440,7 @@ export function issueRoutes( commentId: comment.id, reopenedFrom: reopenFromStatus, mutation: "comment", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, requestedByActorType: actor.actorType, @@ -3305,6 +3453,7 @@ export function issueRoutes( source: "issue.comment.reopen", wakeReason: "issue_reopened_via_comment", reopenedFrom: reopenFromStatus, + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, }); @@ -3317,6 +3466,7 @@ export function issueRoutes( issueId: currentIssue.id, commentId: comment.id, mutation: "comment", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? 
{ interruptedRunId } : {}), }, requestedByActorType: actor.actorType, @@ -3328,6 +3478,7 @@ export function issueRoutes( wakeCommentId: comment.id, source: "issue.comment", wakeReason: "issue_commented", + ...(resumeRequested === true ? { resumeIntent: true, followUpRequested: true } : {}), ...(interruptedRunId ? { interruptedRunId } : {}), }, }); diff --git a/server/src/services/adapter-plugin-store.ts b/server/src/services/adapter-plugin-store.ts index 8c26abe8..13369323 100644 --- a/server/src/services/adapter-plugin-store.ts +++ b/server/src/services/adapter-plugin-store.ts @@ -14,7 +14,7 @@ import fs from "node:fs"; import path from "node:path"; -import os from "node:os"; +import { resolvePaperclipHomeDir } from "../home-paths.js"; // --------------------------------------------------------------------------- // Types @@ -43,25 +43,30 @@ interface AdapterSettings { // Paths // --------------------------------------------------------------------------- -const PAPERCLIP_DIR = path.join(os.homedir(), ".paperclip"); -const ADAPTER_PLUGINS_DIR = path.join(PAPERCLIP_DIR, "adapter-plugins"); -const ADAPTER_PLUGINS_STORE_PATH = path.join(PAPERCLIP_DIR, "adapter-plugins.json"); -const ADAPTER_SETTINGS_PATH = path.join(PAPERCLIP_DIR, "adapter-settings.json"); +function adapterPluginPaths() { + const paperclipDir = resolvePaperclipHomeDir(); + return { + adapterPluginsDir: path.join(paperclipDir, "adapter-plugins"), + adapterPluginsStorePath: path.join(paperclipDir, "adapter-plugins.json"), + adapterSettingsPath: path.join(paperclipDir, "adapter-settings.json"), + }; +} // --------------------------------------------------------------------------- // In-memory caches (invalidated on write) // --------------------------------------------------------------------------- -let storeCache: AdapterPluginRecord[] | null = null; -let settingsCache: AdapterSettings | null = null; +let storeCache: { path: string; records: AdapterPluginRecord[] } | null = null; +let settingsCache: { 
path: string; settings: AdapterSettings } | null = null; // --------------------------------------------------------------------------- // Store functions // --------------------------------------------------------------------------- -function ensureDirs(): void { - fs.mkdirSync(ADAPTER_PLUGINS_DIR, { recursive: true }); - const pkgJsonPath = path.join(ADAPTER_PLUGINS_DIR, "package.json"); +function ensureDirs(): string { + const { adapterPluginsDir } = adapterPluginPaths(); + fs.mkdirSync(adapterPluginsDir, { recursive: true }); + const pkgJsonPath = path.join(adapterPluginsDir, "package.json"); if (!fs.existsSync(pkgJsonPath)) { fs.writeFileSync(pkgJsonPath, JSON.stringify({ name: "paperclip-adapter-plugins", @@ -70,44 +75,55 @@ function ensureDirs(): void { description: "Managed directory for Paperclip external adapter plugins. Do not edit manually.", }, null, 2) + "\n"); } + return adapterPluginsDir; } function readStore(): AdapterPluginRecord[] { - if (storeCache) return storeCache; + const { adapterPluginsStorePath } = adapterPluginPaths(); + if (storeCache?.path === adapterPluginsStorePath) return storeCache.records; try { - const raw = fs.readFileSync(ADAPTER_PLUGINS_STORE_PATH, "utf-8"); + const raw = fs.readFileSync(adapterPluginsStorePath, "utf-8"); const parsed = JSON.parse(raw); - storeCache = Array.isArray(parsed) ? (parsed as AdapterPluginRecord[]) : []; + storeCache = { + path: adapterPluginsStorePath, + records: Array.isArray(parsed) ? 
(parsed as AdapterPluginRecord[]) : [], + }; } catch { - storeCache = []; + storeCache = { path: adapterPluginsStorePath, records: [] }; } - return storeCache; + return storeCache.records; } function writeStore(records: AdapterPluginRecord[]): void { ensureDirs(); - fs.writeFileSync(ADAPTER_PLUGINS_STORE_PATH, JSON.stringify(records, null, 2), "utf-8"); - storeCache = records; + const { adapterPluginsStorePath } = adapterPluginPaths(); + fs.writeFileSync(adapterPluginsStorePath, JSON.stringify(records, null, 2), "utf-8"); + storeCache = { path: adapterPluginsStorePath, records }; } function readSettings(): AdapterSettings { - if (settingsCache) return settingsCache; + const { adapterSettingsPath } = adapterPluginPaths(); + if (settingsCache?.path === adapterSettingsPath) return settingsCache.settings; try { - const raw = fs.readFileSync(ADAPTER_SETTINGS_PATH, "utf-8"); + const raw = fs.readFileSync(adapterSettingsPath, "utf-8"); const parsed = JSON.parse(raw); - settingsCache = parsed && Array.isArray(parsed.disabledTypes) - ? (parsed as AdapterSettings) - : { disabledTypes: [] }; + settingsCache = { + path: adapterSettingsPath, + settings: parsed && Array.isArray(parsed.disabledTypes) + ? 
(parsed as AdapterSettings) + : { disabledTypes: [] }, + }; } catch { - settingsCache = { disabledTypes: [] }; + settingsCache = { path: adapterSettingsPath, settings: { disabledTypes: [] } }; } - return settingsCache; + return settingsCache.settings; } function writeSettings(settings: AdapterSettings): void { ensureDirs(); - fs.writeFileSync(ADAPTER_SETTINGS_PATH, JSON.stringify(settings, null, 2), "utf-8"); - settingsCache = settings; + const { adapterSettingsPath } = adapterPluginPaths(); + fs.writeFileSync(adapterSettingsPath, JSON.stringify(settings, null, 2), "utf-8"); + settingsCache = { path: adapterSettingsPath, settings }; } // --------------------------------------------------------------------------- @@ -143,8 +159,7 @@ export function getAdapterPluginByType(type: string): AdapterPluginRecord | unde } export function getAdapterPluginsDir(): string { - ensureDirs(); - return ADAPTER_PLUGINS_DIR; + return ensureDirs(); } // --------------------------------------------------------------------------- diff --git a/server/src/services/agent-start-lock.ts b/server/src/services/agent-start-lock.ts new file mode 100644 index 00000000..38cbaf74 --- /dev/null +++ b/server/src/services/agent-start-lock.ts @@ -0,0 +1,48 @@ +import { logger } from "../middleware/logger.js"; + +const AGENT_START_LOCK_STALE_MS = 30_000; +const startLocksByAgent = new Map; startedAtMs: number }>(); + +async function waitForAgentStartLock(agentId: string, lock: { promise: Promise; startedAtMs: number }) { + const elapsedMs = Date.now() - lock.startedAtMs; + const remainingMs = AGENT_START_LOCK_STALE_MS - elapsedMs; + if (remainingMs <= 0) { + logger.warn({ agentId, staleMs: elapsedMs }, "agent start lock stale; continuing queued-run start"); + return; + } + + let timedOut = false; + let timeout: ReturnType | null = null; + await Promise.race([ + lock.promise, + new Promise((resolve) => { + timeout = setTimeout(() => { + timedOut = true; + resolve(); + }, remainingMs); + }), + ]); + if 
(timeout) clearTimeout(timeout); + + if (timedOut) { + logger.warn({ agentId, staleMs: AGENT_START_LOCK_STALE_MS }, "agent start lock timed out; continuing queued-run start"); + } +} + +export async function withAgentStartLock(agentId: string, fn: () => Promise) { + const previous = startLocksByAgent.get(agentId); + const waitForPrevious = previous ? waitForAgentStartLock(agentId, previous) : Promise.resolve(); + const run = waitForPrevious.then(fn); + const marker = run.then( + () => undefined, + () => undefined, + ); + startLocksByAgent.set(agentId, { promise: marker, startedAtMs: Date.now() }); + try { + return await run; + } finally { + if (startLocksByAgent.get(agentId)?.promise === marker) { + startLocksByAgent.delete(agentId); + } + } +} diff --git a/server/src/services/heartbeat.ts b/server/src/services/heartbeat.ts index fa734442..57a4f62f 100644 --- a/server/src/services/heartbeat.ts +++ b/server/src/services/heartbeat.ts @@ -64,10 +64,6 @@ import { classifyRunLiveness, type RunLivenessClassificationInput, } from "./run-liveness.js"; -import { - classifyIssueGraphLiveness, - type IssueLivenessFinding, -} from "./issue-liveness.js"; import { logActivity, publishPluginDomainEvent, type LogActivityInput } from "./activity-log.js"; import { buildWorkspaceReadyComment, @@ -83,6 +79,7 @@ import { import { issueService } from "./issues.js"; import { ISSUE_TREE_CONTROL_INTERACTION_WAKE_REASONS, + isVerifiedIssueTreeControlInteractionWake, issueTreeControlService, } from "./issue-tree-control.js"; import { @@ -108,7 +105,10 @@ import { decideRunLivenessContinuation, findExistingRunLivenessContinuationWake, readContinuationAttempt, -} from "./run-continuations.js"; +} from "./recovery/index.js"; +import { isAutomaticRecoverySuppressedByPauseHold } from "./recovery/pause-hold-guard.js"; +import { recoveryService } from "./recovery/service.js"; +import { withAgentStartLock } from "./agent-start-lock.js"; import { redactCurrentUserText, redactCurrentUserValue } from 
"../log-redaction.js"; import { hasSessionCompactionThresholds, @@ -142,7 +142,6 @@ const WAKE_COMMENT_IDS_KEY = "wakeCommentIds"; const PAPERCLIP_WAKE_PAYLOAD_KEY = "paperclipWake"; const PAPERCLIP_HARNESS_CHECKOUT_KEY = "paperclipHarnessCheckedOut"; const DETACHED_PROCESS_ERROR_CODE = "process_detached"; -const startLocksByAgent = new Map>(); const REPO_ONLY_CWD_SENTINEL = "/__paperclip_repo_only__"; const MANAGED_WORKSPACE_GIT_CLONE_TIMEOUT_MS = 10 * 60 * 1000; const MAX_INLINE_WAKE_COMMENTS = 8; @@ -153,6 +152,12 @@ const EXECUTION_PATH_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_r const CANCELLABLE_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const; const HEARTBEAT_RUN_TERMINAL_STATUSES = ["succeeded", "failed", "cancelled", "timed_out"] as const; const UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES = ["failed", "cancelled", "timed_out"] as const; +export { + ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS, + ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS, + ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS, +} from "./recovery/service.js"; +export const ACTIVE_RUN_OUTPUT_PROGRESS_FLUSH_INTERVAL_MS = 60 * 1000; export const BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS = [ 2 * 60 * 1000, 10 * 60 * 1000, @@ -614,6 +619,10 @@ const heartbeatRunListColumns = { processPid: heartbeatRuns.processPid, processGroupId: heartbeatRunProcessGroupIdColumn, processStartedAt: heartbeatRuns.processStartedAt, + lastOutputAt: heartbeatRuns.lastOutputAt, + lastOutputSeq: heartbeatRuns.lastOutputSeq, + lastOutputStream: heartbeatRuns.lastOutputStream, + lastOutputBytes: heartbeatRuns.lastOutputBytes, retryOfRunId: heartbeatRuns.retryOfRunId, processLossRetryCount: heartbeatRuns.processLossRetryCount, scheduledRetryAt: heartbeatRuns.scheduledRetryAt, @@ -726,11 +735,17 @@ const heartbeatRunIssueSummaryColumns = { finishedAt: heartbeatRuns.finishedAt, createdAt: heartbeatRuns.createdAt, agentId: heartbeatRuns.agentId, + logBytes: heartbeatRuns.logBytes, + processStartedAt: 
heartbeatRuns.processStartedAt, livenessState: heartbeatRuns.livenessState, livenessReason: heartbeatRuns.livenessReason, continuationAttempt: heartbeatRuns.continuationAttempt, lastUsefulActionAt: heartbeatRuns.lastUsefulActionAt, nextAction: heartbeatRuns.nextAction, + lastOutputAt: heartbeatRuns.lastOutputAt, + lastOutputSeq: heartbeatRuns.lastOutputSeq, + lastOutputStream: heartbeatRuns.lastOutputStream, + lastOutputBytes: heartbeatRuns.lastOutputBytes, issueId: sql`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("issueId"), } as const; @@ -835,23 +850,6 @@ function normalizeMaxConcurrentRuns(value: unknown) { return Math.max(HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT, Math.min(HEARTBEAT_MAX_CONCURRENT_RUNS_MAX, parsed)); } -async function withAgentStartLock(agentId: string, fn: () => Promise) { - const previous = startLocksByAgent.get(agentId) ?? Promise.resolve(); - const run = previous.then(fn); - const marker = run.then( - () => undefined, - () => undefined, - ); - startLocksByAgent.set(agentId, marker); - try { - return await run; - } finally { - if (startLocksByAgent.get(agentId) === marker) { - startLocksByAgent.delete(agentId); - } - } -} - interface WakeupOptions { source?: "timer" | "assignment" | "on_demand" | "automation"; triggerDetail?: "manual" | "ping" | "callback" | "system"; @@ -2002,6 +2000,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) cancelWorkForScope: cancelBudgetScopeWork, }; const budgets = budgetService(db, budgetHooks); + const recovery = recoveryService(db, { enqueueWakeup }); let unsafeTextProjectionPromise: Promise | null = null; async function hasUnsafeTextProjectionDatabase() { @@ -3671,7 +3670,14 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) const issueId = readNonEmptyString(context.issueId); if (issueId) { const activePauseHold = await treeControlSvc.getActivePauseHoldGate(run.companyId, issueId); - const treeHoldInteractionWake = activePauseHold && 
allowsIssueInteractionWake(context); + const treeHoldInteractionWake = activePauseHold && await isVerifiedIssueTreeControlInteractionWake(db, { + companyId: run.companyId, + issueId, + agentId: run.agentId, + runId: run.id, + wakeupRequestId: run.wakeupRequestId, + contextSnapshot: context, + }); if (activePauseHold && !treeHoldInteractionWake) { await cancelRunInternal(run.id, "Cancelled because issue is held by an active subtree pause hold"); await logActivity(db, { @@ -3695,9 +3701,11 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) } const dependencyReadiness = await issuesSvc.listDependencyReadiness(run.companyId, [issueId]); - const unresolvedBlockerCount = dependencyReadiness.get(issueId)?.unresolvedBlockerCount ?? 0; + const readiness = dependencyReadiness.get(issueId); + const unresolvedBlockerCount = readiness?.unresolvedBlockerCount ?? 0; if (unresolvedBlockerCount > 0 && !allowsIssueInteractionWake(context)) { - logger.debug({ runId: run.id, issueId, unresolvedBlockerCount }, "claimQueuedRun: skipping blocked run"); + await cancelQueuedRunForBlockedDependencies(run, issueId, readiness?.unresolvedBlockerIssueIds ?? []); + logger.info({ runId: run.id, issueId, unresolvedBlockerCount }, "claimQueuedRun: cancelled blocked queued run"); return null; } } @@ -3751,6 +3759,9 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) and( eq(issues.id, claimedIssueId), eq(issues.companyId, claimed.companyId), + // Mention/context runs can touch an issue, but only the current assignee + // owns the issue execution lock shown as the active run. 
+ eq(issues.assigneeAgentId, claimed.agentId), or(isNull(issues.executionRunId), eq(issues.executionRunId, claimed.id)), ), ); @@ -3759,6 +3770,64 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) return claimed; } + async function cancelQueuedRunForBlockedDependencies( + run: typeof heartbeatRuns.$inferSelect, + issueId: string, + unresolvedBlockerIssueIds: string[], + ) { + const now = new Date(); + const reason = + "Cancelled because issue dependencies are still blocked; Paperclip will wake the assignee when blockers resolve"; + const cancelled = await setRunStatus(run.id, "cancelled", { + finishedAt: now, + error: reason, + errorCode: "issue_dependencies_blocked", + resultJson: { + ...parseObject(run.resultJson), + stopReason: "issue_dependencies_blocked", + effectiveTimeoutSec: 0, + timeoutConfigured: false, + timeoutSource: "dependency_gate", + timeoutFired: false, + }, + }); + if (!cancelled) return null; + + await setWakeupStatus(run.wakeupRequestId, "skipped", { + finishedAt: now, + error: reason, + }); + + await db + .update(issues) + .set({ + executionRunId: null, + executionAgentNameKey: null, + executionLockedAt: null, + updatedAt: now, + }) + .where( + and( + eq(issues.companyId, run.companyId), + eq(issues.id, issueId), + eq(issues.executionRunId, run.id), + ), + ); + + await appendRunEvent(cancelled, await nextRunEventSeq(cancelled.id), { + eventType: "lifecycle", + stream: "system", + level: "warn", + message: reason, + payload: { + issueId, + unresolvedBlockerIssueIds, + }, + }); + + return cancelled; + } + async function finalizeAgentStatus( agentId: string, outcome: "succeeded" | "failed" | "cancelled" | "timed_out", @@ -3891,6 +3960,26 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) ) : [{ count: 0, latestAt: null }]; + const issueCommentBodies = contextIssueId + ? 
await db + .select({ body: issueComments.body }) + .from(issueComments) + .where( + and( + eq(issueComments.companyId, run.companyId), + eq(issueComments.issueId, contextIssueId), + eq(issueComments.createdByRunId, run.id), + ), + ) + .orderBy(desc(issueComments.createdAt), desc(issueComments.id)) + .limit(5) + .then((rows) => rows.reverse().map((row) => row.body)) + : []; + + const continuationSummary = contextIssueId + ? await getIssueContinuationSummaryDocument(db, contextIssueId) + : null; + const [documentStats] = contextIssueId ? await db .select({ @@ -3961,6 +4050,8 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) runStatus: run.status, issue, resultJson: resultJson ?? run.resultJson ?? null, + issueCommentBodies, + continuationSummaryBody: continuationSummary?.body ?? null, stdoutExcerpt: run.stdoutExcerpt ?? null, stderrExcerpt: run.stderrExcerpt ?? null, error: run.error ?? null, @@ -4140,414 +4231,8 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) } } - async function getLatestIssueRun(companyId: string, issueId: string) { - return db - .select({ - id: heartbeatRuns.id, - status: heartbeatRuns.status, - error: heartbeatRuns.error, - errorCode: heartbeatRuns.errorCode, - contextSnapshot: heartbeatRuns.contextSnapshot, - }) - .from(heartbeatRuns) - .where( - and( - eq(heartbeatRuns.companyId, companyId), - sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`, - ), - ) - .orderBy(desc(heartbeatRuns.createdAt), desc(heartbeatRuns.id)) - .limit(1) - .then((rows) => rows[0] ?? 
null); - } - - async function hasActiveExecutionPath(companyId: string, issueId: string) { - const [run, deferredWake] = await Promise.all([ - db - .select({ id: heartbeatRuns.id }) - .from(heartbeatRuns) - .where( - and( - eq(heartbeatRuns.companyId, companyId), - inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]), - sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`, - ), - ) - .limit(1) - .then((rows) => rows[0] ?? null), - db - .select({ id: agentWakeupRequests.id }) - .from(agentWakeupRequests) - .where( - and( - eq(agentWakeupRequests.companyId, companyId), - eq(agentWakeupRequests.status, "deferred_issue_execution"), - sql`${agentWakeupRequests.payload} ->> 'issueId' = ${issueId}`, - ), - ) - .limit(1) - .then((rows) => rows[0] ?? null), - ]); - - return Boolean(run || deferredWake); - } - - async function enqueueStrandedIssueRecovery(input: { - issueId: string; - agentId: string; - reason: "issue_assignment_recovery" | "issue_continuation_needed"; - retryReason: "assignment_recovery" | "issue_continuation_needed"; - source: string; - retryOfRunId?: string | null; - }) { - const queued = await enqueueWakeup(input.agentId, { - source: "automation", - triggerDetail: "system", - reason: input.reason, - payload: { - issueId: input.issueId, - ...(input.retryOfRunId ? { retryOfRunId: input.retryOfRunId } : {}), - }, - requestedByActorType: "system", - requestedByActorId: null, - contextSnapshot: { - issueId: input.issueId, - taskId: input.issueId, - wakeReason: input.reason, - retryReason: input.retryReason, - source: input.source, - ...(input.retryOfRunId ? { retryOfRunId: input.retryOfRunId } : {}), - }, - }); - - if (queued && input.retryOfRunId) { - return db - .update(heartbeatRuns) - .set({ - retryOfRunId: input.retryOfRunId, - updatedAt: new Date(), - }) - .where(eq(heartbeatRuns.id, queued.id)) - .returning() - .then((rows) => rows[0] ?? 
queued); - } - - return queued; - } - - function formatIssueLinksForComment(relations: Array<{ identifier?: string | null }>) { - const identifiers = [ - ...new Set( - relations - .map((relation) => relation.identifier) - .filter((identifier): identifier is string => Boolean(identifier)), - ), - ]; - if (identifiers.length === 0) return "another open issue"; - return identifiers - .slice(0, 5) - .map((identifier) => { - const prefix = identifier.split("-")[0] || "PAP"; - return `[${identifier}](/${prefix}/issues/${identifier})`; - }) - .join(", "); - } - - async function reconcileUnassignedBlockingIssues() { - const candidates = await db - .select({ - id: issues.id, - companyId: issues.companyId, - identifier: issues.identifier, - status: issues.status, - createdByAgentId: issues.createdByAgentId, - }) - .from(issueRelations) - .innerJoin(issues, eq(issueRelations.issueId, issues.id)) - .where( - and( - eq(issueRelations.type, "blocks"), - inArray(issues.status, ["todo", "blocked"]), - isNull(issues.assigneeAgentId), - isNull(issues.assigneeUserId), - sql`${issues.createdByAgentId} is not null`, - sql`exists ( - select 1 - from issues blocked_issue - where blocked_issue.id = ${issueRelations.relatedIssueId} - and blocked_issue.company_id = ${issues.companyId} - and blocked_issue.status not in ('done', 'cancelled') - )`, - ), - ); - - let assigned = 0; - let skipped = 0; - const issueIds: string[] = []; - const seen = new Set(); - - for (const candidate of candidates) { - if (seen.has(candidate.id)) continue; - seen.add(candidate.id); - - const creatorAgentId = candidate.createdByAgentId; - if (!creatorAgentId) { - skipped += 1; - continue; - } - const creatorAgent = await getAgent(creatorAgentId); - if ( - !creatorAgent || - creatorAgent.companyId !== candidate.companyId || - creatorAgent.status === "paused" || - creatorAgent.status === "terminated" || - creatorAgent.status === "pending_approval" - ) { - skipped += 1; - continue; - } - - const relations = await 
issuesSvc.getRelationSummaries(candidate.id); - const blockingLinks = formatIssueLinksForComment(relations.blocks); - const updated = await issuesSvc.update(candidate.id, { - assigneeAgentId: creatorAgent.id, - assigneeUserId: null, - }); - if (!updated) { - skipped += 1; - continue; - } - - await issuesSvc.addComment( - candidate.id, - [ - "## Assigned Orphan Blocker", - "", - `Paperclip found this issue is blocking ${blockingLinks} but had no assignee, so no heartbeat could pick it up.`, - "", - "- Assigned it back to the agent that created the blocker.", - "- Next action: resolve this blocker or reassign it to the right owner.", - ].join("\n"), - {}, - ); - - await logActivity(db, { - companyId: candidate.companyId, - actorType: "system", - actorId: "system", - agentId: null, - runId: null, - action: "issue.updated", - entityType: "issue", - entityId: candidate.id, - details: { - identifier: candidate.identifier, - assigneeAgentId: creatorAgent.id, - source: "heartbeat.reconcile_unassigned_blocking_issue", - }, - }); - - const queued = await enqueueWakeup(creatorAgent.id, { - source: "automation", - triggerDetail: "system", - reason: "issue_assigned", - payload: { - issueId: candidate.id, - mutation: "unassigned_blocker_recovery", - }, - requestedByActorType: "system", - requestedByActorId: null, - contextSnapshot: { - issueId: candidate.id, - taskId: candidate.id, - wakeReason: "issue_assigned", - source: "issue.unassigned_blocker_recovery", - }, - }); - - if (queued) { - assigned += 1; - issueIds.push(candidate.id); - } else { - skipped += 1; - } - } - - return { assigned, skipped, issueIds }; - } - - async function escalateStrandedAssignedIssue(input: { - issue: typeof issues.$inferSelect; - previousStatus: "todo" | "in_progress"; - latestRun: Pick< - typeof heartbeatRuns.$inferSelect, - "id" | "status" | "error" | "errorCode" | "contextSnapshot" - > | null; - comment: string; - }) { - const updated = await issuesSvc.update(input.issue.id, { - status: 
"blocked", - }); - if (!updated) return null; - - await issuesSvc.addComment(input.issue.id, input.comment, {}); - - await logActivity(db, { - companyId: input.issue.companyId, - actorType: "system", - actorId: "system", - agentId: null, - runId: null, - action: "issue.updated", - entityType: "issue", - entityId: input.issue.id, - details: { - identifier: input.issue.identifier, - status: "blocked", - previousStatus: input.previousStatus, - source: "heartbeat.reconcile_stranded_assigned_issue", - latestRunId: input.latestRun?.id ?? null, - latestRunStatus: input.latestRun?.status ?? null, - latestRunErrorCode: input.latestRun?.errorCode ?? null, - }, - }); - - return updated; - } - async function reconcileStrandedAssignedIssues() { - const candidates = await db - .select() - .from(issues) - .where( - and( - isNull(issues.assigneeUserId), - inArray(issues.status, ["todo", "in_progress"]), - sql`${issues.assigneeAgentId} is not null`, - ), - ); - - const result = { - dispatchRequeued: 0, - continuationRequeued: 0, - orphanBlockersAssigned: 0, - escalated: 0, - skipped: 0, - issueIds: [] as string[], - }; - - for (const issue of candidates) { - const agentId = issue.assigneeAgentId; - if (!agentId) { - result.skipped += 1; - continue; - } - - const agent = await getAgent(agentId); - if (!agent || agent.companyId !== issue.companyId) { - result.skipped += 1; - continue; - } - if (agent.status === "paused" || agent.status === "terminated" || agent.status === "pending_approval") { - result.skipped += 1; - continue; - } - - if (await hasActiveExecutionPath(issue.companyId, issue.id)) { - result.skipped += 1; - continue; - } - - const latestRun = await getLatestIssueRun(issue.companyId, issue.id); - if (issue.status === "todo") { - if (!latestRun || latestRun.status === "succeeded") { - result.skipped += 1; - continue; - } - - if (didAutomaticRecoveryFail(latestRun, "assignment_recovery")) { - const failureSummary = summarizeRunFailureForIssueComment(latestRun); - const 
updated = await escalateStrandedAssignedIssue({ - issue, - previousStatus: "todo", - latestRun, - comment: - "Paperclip automatically retried dispatch for this assigned `todo` issue after a lost wake/run, " + - `but it still has no live execution path.${failureSummary ?? ""} ` + - "Moving it to `blocked` so it is visible for intervention.", - }); - if (updated) { - result.escalated += 1; - result.issueIds.push(issue.id); - } else { - result.skipped += 1; - } - continue; - } - - const queued = await enqueueStrandedIssueRecovery({ - issueId: issue.id, - agentId, - reason: "issue_assignment_recovery", - retryReason: "assignment_recovery", - source: "issue.assignment_recovery", - retryOfRunId: latestRun.id, - }); - if (queued) { - result.dispatchRequeued += 1; - result.issueIds.push(issue.id); - } else { - result.skipped += 1; - } - continue; - } - - if (!latestRun && !issue.checkoutRunId && !issue.executionRunId) { - result.skipped += 1; - continue; - } - if (didAutomaticRecoveryFail(latestRun, "issue_continuation_needed")) { - const failureSummary = summarizeRunFailureForIssueComment(latestRun); - const updated = await escalateStrandedAssignedIssue({ - issue, - previousStatus: "in_progress", - latestRun, - comment: - "Paperclip automatically retried continuation for this assigned `in_progress` issue after its live " + - `execution disappeared, but it still has no live execution path.${failureSummary ?? ""} ` + - "Moving it to `blocked` so it is visible for intervention.", - }); - if (updated) { - result.escalated += 1; - result.issueIds.push(issue.id); - } else { - result.skipped += 1; - } - continue; - } - - const queued = await enqueueStrandedIssueRecovery({ - issueId: issue.id, - agentId, - reason: "issue_continuation_needed", - retryReason: "issue_continuation_needed", - source: "issue.continuation_recovery", - retryOfRunId: latestRun?.id ?? issue.checkoutRunId ?? 
null, - }); - if (queued) { - result.continuationRequeued += 1; - result.issueIds.push(issue.id); - } else { - result.skipped += 1; - } - } - - const orphanBlockerRecovery = await reconcileUnassignedBlockingIssues(); - result.orphanBlockersAssigned = orphanBlockerRecovery.assigned; - result.skipped += orphanBlockerRecovery.skipped; - result.issueIds.push(...orphanBlockerRecovery.issueIds); - - return result; + return recovery.reconcileStrandedAssignedIssues(); } function issueIdFromRunContext(contextSnapshot: unknown) { @@ -4563,348 +4248,22 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) readNonEmptyString(nestedContext.taskId); } - async function collectIssueGraphLivenessFindings() { - const [issueRows, relationRows, agentRows, activeRunRows, wakeRows] = await Promise.all([ - db - .select({ - id: issues.id, - companyId: issues.companyId, - identifier: issues.identifier, - title: issues.title, - status: issues.status, - projectId: issues.projectId, - goalId: issues.goalId, - parentId: issues.parentId, - assigneeAgentId: issues.assigneeAgentId, - assigneeUserId: issues.assigneeUserId, - createdByAgentId: issues.createdByAgentId, - createdByUserId: issues.createdByUserId, - executionState: issues.executionState, - }) - .from(issues) - .where(isNull(issues.hiddenAt)), - db - .select({ - companyId: issueRelations.companyId, - blockerIssueId: issueRelations.issueId, - blockedIssueId: issueRelations.relatedIssueId, - }) - .from(issueRelations) - .where(eq(issueRelations.type, "blocks")), - db - .select({ - id: agents.id, - companyId: agents.companyId, - name: agents.name, - role: agents.role, - title: agents.title, - status: agents.status, - reportsTo: agents.reportsTo, - }) - .from(agents), - db - .select({ - companyId: heartbeatRuns.companyId, - agentId: heartbeatRuns.agentId, - status: heartbeatRuns.status, - contextSnapshot: heartbeatRuns.contextSnapshot, - }) - .from(heartbeatRuns) - .where(inArray(heartbeatRuns.status, 
[...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES])), - db - .select({ - companyId: agentWakeupRequests.companyId, - agentId: agentWakeupRequests.agentId, - status: agentWakeupRequests.status, - payload: agentWakeupRequests.payload, - }) - .from(agentWakeupRequests) - .where(inArray(agentWakeupRequests.status, ["queued", "deferred_issue_execution"])), - ]); - - return classifyIssueGraphLiveness({ - issues: issueRows, - relations: relationRows, - agents: agentRows, - activeRuns: activeRunRows.map((row) => ({ - companyId: row.companyId, - agentId: row.agentId, - status: row.status, - issueId: issueIdFromRunContext(row.contextSnapshot), - })), - queuedWakeRequests: wakeRows.map((row) => ({ - companyId: row.companyId, - agentId: row.agentId, - status: row.status, - issueId: issueIdFromWakePayload(row.payload), - })), - }); + async function scanSilentActiveRuns(opts?: { now?: Date; companyId?: string }) { + return recovery.scanSilentActiveRuns(opts); } - async function findOpenLivenessEscalation(companyId: string, incidentKey: string) { - return db - .select() - .from(issues) - .where( - and( - eq(issues.companyId, companyId), - eq(issues.originKind, "harness_liveness_escalation"), - eq(issues.originId, incidentKey), - isNull(issues.hiddenAt), - notInArray(issues.status, ["done", "cancelled"]), - ), - ) - .limit(1) - .then((rows) => rows[0] ?? null); - } - - async function existingBlockerIssueIds(companyId: string, issueId: string) { - return db - .select({ blockerIssueId: issueRelations.issueId }) - .from(issueRelations) - .where( - and( - eq(issueRelations.companyId, companyId), - eq(issueRelations.relatedIssueId, issueId), - eq(issueRelations.type, "blocks"), - ), - ) - .then((rows) => rows.map((row) => row.blockerIssueId)); - } - - function formatDependencyPath(finding: IssueLivenessFinding) { - return finding.dependencyPath - .map((entry) => entry.identifier ?? 
entry.issueId) - .join(" -> "); - } - - function buildLivenessEscalationDescription(finding: IssueLivenessFinding) { - return [ - "Paperclip detected a harness-level issue graph liveness incident.", - "", - `- Incident key: \`${finding.incidentKey}\``, - `- Finding: \`${finding.state}\``, - `- Dependency path: ${formatDependencyPath(finding)}`, - `- Reason: ${finding.reason}`, - `- Requested action: ${finding.recommendedAction}`, - "", - "Resolve the blocked chain, then mark this escalation issue done so the original issue can resume when all blockers are cleared.", - ].join("\n"); - } - - function buildLivenessOriginalIssueComment(finding: IssueLivenessFinding, escalation: typeof issues.$inferSelect) { - return [ - "Paperclip detected a harness-level liveness incident in this issue's dependency graph.", - "", - `- Escalation issue: ${escalation.identifier ?? escalation.id}`, - `- Incident key: \`${finding.incidentKey}\``, - `- Finding: \`${finding.state}\``, - `- Dependency path: ${formatDependencyPath(finding)}`, - `- Reason: ${finding.reason}`, - `- Manager action requested: ${finding.recommendedAction}`, - "", - "This issue now keeps its existing blockers and is also blocked by the escalation issue so dependency wakeups remain explicit.", - ].join("\n"); - } - - async function resolveEscalationOwnerAgentId( - finding: IssueLivenessFinding, - issue: typeof issues.$inferSelect, + async function buildRunOutputSilence( + run: Pick< + typeof heartbeatRuns.$inferSelect, + "id" | "companyId" | "status" | "lastOutputAt" | "lastOutputSeq" | "lastOutputStream" | "processStartedAt" | "startedAt" | "createdAt" + >, + now = new Date(), ) { - const candidates = [ - finding.recommendedOwnerAgentId, - ...finding.recommendedOwnerCandidateAgentIds, - ].filter((candidate): candidate is string => Boolean(candidate)); - - for (const candidate of [...new Set(candidates)]) { - const budgetBlock = await budgets.getInvocationBlock(issue.companyId, candidate, { - issueId: issue.id, - 
projectId: issue.projectId, - }); - if (!budgetBlock) return candidate; - } - - return null; - } - - async function ensureIssueBlockedByEscalation(input: { - issue: typeof issues.$inferSelect; - escalationIssueId: string; - finding: IssueLivenessFinding; - runId?: string | null; - }) { - const blockerIds = await existingBlockerIssueIds(input.issue.companyId, input.issue.id); - const nextBlockerIds = [...new Set([...blockerIds, input.escalationIssueId])]; - const update: Partial & { blockedByIssueIds: string[] } = { - blockedByIssueIds: nextBlockerIds, - }; - if (input.issue.status !== "blocked") { - update.status = "blocked"; - } - - const updated = await issuesSvc.update(input.issue.id, update); - if (!updated) return null; - - await logActivity(db, { - companyId: input.issue.companyId, - actorType: "system", - actorId: "system", - agentId: null, - runId: input.runId ?? null, - action: "issue.blockers.updated", - entityType: "issue", - entityId: input.issue.id, - details: { - source: "heartbeat.reconcile_issue_graph_liveness", - incidentKey: input.finding.incidentKey, - findingState: input.finding.state, - blockerIssueIds: nextBlockerIds, - escalationIssueId: input.escalationIssueId, - status: update.status ?? input.issue.status, - previousStatus: input.issue.status, - }, - }); - - return updated; - } - - async function createIssueGraphLivenessEscalation(input: { - finding: IssueLivenessFinding; - runId?: string | null; - }) { - const issue = await db - .select() - .from(issues) - .where(eq(issues.id, input.finding.issueId)) - .then((rows) => rows[0] ?? null); - if (!issue || issue.companyId !== input.finding.companyId) return { kind: "skipped" as const }; - - const existing = await findOpenLivenessEscalation(issue.companyId, input.finding.incidentKey); - if (existing) { - await ensureIssueBlockedByEscalation({ - issue, - escalationIssueId: existing.id, - finding: input.finding, - runId: input.runId ?? 
null, - }); - return { kind: "existing" as const, escalationIssueId: existing.id }; - } - - const ownerAgentId = await resolveEscalationOwnerAgentId(input.finding, issue); - if (!ownerAgentId) return { kind: "skipped" as const }; - - const escalation = await issuesSvc.create(issue.companyId, { - title: `Unblock liveness incident for ${issue.identifier ?? issue.title}`, - description: buildLivenessEscalationDescription(input.finding), - status: "todo", - priority: "high", - parentId: issue.id, - projectId: issue.projectId, - goalId: issue.goalId, - assigneeAgentId: ownerAgentId, - originKind: "harness_liveness_escalation", - originId: input.finding.incidentKey, - billingCode: issue.billingCode, - inheritExecutionWorkspaceFromIssueId: issue.id, - }); - - await ensureIssueBlockedByEscalation({ - issue, - escalationIssueId: escalation.id, - finding: input.finding, - runId: input.runId ?? null, - }); - - await issuesSvc.addComment( - issue.id, - buildLivenessOriginalIssueComment(input.finding, escalation), - { runId: input.runId ?? null }, - ); - - await logActivity(db, { - companyId: issue.companyId, - actorType: "system", - actorId: "system", - agentId: ownerAgentId, - runId: input.runId ?? 
null, - action: "issue.harness_liveness_escalation_created", - entityType: "issue", - entityId: escalation.id, - details: { - source: "heartbeat.reconcile_issue_graph_liveness", - incidentKey: input.finding.incidentKey, - findingState: input.finding.state, - sourceIssueId: issue.id, - sourceIdentifier: issue.identifier, - escalationIssueId: escalation.id, - escalationIdentifier: escalation.identifier, - dependencyPath: input.finding.dependencyPath, - }, - }); - - const wake = await enqueueWakeup(ownerAgentId, { - source: "assignment", - triggerDetail: "system", - reason: "issue_assigned", - payload: { - issueId: escalation.id, - sourceIssueId: issue.id, - incidentKey: input.finding.incidentKey, - }, - requestedByActorType: "system", - requestedByActorId: null, - contextSnapshot: { - issueId: escalation.id, - taskId: escalation.id, - wakeReason: "issue_assigned", - source: "harness_liveness_escalation", - sourceIssueId: issue.id, - incidentKey: input.finding.incidentKey, - }, - }); - - logger.warn({ - incidentKey: input.finding.incidentKey, - findingState: input.finding.state, - sourceIssueId: issue.id, - escalationIssueId: escalation.id, - ownerAgentId, - wakeupRunId: wake?.id ?? null, - }, "created issue graph liveness escalation"); - - return { kind: "created" as const, escalationIssueId: escalation.id }; + return recovery.buildRunOutputSilence(run, now); } async function reconcileIssueGraphLiveness(opts?: { runId?: string | null }) { - const findings = await collectIssueGraphLivenessFindings(); - const result = { - findings: findings.length, - escalationsCreated: 0, - existingEscalations: 0, - skipped: 0, - issueIds: [] as string[], - escalationIssueIds: [] as string[], - }; - - for (const finding of findings) { - const escalation = await createIssueGraphLivenessEscalation({ - finding, - runId: opts?.runId ?? 
null, - }); - if (escalation.kind === "created") { - result.escalationsCreated += 1; - result.issueIds.push(finding.issueId); - result.escalationIssueIds.push(escalation.escalationIssueId); - } else if (escalation.kind === "existing") { - result.existingEscalations += 1; - result.issueIds.push(finding.issueId); - result.escalationIssueIds.push(escalation.escalationIssueId); - } else { - result.skipped += 1; - } - } - - return result; + return recovery.reconcileIssueGraphLiveness(opts); } async function updateRuntimeState( @@ -5648,6 +5007,38 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) let handle: RunLogHandle | null = null; let stdoutExcerpt = ""; let stderrExcerpt = ""; + let outputSeq = Number(run.lastOutputSeq ?? 0); + let lastOutputFlushAt: Date | null = run.lastOutputAt ?? null; + const outputProgressState: { + pending: { + at: Date; + seq: number; + stream: "stdout" | "stderr"; + bytes: number; + } | null; + } = { pending: null }; + let persistedLogBytes = Number(run.logBytes ?? 0); + const flushOutputProgress = async (opts?: { force?: boolean }) => { + const pendingOutputProgress = outputProgressState.pending; + if (!pendingOutputProgress) return; + const shouldFlush = + opts?.force === true || + !lastOutputFlushAt || + pendingOutputProgress.at.getTime() - lastOutputFlushAt.getTime() >= ACTIVE_RUN_OUTPUT_PROGRESS_FLUSH_INTERVAL_MS; + if (!shouldFlush) return; + await db + .update(heartbeatRuns) + .set({ + lastOutputAt: pendingOutputProgress.at, + lastOutputSeq: pendingOutputProgress.seq, + lastOutputStream: pendingOutputProgress.stream, + lastOutputBytes: pendingOutputProgress.bytes, + updatedAt: new Date(), + }) + .where(eq(heartbeatRuns.id, run.id)); + lastOutputFlushAt = pendingOutputProgress.at; + outputProgressState.pending = null; + }; try { const startedAt = run.startedAt ?? 
new Date(); const runningWithSession = await db @@ -5714,13 +5105,23 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) if (stream === "stderr") stderrExcerpt = appendExcerpt(stderrExcerpt, sanitizedChunk); const ts = new Date().toISOString(); + let appendedBytes = 0; if (handle) { - await runLogStore.append(handle, { + appendedBytes = await runLogStore.append(handle, { stream, chunk: sanitizedChunk, ts, }); + persistedLogBytes += appendedBytes; } + outputSeq += 1; + outputProgressState.pending = { + at: new Date(ts), + seq: outputSeq, + stream, + bytes: persistedLogBytes, + }; + await flushOutputProgress(); const payloadChunk = sanitizedChunk.length > MAX_LIVE_LOG_CHUNK_BYTES @@ -5950,6 +5351,11 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) if (handle) { logSummary = await runLogStore.finalize(handle); } + const finalLogBytes = logSummary?.bytes; + if (outputProgressState.pending && typeof finalLogBytes === "number") { + outputProgressState.pending.bytes = finalLogBytes; + } + await flushOutputProgress({ force: true }); const status = outcome === "succeeded" @@ -6101,6 +5507,13 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) logger.warn({ err: finalizeErr, runId }, "failed to finalize run log after error"); } } + const finalLogBytes = logSummary?.bytes; + if (outputProgressState.pending && typeof finalLogBytes === "number") { + outputProgressState.pending.bytes = finalLogBytes; + } + await flushOutputProgress({ force: true }).catch((flushErr) => { + logger.warn({ err: flushErr, runId }, "failed to flush run output progress after error"); + }); const failedRun = await setRunStatus(run.id, "failed", { error: message, @@ -6271,15 +5684,7 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) } let issue = await tx - .select({ - id: issues.id, - companyId: issues.companyId, - identifier: issues.identifier, - status: issues.status, - 
assigneeAgentId: issues.assigneeAgentId, - assigneeUserId: issues.assigneeUserId, - executionRunId: issues.executionRunId, - }) + .select() .from(issues) .where( and( @@ -6349,7 +5754,14 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) const deferredPayload = parseObject(deferred.payload); const deferredContextSeed = parseObject(deferredPayload[DEFERRED_WAKE_CONTEXT_KEY]); const activePauseHold = await treeControlSvc.getActivePauseHoldGate(issue.companyId, issue.id); - const treeHoldInteractionWake = activePauseHold && allowsIssueInteractionWake(deferredContextSeed); + const treeHoldInteractionWake = activePauseHold && await isVerifiedIssueTreeControlInteractionWake(tx, { + companyId: issue.companyId, + issueId: issue.id, + agentId: deferred.agentId, + contextSnapshot: deferredContextSeed, + requestedByActorType: deferred.requestedByActorType, + requestedByActorId: deferred.requestedByActorId, + }); if (activePauseHold && !treeHoldInteractionWake) { await tx .update(agentWakeupRequests) @@ -6491,7 +5903,8 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) executionLockedAt: now, updatedAt: now, }) - .where(eq(issues.id, issue.id)); + // Promoted mention wakes are issue-scoped, not issue ownership transfers. 
+ .where(and(eq(issues.id, issue.id), eq(issues.assigneeAgentId, deferredAgent.id))); return { kind: "promoted" as const, @@ -6527,6 +5940,10 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) return { kind: "released" as const }; } + if (await isAutomaticRecoverySuppressedByPauseHold(db, issue.companyId, issue.id, treeControlSvc)) { + return { kind: "released" as const }; + } + const shouldBlockImmediately = !recoveryAgentInvokable || !recoveryAgent || @@ -6536,17 +5953,9 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) status: issue.status as "todo" | "in_progress", latestRun: run, }); - await tx - .update(issues) - .set({ - status: "blocked", - updatedAt: new Date(), - }) - .where(eq(issues.id, issue.id)); return { kind: "blocked" as const, - issueId: issue.id, - issueIdentifier: issue.identifier, + issue, previousStatus: issue.status, comment, }; @@ -6626,25 +6035,11 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) }); if (promotionResult?.kind === "blocked") { - await issuesSvc.addComment(promotionResult.issueId, promotionResult.comment, {}); - await logActivity(db, { - companyId: run.companyId, - actorType: "system", - actorId: "system", - agentId: null, - runId: run.id, - action: "issue.updated", - entityType: "issue", - entityId: promotionResult.issueId, - details: { - identifier: promotionResult.issueIdentifier, - status: "blocked", - previousStatus: promotionResult.previousStatus, - source: "heartbeat.release_issue_execution_and_promote", - latestRunId: run.id, - latestRunStatus: run.status, - latestRunErrorCode: run.errorCode ?? 
null, - }, + await recovery.escalateStrandedAssignedIssue({ + issue: promotionResult.issue, + previousStatus: promotionResult.previousStatus as "todo" | "in_progress", + latestRun: run, + comment: promotionResult.comment, }); return; } @@ -6774,7 +6169,14 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) if (issueId) { const activePauseHold = await treeControlSvc.getActivePauseHoldGate(agent.companyId, issueId); if (activePauseHold) { - const treeHoldInteractionWake = allowsIssueInteractionWake(enrichedContextSnapshot); + const treeHoldInteractionWake = await isVerifiedIssueTreeControlInteractionWake(db, { + companyId: agent.companyId, + issueId, + agentId, + contextSnapshot: enrichedContextSnapshot, + requestedByActorType: opts.requestedByActorType, + requestedByActorId: opts.requestedByActorId, + }); if (!treeHoldInteractionWake) { await writeSkippedRequest("issue_tree_hold_active"); @@ -7745,6 +7147,10 @@ export function heartbeatService(db: Db, options: HeartbeatServiceOptions = {}) reconcileIssueGraphLiveness, + scanSilentActiveRuns, + + buildRunOutputSilence, + tickTimers: async (now = new Date()) => { const allAgents = await db.select().from(agents); let checked = 0; diff --git a/server/src/services/index.ts b/server/src/services/index.ts index 9ee2b1fa..e07790c7 100644 --- a/server/src/services/index.ts +++ b/server/src/services/index.ts @@ -32,7 +32,7 @@ export { routineService } from "./routines.js"; export { costService } from "./costs.js"; export { financeService } from "./finance.js"; export { heartbeatService } from "./heartbeat.js"; -export { classifyIssueGraphLiveness, type IssueLivenessFinding } from "./issue-liveness.js"; +export { classifyIssueGraphLiveness, type IssueLivenessFinding } from "./recovery/index.js"; export { dashboardService } from "./dashboard.js"; export { sidebarBadgeService } from "./sidebar-badges.js"; export { sidebarPreferenceService } from "./sidebar-preferences.js"; diff --git 
a/server/src/services/instance-settings.ts b/server/src/services/instance-settings.ts index 4a6abb9e..7ada2ef7 100644 --- a/server/src/services/instance-settings.ts +++ b/server/src/services/instance-settings.ts @@ -41,12 +41,14 @@ function normalizeExperimentalSettings(raw: unknown): InstanceExperimentalSettin enableEnvironments: parsed.data.enableEnvironments ?? false, enableIsolatedWorkspaces: parsed.data.enableIsolatedWorkspaces ?? false, autoRestartDevServerWhenIdle: parsed.data.autoRestartDevServerWhenIdle ?? false, + enableIssueGraphLivenessAutoRecovery: parsed.data.enableIssueGraphLivenessAutoRecovery ?? false, }; } return { enableEnvironments: false, enableIsolatedWorkspaces: false, autoRestartDevServerWhenIdle: false, + enableIssueGraphLivenessAutoRecovery: false, }; } diff --git a/server/src/services/issue-liveness.ts b/server/src/services/issue-liveness.ts index c7cc58f6..8e515fb7 100644 --- a/server/src/services/issue-liveness.ts +++ b/server/src/services/issue-liveness.ts @@ -1,324 +1,16 @@ -export type IssueLivenessSeverity = "warning" | "critical"; - -export type IssueLivenessState = - | "blocked_by_unassigned_issue" - | "blocked_by_uninvokable_assignee" - | "blocked_by_cancelled_issue" - | "invalid_review_participant"; - -export interface IssueLivenessIssueInput { - id: string; - companyId: string; - identifier: string | null; - title: string; - status: string; - projectId?: string | null; - goalId?: string | null; - parentId?: string | null; - assigneeAgentId?: string | null; - assigneeUserId?: string | null; - createdByAgentId?: string | null; - createdByUserId?: string | null; - executionState?: Record | null; -} - -export interface IssueLivenessRelationInput { - companyId: string; - blockerIssueId: string; - blockedIssueId: string; -} - -export interface IssueLivenessAgentInput { - id: string; - companyId: string; - name: string; - role: string; - title?: string | null; - status: string; - reportsTo?: string | null; -} - -export interface 
IssueLivenessExecutionPathInput { - companyId: string; - issueId: string | null; - agentId?: string | null; - status: string; -} - -export interface IssueLivenessDependencyPathEntry { - issueId: string; - identifier: string | null; - title: string; - status: string; -} - -export interface IssueLivenessFinding { - issueId: string; - companyId: string; - identifier: string | null; - state: IssueLivenessState; - severity: IssueLivenessSeverity; - reason: string; - dependencyPath: IssueLivenessDependencyPathEntry[]; - recommendedOwnerAgentId: string | null; - recommendedOwnerCandidateAgentIds: string[]; - recommendedAction: string; - incidentKey: string; -} - -export interface IssueGraphLivenessInput { - issues: IssueLivenessIssueInput[]; - relations: IssueLivenessRelationInput[]; - agents: IssueLivenessAgentInput[]; - activeRuns?: IssueLivenessExecutionPathInput[]; - queuedWakeRequests?: IssueLivenessExecutionPathInput[]; -} - -const INVOKABLE_AGENT_STATUSES = new Set(["active", "idle", "running", "error"]); -const BLOCKING_AGENT_STATUSES = new Set(["paused", "terminated", "pending_approval"]); - -function issueLabel(issue: IssueLivenessIssueInput) { - return issue.identifier ?? 
issue.id; -} - -function pathEntry(issue: IssueLivenessIssueInput): IssueLivenessDependencyPathEntry { - return { - issueId: issue.id, - identifier: issue.identifier, - title: issue.title, - status: issue.status, - }; -} - -function isInvokableAgent(agent: IssueLivenessAgentInput | null | undefined) { - return Boolean(agent && INVOKABLE_AGENT_STATUSES.has(agent.status)); -} - -function hasActiveExecutionPath( - companyId: string, - issueId: string, - activeRuns: IssueLivenessExecutionPathInput[], - queuedWakeRequests: IssueLivenessExecutionPathInput[], -) { - return [...activeRuns, ...queuedWakeRequests].some( - (entry) => entry.companyId === companyId && entry.issueId === issueId, - ); -} - -function readPrincipalAgentId(principal: unknown): string | null { - if (!principal || typeof principal !== "object") return null; - const value = principal as Record; - return value.type === "agent" && typeof value.agentId === "string" && value.agentId.length > 0 - ? value.agentId - : null; -} - -function principalIsResolvableUser(principal: unknown): boolean { - if (!principal || typeof principal !== "object") return false; - const value = principal as Record; - return value.type === "user" && typeof value.userId === "string" && value.userId.length > 0; -} - -function agentChainCandidates( - startAgentId: string | null | undefined, - agentsById: Map, - companyId: string, -) { - const candidates: string[] = []; - const seen = new Set(); - let current = startAgentId ? 
agentsById.get(startAgentId) : null; - - while (current?.reportsTo) { - if (seen.has(current.reportsTo)) break; - seen.add(current.reportsTo); - const manager = agentsById.get(current.reportsTo); - if (!manager || manager.companyId !== companyId) break; - if (isInvokableAgent(manager)) candidates.push(manager.id); - current = manager; - } - - return candidates; -} - -function fallbackExecutiveCandidates(agents: IssueLivenessAgentInput[], companyId: string) { - const active = agents.filter((agent) => agent.companyId === companyId && isInvokableAgent(agent)); - const executive = active.filter((agent) => { - const haystack = `${agent.role} ${agent.title ?? ""} ${agent.name}`.toLowerCase(); - return /\b(cto|chief technology|ceo|chief executive)\b/.test(haystack); - }); - const roots = active.filter((agent) => !agent.reportsTo); - return [...executive, ...roots, ...active].map((agent) => agent.id); -} - -function ownerCandidatesForIssue( - issue: IssueLivenessIssueInput, - agents: IssueLivenessAgentInput[], - agentsById: Map, -) { - const candidates = [ - ...agentChainCandidates(issue.assigneeAgentId, agentsById, issue.companyId), - ...agentChainCandidates(issue.createdByAgentId, agentsById, issue.companyId), - ...fallbackExecutiveCandidates(agents, issue.companyId), - ]; - return [...new Set(candidates)]; -} - -function incidentKey(input: { - companyId: string; - issueId: string; - state: IssueLivenessState; - blockerIssueId?: string | null; - participantAgentId?: string | null; -}) { - return [ - "harness_liveness", - input.companyId, - input.issueId, - input.state, - input.blockerIssueId ?? input.participantAgentId ?? 
"none", - ].join(":"); -} - -function finding(input: { - issue: IssueLivenessIssueInput; - state: IssueLivenessState; - severity?: IssueLivenessSeverity; - reason: string; - dependencyPath: IssueLivenessIssueInput[]; - recommendedOwnerCandidateAgentIds: string[]; - recommendedAction: string; - blockerIssueId?: string | null; - participantAgentId?: string | null; -}): IssueLivenessFinding { - return { - issueId: input.issue.id, - companyId: input.issue.companyId, - identifier: input.issue.identifier, - state: input.state, - severity: input.severity ?? "critical", - reason: input.reason, - dependencyPath: input.dependencyPath.map(pathEntry), - recommendedOwnerAgentId: input.recommendedOwnerCandidateAgentIds[0] ?? null, - recommendedOwnerCandidateAgentIds: input.recommendedOwnerCandidateAgentIds, - recommendedAction: input.recommendedAction, - incidentKey: incidentKey({ - companyId: input.issue.companyId, - issueId: input.issue.id, - state: input.state, - blockerIssueId: input.blockerIssueId, - participantAgentId: input.participantAgentId, - }), - }; -} - -export function classifyIssueGraphLiveness(input: IssueGraphLivenessInput): IssueLivenessFinding[] { - const issuesById = new Map(input.issues.map((issue) => [issue.id, issue])); - const agentsById = new Map(input.agents.map((agent) => [agent.id, agent])); - const blockersByBlockedIssueId = new Map(); - const findings: IssueLivenessFinding[] = []; - const activeRuns = input.activeRuns ?? []; - const queuedWakeRequests = input.queuedWakeRequests ?? []; - - for (const relation of input.relations) { - const list = blockersByBlockedIssueId.get(relation.blockedIssueId) ?? []; - list.push(relation); - blockersByBlockedIssueId.set(relation.blockedIssueId, list); - } - - for (const issue of input.issues) { - const ownerCandidates = ownerCandidatesForIssue(issue, input.agents, agentsById); - - if (issue.status === "blocked") { - const relations = blockersByBlockedIssueId.get(issue.id) ?? 
[]; - for (const relation of relations) { - if (relation.companyId !== issue.companyId) continue; - const blocker = issuesById.get(relation.blockerIssueId); - if (!blocker || blocker.companyId !== issue.companyId || blocker.status === "done") continue; - - if (blocker.status === "cancelled") { - findings.push(finding({ - issue, - state: "blocked_by_cancelled_issue", - reason: `${issueLabel(issue)} is still blocked by cancelled issue ${issueLabel(blocker)}.`, - dependencyPath: [issue, blocker], - recommendedOwnerCandidateAgentIds: ownerCandidates, - recommendedAction: - `Inspect ${issueLabel(blocker)} and either remove it from ${issueLabel(issue)}'s blockers or replace it with an actionable unblock issue.`, - blockerIssueId: blocker.id, - })); - continue; - } - - if (!blocker.assigneeAgentId && !blocker.assigneeUserId) { - if (hasActiveExecutionPath(issue.companyId, blocker.id, activeRuns, queuedWakeRequests)) continue; - findings.push(finding({ - issue, - state: "blocked_by_unassigned_issue", - reason: `${issueLabel(issue)} is blocked by unassigned issue ${issueLabel(blocker)} with no user owner.`, - dependencyPath: [issue, blocker], - recommendedOwnerCandidateAgentIds: ownerCandidates, - recommendedAction: - `Assign ${issueLabel(blocker)} to an owner who can complete it, or remove it from ${issueLabel(issue)}'s blockers if it is no longer required.`, - blockerIssueId: blocker.id, - })); - continue; - } - - if (!blocker.assigneeAgentId) continue; - if (hasActiveExecutionPath(issue.companyId, blocker.id, activeRuns, queuedWakeRequests)) continue; - - const blockerAgent = agentsById.get(blocker.assigneeAgentId); - if (!blockerAgent || blockerAgent.companyId !== issue.companyId || BLOCKING_AGENT_STATUSES.has(blockerAgent.status)) { - findings.push(finding({ - issue, - state: "blocked_by_uninvokable_assignee", - reason: blockerAgent - ? 
`${issueLabel(issue)} is blocked by ${issueLabel(blocker)}, but its assignee is ${blockerAgent.status}.` - : `${issueLabel(issue)} is blocked by ${issueLabel(blocker)}, but its assignee no longer exists.`, - dependencyPath: [issue, blocker], - recommendedOwnerCandidateAgentIds: ownerCandidates, - recommendedAction: - `Review ${issueLabel(blocker)} and assign it to an active owner or replace the blocker with an actionable issue.`, - blockerIssueId: blocker.id, - })); - } - } - } - - if (issue.status !== "in_review" || !issue.executionState) continue; - const participant = issue.executionState.currentParticipant; - const participantAgentId = readPrincipalAgentId(participant); - if (participantAgentId) { - const participantAgent = agentsById.get(participantAgentId); - if (!isInvokableAgent(participantAgent) || participantAgent?.companyId !== issue.companyId) { - findings.push(finding({ - issue, - state: "invalid_review_participant", - reason: participantAgent - ? `${issueLabel(issue)} is in review, but current participant agent is ${participantAgent.status}.` - : `${issueLabel(issue)} is in review, but current participant agent cannot be resolved.`, - dependencyPath: [issue], - recommendedOwnerCandidateAgentIds: ownerCandidates, - recommendedAction: - `Repair ${issueLabel(issue)}'s review participant or return the issue to an active assignee with a clear change request.`, - participantAgentId, - })); - } - continue; - } - - if (!principalIsResolvableUser(participant)) { - findings.push(finding({ - issue, - state: "invalid_review_participant", - reason: `${issueLabel(issue)} is in review, but its current participant cannot be resolved.`, - dependencyPath: [issue], - recommendedOwnerCandidateAgentIds: ownerCandidates, - recommendedAction: - `Repair ${issueLabel(issue)}'s review participant or return the issue to an active assignee with a clear change request.`, - })); - } - } - - return findings; -} +export { + classifyIssueGraphLiveness, +} from 
/**
 * Wake `source` values accepted for each interaction wake reason.
 * A wake whose context snapshot names a reason listed here must also carry
 * one of the matching sources to be treated as a verified interaction.
 */
const ISSUE_TREE_CONTROL_INTERACTION_WAKE_SOURCES: Readonly<Record<string, ReadonlySet<string>>> = {
  issue_commented: new Set(["issue.comment"]),
  issue_reopened_via_comment: new Set(["issue.comment.reopen"]),
  issue_comment_mentioned: new Set(["comment.mention"]),
};

/** Actor fields that are compared against a comment's author. */
type VerifiedInteractionActor = {
  requestedByActorType?: string | null;
  requestedByActorId?: string | null;
};

/**
 * Reads `record[key]` and returns it trimmed when it is a non-blank string.
 *
 * @returns the trimmed string, or `null` when `record` is not an object or the
 *   value is missing, not a string, or only whitespace.
 */
function readNonEmptyStringFromRecord(record: unknown, key: string): string | null {
  if (!record || typeof record !== "object") return null;
  const value = (record as Record<string, unknown>)[key];
  return typeof value === "string" && value.trim().length > 0 ? value.trim() : null;
}

/**
 * Extracts the comment id a wake refers to from a context snapshot.
 *
 * Prefers the last non-blank string in `wakeCommentIds`; otherwise falls back
 * to `wakeCommentId`, then `commentId`.
 *
 * @returns the trimmed comment id, or `null` when none is present.
 */
function readInteractionWakeCommentId(record: unknown): string | null {
  if (!record || typeof record !== "object") return null;
  const value = (record as Record<string, unknown>).wakeCommentIds;
  if (Array.isArray(value)) {
    const candidates = value.filter(
      (entry): entry is string => typeof entry === "string" && entry.trim().length > 0,
    );
    const latest = candidates[candidates.length - 1];
    if (latest) return latest.trim();
  }
  return readNonEmptyStringFromRecord(record, "wakeCommentId") ?? readNonEmptyStringFromRecord(record, "commentId");
}

/**
 * Checks that the snapshot's `source` is one of the sources accepted for
 * `wakeReason`.
 *
 * @returns `false` when the snapshot has no `source`, when `wakeReason` has no
 *   accepted sources, or when the source is not in the accepted set.
 */
function hasVerifiedInteractionSource(wakeReason: string, contextSnapshot: Record<string, unknown>): boolean {
  const source = readNonEmptyStringFromRecord(contextSnapshot, "source");
  if (!source) return false;
  // Only own keys count: an inherited key such as "toString" or "__proto__"
  // would resolve to a non-Set value and make the `.has(...)` call throw.
  if (!Object.prototype.hasOwnProperty.call(ISSUE_TREE_CONTROL_INTERACTION_WAKE_SOURCES, wakeReason)) {
    return false;
  }
  return ISSUE_TREE_CONTROL_INTERACTION_WAKE_SOURCES[wakeReason]?.has(source) ?? false;
}

/**
 * Decides whether `actor` is consistent with the author of `comment`.
 *
 * - no actor type: never matches
 * - `system`: always matches
 * - `agent` / `user`: the actor id must equal the comment's author id of the
 *   same kind
 * - any other actor type: never matches
 */
function actorMatchesComment(
  actor: VerifiedInteractionActor,
  comment: { authorAgentId: string | null; authorUserId: string | null },
): boolean {
  if (!actor.requestedByActorType) return false;
  if (actor.requestedByActorType === "system") return true;
  if (!actor.requestedByActorId) return false;
  if (actor.requestedByActorType === "agent") return comment.authorAgentId === actor.requestedByActorId;
  if (actor.requestedByActorType === "user") return comment.authorUserId === actor.requestedByActorId;
  return false;
}
/**
 * Looks for a wakeup request that ties the given run / wakeup request to the
 * comment and whose requesting actor matches the comment's author.
 *
 * Requests are filtered by company, by `payload.issueId` and
 * `payload.commentId`, optionally by agent, and by run id and/or wakeup
 * request id (either one may match when both are supplied).
 *
 * NOTE(review): the `Pick<Db, "select">` parameter type is reconstructed from
 * usage (only `.select` is called) — confirm against the intended signature.
 *
 * @returns `false` when neither `runId` nor `wakeupRequestId` is given, or
 *   when no matching request was made by an actor consistent with the comment.
 */
async function hasVerifiedInteractionWakeRequest(
  dbOrTx: Pick<Db, "select">,
  input: {
    companyId: string;
    agentId?: string | null;
    runId?: string | null;
    wakeupRequestId?: string | null;
    issueId: string;
    commentId: string;
    comment: { authorAgentId: string | null; authorUserId: string | null };
  },
) {
  if (!input.runId && !input.wakeupRequestId) return false;
  const predicates = [
    eq(agentWakeupRequests.companyId, input.companyId),
    sql`${agentWakeupRequests.payload} ->> 'issueId' = ${input.issueId}`,
    sql`${agentWakeupRequests.payload} ->> 'commentId' = ${input.commentId}`,
  ];
  if (input.agentId) predicates.push(eq(agentWakeupRequests.agentId, input.agentId));
  if (input.runId && input.wakeupRequestId) {
    // Either identifier is enough to scope the request to this execution.
    const requestScope = or(
      eq(agentWakeupRequests.runId, input.runId),
      eq(agentWakeupRequests.id, input.wakeupRequestId),
    );
    if (requestScope) predicates.push(requestScope);
  } else if (input.runId) {
    predicates.push(eq(agentWakeupRequests.runId, input.runId));
  } else if (input.wakeupRequestId) {
    predicates.push(eq(agentWakeupRequests.id, input.wakeupRequestId));
  }

  const requests = await dbOrTx
    .select({
      requestedByActorType: agentWakeupRequests.requestedByActorType,
      requestedByActorId: agentWakeupRequests.requestedByActorId,
    })
    .from(agentWakeupRequests)
    .where(and(...predicates));

  return requests.some((request) => actorMatchesComment(request, input.comment));
}

/**
 * Decides whether a wake described by `contextSnapshot` is a verified
 * comment-driven interaction on `issueId`.
 *
 * All of the following must hold:
 * 1. the snapshot's `wakeReason` (or `reason`) is one of
 *    `ISSUE_TREE_CONTROL_INTERACTION_WAKE_REASONS`;
 * 2. the snapshot's `source` is accepted for that reason;
 * 3. the snapshot references a comment id, and that comment exists on this
 *    issue in this company;
 * 4. either the directly supplied actor matches the comment's author, or a
 *    wakeup request for this run / wakeup request id does.
 *
 * NOTE(review): the `Pick<Db, "select">` parameter type is reconstructed from
 * usage — confirm against the intended signature.
 *
 * @returns `true` only when every check above passes.
 */
export async function isVerifiedIssueTreeControlInteractionWake(
  dbOrTx: Pick<Db, "select">,
  input: {
    companyId: string;
    issueId: string;
    agentId?: string | null;
    contextSnapshot: Record<string, unknown> | null | undefined;
    requestedByActorType?: "user" | "agent" | "system" | string | null;
    requestedByActorId?: string | null;
    runId?: string | null;
    wakeupRequestId?: string | null;
  },
) {
  const contextSnapshot = input.contextSnapshot ?? null;
  const wakeReason =
    readNonEmptyStringFromRecord(contextSnapshot, "wakeReason") ??
    readNonEmptyStringFromRecord(contextSnapshot, "reason");
  if (!wakeReason || !ISSUE_TREE_CONTROL_INTERACTION_WAKE_REASONS.has(wakeReason)) return false;
  if (!contextSnapshot || !hasVerifiedInteractionSource(wakeReason, contextSnapshot)) return false;

  const commentId = readInteractionWakeCommentId(contextSnapshot);
  if (!commentId) return false;

  // The referenced comment must belong to this issue and company.
  const comment = await dbOrTx
    .select({
      id: issueComments.id,
      authorAgentId: issueComments.authorAgentId,
      authorUserId: issueComments.authorUserId,
    })
    .from(issueComments)
    .where(
      and(
        eq(issueComments.companyId, input.companyId),
        eq(issueComments.issueId, input.issueId),
        eq(issueComments.id, commentId),
      ),
    )
    .then((rows) => rows[0] ?? null);
  if (!comment) return false;

  // Fast path: the caller already knows who requested the wake.
  const directActor = {
    requestedByActorType: input.requestedByActorType,
    requestedByActorId: input.requestedByActorId,
  };
  if (actorMatchesComment(directActor, comment)) return true;

  return hasVerifiedInteractionWakeRequest(dbOrTx, {
    companyId: input.companyId,
    agentId: input.agentId,
    runId: input.runId,
    wakeupRequestId: input.wakeupRequestId,
    issueId: input.issueId,
    commentId,
    comment,
  });
}
/** Directed edge "issueId is held up by blockerIssueId". */
type IssueBlockerAttentionEdge = {
  issueId: string;
  blockerIssueId: string;
};
/** Row shape returned by the blocker / child traversal queries. */
type IssueBlockerAttentionQueryRow = IssueBlockerAttentionNode & {
  issueId: string | null;
  blockerIssueId: string;
};
/** Row shape returned by the queued-wake lookup. */
type IssueBlockerAttentionActivePathRow = {
  issueId: string | null;
};
/** Minimal agent projection loaded for blocker assignees. */
type IssueBlockerAttentionAgentRow = {
  id: string;
  companyId: string;
  status: string;
};

/**
 * Builds an `IssueBlockerAttention` value, filling every omitted field with
 * its neutral default (`state: "none"`, `null` reason/sample, zero counts).
 */
function createIssueBlockerAttention(input: Partial<IssueBlockerAttention> = {}): IssueBlockerAttention {
  return {
    state: input.state ?? "none",
    reason: input.reason ?? null,
    unresolvedBlockerCount: input.unresolvedBlockerCount ?? 0,
    coveredBlockerCount: input.coveredBlockerCount ?? 0,
    attentionBlockerCount: input.attentionBlockerCount ?? 0,
    sampleBlockerIdentifier: input.sampleBlockerIdentifier ?? null,
  };
}

/** Returns the node's identifier, falling back to its id, or `null` without a node. */
function blockerSampleIdentifier(node: IssueBlockerAttentionNode | null | undefined) {
  return node?.identifier ?? node?.id ?? null;
}

/**
 * Adds `rows` to the per-issue edge lists in `edgesByIssueId` (mutated in
 * place), skipping an edge when the issue already has one to the same blocker.
 */
function appendBlockerAttentionEdges(
  edgesByIssueId: Map<string, IssueBlockerAttentionEdge[]>,
  rows: IssueBlockerAttentionEdge[],
) {
  for (const row of rows) {
    const existing = edgesByIssueId.get(row.issueId) ?? [];
    if (!existing.some((edge) => edge.blockerIssueId === row.blockerIssueId)) {
      existing.push(row);
      edgesByIssueId.set(row.issueId, existing);
    }
  }
}

/** Flat query row describing an issue on the other end of a relation. */
type IssueRelationSummaryRow = {
  relatedId: string;
  identifier: string | null;
  title: string;
  status: string;
  priority: string;
  assigneeAgentId: string | null;
  assigneeUserId: string | null;
};

/** Maps a relation query row onto the shared `IssueRelationIssueSummary` shape. */
function summarizeIssueRelationRow(row: IssueRelationSummaryRow): IssueRelationIssueSummary {
  return {
    id: row.relatedId,
    identifier: row.identifier,
    title: row.title,
    status: row.status as IssueRelationIssueSummary["status"],
    priority: row.priority as IssueRelationIssueSummary["priority"],
    assigneeAgentId: row.assigneeAgentId,
    assigneeUserId: row.assigneeUserId,
  };
}

/**
 * For each root issue, finds the "terminal" explicit blockers: issues reached
 * by following `blocks` relations from the root that are not `done` and have
 * no further not-done blockers of their own in the loaded graph.
 *
 * The graph is loaded breadth-first, one level per iteration, bounded by
 * `BLOCKER_ATTENTION_MAX_DEPTH` levels; loading also stops once more than
 * `BLOCKER_ATTENTION_MAX_NODES` nodes are known. Nodes at a truncated edge of
 * the graph have no recorded blockers and are therefore reported as terminal.
 *
 * @param companyId company that every relation and issue must belong to
 * @param roots issues to start from (duplicates by id are collapsed)
 * @param dbOrTx database or transaction used for the relation queries
 * @returns map from root id to its terminal blockers sorted by title; roots
 *   with no terminal blocker other than themselves are omitted.
 */
async function terminalExplicitBlockersByRoot(
  companyId: string,
  roots: IssueRelationIssueSummary[],
  dbOrTx: DbReader,
): Promise<Map<string, IssueRelationIssueSummary[]>> {
  const rootIds = [...new Set(roots.map((root) => root.id))];
  const terminalByRoot = new Map<string, IssueRelationIssueSummary[]>();
  if (rootIds.length === 0) return terminalByRoot;

  const nodesById = new Map<string, IssueRelationIssueSummary>();
  const edgesByIssueId = new Map<string, string[]>();
  for (const root of roots) nodesById.set(root.id, root);

  let frontier = rootIds;
  for (let depth = 0; frontier.length > 0 && depth < BLOCKER_ATTENTION_MAX_DEPTH; depth += 1) {
    const nextFrontier = new Set<string>();
    for (const chunk of chunkList([...new Set(frontier)], ISSUE_LIST_RELATED_QUERY_CHUNK_SIZE)) {
      // `issues` is the blocking side; `relatedIssueId` is the issue being blocked.
      const rows = await dbOrTx
        .select({
          currentIssueId: issueRelations.relatedIssueId,
          relatedId: issues.id,
          identifier: issues.identifier,
          title: issues.title,
          status: issues.status,
          priority: issues.priority,
          assigneeAgentId: issues.assigneeAgentId,
          assigneeUserId: issues.assigneeUserId,
        })
        .from(issueRelations)
        .innerJoin(issues, eq(issueRelations.issueId, issues.id))
        .where(
          and(
            eq(issueRelations.companyId, companyId),
            eq(issueRelations.type, "blocks"),
            inArray(issueRelations.relatedIssueId, chunk),
            eq(issues.companyId, companyId),
            ne(issues.status, "done"),
          ),
        );

      for (const row of rows) {
        const existingEdges = edgesByIssueId.get(row.currentIssueId) ?? [];
        if (!existingEdges.includes(row.relatedId)) {
          existingEdges.push(row.relatedId);
          edgesByIssueId.set(row.currentIssueId, existingEdges);
        }
        // Only newly discovered nodes are expanded on the next level.
        if (!nodesById.has(row.relatedId)) {
          nodesById.set(row.relatedId, summarizeIssueRelationRow(row));
          nextFrontier.add(row.relatedId);
        }
      }
    }

    if (nodesById.size > BLOCKER_ATTENTION_MAX_NODES) break;
    frontier = [...nextFrontier];
  }

  // Depth-first walk; `seen` is per path so cycles terminate without
  // suppressing nodes that are reachable along a different path.
  const collectTerminal = (issueId: string, seen: Set<string>): IssueRelationIssueSummary[] => {
    if (seen.has(issueId)) return [];
    const node = nodesById.get(issueId);
    if (!node || node.status === "done") return [];
    const nextSeen = new Set(seen);
    nextSeen.add(issueId);
    const downstreamIds = edgesByIssueId.get(issueId) ?? [];
    if (downstreamIds.length === 0) return [node];
    return downstreamIds.flatMap((downstreamId) => collectTerminal(downstreamId, nextSeen));
  };

  for (const rootId of rootIds) {
    const deduped = new Map<string, IssueRelationIssueSummary>();
    for (const blocker of collectTerminal(rootId, new Set<string>())) {
      // A root with no blockers is its own terminal node; that is not a blocker.
      if (blocker.id !== rootId) deduped.set(blocker.id, blocker);
    }
    if (deduped.size > 0) {
      terminalByRoot.set(rootId, [...deduped.values()].sort((a, b) => a.title.localeCompare(b.title)));
    }
  }

  return terminalByRoot;
}
/**
 * Computes, for every issue in `issueRows`, whether its blockers are being
 * worked on ("covered") or need someone's attention.
 *
 * - Issues whose status is not `blocked` get the neutral default attention.
 * - Blocked issues of `companyId` are roots of a breadth-first walk over two
 *   kinds of edges: explicit `blocks` relations and parent → child issues.
 *   Only not-`done` issues are loaded. The walk is bounded by
 *   `BLOCKER_ATTENTION_MAX_DEPTH` levels and `BLOCKER_ATTENTION_MAX_NODES`
 *   nodes; hitting either bound marks the result as truncated, in which case
 *   every path is classified as needing attention.
 * - A node is "active" when its `executionRunId` points at a run in one of
 *   `BLOCKER_ATTENTION_ACTIVE_RUN_STATUSES`, or when a wakeup request without
 *   a run in one of `BLOCKER_ATTENTION_ACTIVE_WAKE_STATUSES` targets it.
 * - A path is covered when it reaches a `done` or active node, or when every
 *   downstream path of a node is covered. Cancelled nodes, cycles, and leaf
 *   nodes without active work are not covered.
 *
 * Blocked rows that belong to a different company receive no map entry.
 *
 * @param dbOrTx database or transaction used for all queries
 * @param companyId company scope for roots, blockers, runs, wakes and agents
 * @param issueRows issues to classify
 * @returns map from issue id to its blocker attention summary
 */
async function listIssueBlockerAttentionMap(
  dbOrTx: any,
  companyId: string,
  issueRows: IssueBlockerAttentionInputNode[],
): Promise<Map<string, IssueBlockerAttention>> {
  const roots = issueRows.filter((row) => row.companyId === companyId && row.status === "blocked");
  const attentionMap = new Map<string, IssueBlockerAttention>();
  for (const row of issueRows) {
    if (row.status !== "blocked") {
      attentionMap.set(row.id, createIssueBlockerAttention());
    }
  }
  if (roots.length === 0) return attentionMap;

  const nodesById = new Map<string, IssueBlockerAttentionNode>();
  const edgesByIssueId = new Map<string, IssueBlockerAttentionEdge[]>();
  for (const root of roots) nodesById.set(root.id, { ...root });

  let frontier = roots.map((root) => root.id);
  let truncated = false;
  for (let depth = 0; frontier.length > 0 && depth < BLOCKER_ATTENTION_MAX_DEPTH; depth += 1) {
    const nextFrontier = new Set<string>();

    for (const chunk of chunkList([...new Set(frontier)], ISSUE_LIST_RELATED_QUERY_CHUNK_SIZE)) {
      // Explicit blockers: `issues` is the blocking side of a "blocks" relation.
      const explicitBlockerRowsPromise: Promise<IssueBlockerAttentionQueryRow[]> = dbOrTx
        .select({
          issueId: issueRelations.relatedIssueId,
          blockerIssueId: issues.id,
          id: issues.id,
          companyId: issues.companyId,
          parentId: issues.parentId,
          identifier: issues.identifier,
          title: issues.title,
          status: issues.status,
          executionRunId: issues.executionRunId,
          assigneeAgentId: issues.assigneeAgentId,
          assigneeUserId: issues.assigneeUserId,
        })
        .from(issueRelations)
        .innerJoin(issues, eq(issueRelations.issueId, issues.id))
        .where(
          and(
            eq(issueRelations.companyId, companyId),
            eq(issueRelations.type, "blocks"),
            inArray(issueRelations.relatedIssueId, chunk),
            eq(issues.companyId, companyId),
            ne(issues.status, "done"),
          ),
        );
      // Implicit blockers: unfinished child issues of the frontier issues.
      const childRowsPromise: Promise<IssueBlockerAttentionQueryRow[]> = dbOrTx
        .select({
          issueId: issues.parentId,
          blockerIssueId: issues.id,
          id: issues.id,
          companyId: issues.companyId,
          parentId: issues.parentId,
          identifier: issues.identifier,
          title: issues.title,
          status: issues.status,
          executionRunId: issues.executionRunId,
          assigneeAgentId: issues.assigneeAgentId,
          assigneeUserId: issues.assigneeUserId,
        })
        .from(issues)
        .where(
          and(
            eq(issues.companyId, companyId),
            inArray(issues.parentId, chunk),
            ne(issues.status, "done"),
          ),
        );
      const [explicitBlockerRows, childRows] = await Promise.all([
        explicitBlockerRowsPromise,
        childRowsPromise,
      ]);

      appendBlockerAttentionEdges(edgesByIssueId, [
        ...explicitBlockerRows
          .filter((row): row is IssueBlockerAttentionQueryRow & { issueId: string } => row.issueId !== null)
          .map((row) => ({ issueId: row.issueId, blockerIssueId: row.blockerIssueId })),
        ...childRows
          .filter((row): row is IssueBlockerAttentionQueryRow & { issueId: string } => row.issueId !== null)
          .map((row) => ({ issueId: row.issueId, blockerIssueId: row.blockerIssueId })),
      ]);

      for (const row of [...explicitBlockerRows, ...childRows]) {
        // Only newly discovered nodes are stored and expanded on the next level.
        if (!row.issueId || nodesById.has(row.blockerIssueId)) continue;
        nodesById.set(row.blockerIssueId, {
          id: row.blockerIssueId,
          companyId: row.companyId,
          parentId: row.parentId,
          identifier: row.identifier,
          title: row.title,
          status: row.status,
          executionRunId: row.executionRunId,
          assigneeAgentId: row.assigneeAgentId,
          assigneeUserId: row.assigneeUserId,
        });
        nextFrontier.add(row.blockerIssueId);
      }
    }

    if (nodesById.size > BLOCKER_ATTENTION_MAX_NODES) {
      truncated = true;
      break;
    }
    frontier = [...nextFrontier];
  }
  // A non-empty frontier here means the depth or node bound cut the walk short.
  if (frontier.length > 0) truncated = true;

  const nodeIds = [...nodesById.keys()];
  const activeIssueIds = new Set<string>();
  const agentIds = new Set<string>();
  const issueIdByExecutionRunId = new Map<string, string>();
  for (const node of nodesById.values()) {
    if (node.assigneeAgentId) agentIds.add(node.assigneeAgentId);
    if (node.executionRunId) issueIdByExecutionRunId.set(node.executionRunId, node.id);
  }

  // Issues whose recorded execution run is still in an active status.
  for (const chunk of chunkList([...issueIdByExecutionRunId.keys()], ISSUE_LIST_RELATED_QUERY_CHUNK_SIZE)) {
    const runRows: Array<{ id: string }> = await dbOrTx
      .select({
        id: heartbeatRuns.id,
      })
      .from(heartbeatRuns)
      .where(
        and(
          eq(heartbeatRuns.companyId, companyId),
          inArray(heartbeatRuns.status, BLOCKER_ATTENTION_ACTIVE_RUN_STATUSES),
          inArray(heartbeatRuns.id, chunk),
        ),
      );

    for (const row of runRows) {
      const issueId = issueIdByExecutionRunId.get(row.id);
      if (issueId) activeIssueIds.add(issueId);
    }
  }

  // Issues with a pending wakeup request that has not produced a run yet.
  for (const chunk of chunkList(nodeIds, ISSUE_LIST_RELATED_QUERY_CHUNK_SIZE)) {
    const wakeRowsPromise: Promise<IssueBlockerAttentionActivePathRow[]> = dbOrTx
      .select({
        issueId: sql`${agentWakeupRequests.payload} ->> 'issueId'`,
      })
      .from(agentWakeupRequests)
      .where(
        and(
          eq(agentWakeupRequests.companyId, companyId),
          inArray(agentWakeupRequests.status, BLOCKER_ATTENTION_ACTIVE_WAKE_STATUSES),
          sql`${agentWakeupRequests.runId} is null`,
          inArray(sql`${agentWakeupRequests.payload} ->> 'issueId'`, chunk),
        ),
      );
    const wakeRows = await wakeRowsPromise;
    for (const row of wakeRows) {
      if (row.issueId) activeIssueIds.add(row.issueId);
    }
  }

  const agentRows: IssueBlockerAttentionAgentRow[] = agentIds.size > 0
    ? await dbOrTx
      .select({
        id: agents.id,
        companyId: agents.companyId,
        status: agents.status,
      })
      .from(agents)
      .where(and(eq(agents.companyId, companyId), inArray(agents.id, [...agentIds])))
    : [];
  const agentsById = new Map(agentRows.map((agent) => [agent.id, agent]));

  type PathClassification = { covered: boolean; sampleBlockerIdentifier: string | null };
  // `seen` is per path: revisiting a node on the same path means a cycle,
  // which is classified as not covered.
  const classifyPath = (
    nodeId: string,
    seen: Set<string>,
  ): PathClassification => {
    if (truncated || seen.has(nodeId)) return { covered: false, sampleBlockerIdentifier: blockerSampleIdentifier(nodesById.get(nodeId)) };
    const node = nodesById.get(nodeId);
    if (!node || node.companyId !== companyId) return { covered: false, sampleBlockerIdentifier: nodeId };
    if (node.status === "done") return { covered: true, sampleBlockerIdentifier: blockerSampleIdentifier(node) };
    if (activeIssueIds.has(node.id)) return { covered: true, sampleBlockerIdentifier: blockerSampleIdentifier(node) };
    if (node.status === "cancelled") return { covered: false, sampleBlockerIdentifier: blockerSampleIdentifier(node) };

    const downstream = (edgesByIssueId.get(node.id) ?? []).filter((edge) => nodesById.get(edge.blockerIssueId)?.status !== "done");
    if (downstream.length > 0) {
      const nextSeen = new Set(seen);
      nextSeen.add(nodeId);
      const classified = downstream.map((edge) => classifyPath(edge.blockerIssueId, nextSeen));
      // The first uncovered downstream path decides the result and its sample.
      const attention = classified.find((result) => !result.covered);
      if (attention) return attention;
      return {
        covered: true,
        sampleBlockerIdentifier: classified[0]?.sampleBlockerIdentifier ?? blockerSampleIdentifier(node),
      };
    }

    // NOTE(review): this assignee check cannot change the outcome — both this
    // branch and the fall-through below return the same uncovered result.
    if (node.assigneeAgentId) {
      const assignee = agentsById.get(node.assigneeAgentId);
      if (!assignee || assignee.companyId !== companyId || !BLOCKER_ATTENTION_INVOKABLE_AGENT_STATUSES.has(assignee.status)) {
        return { covered: false, sampleBlockerIdentifier: blockerSampleIdentifier(node) };
      }
    }

    return { covered: false, sampleBlockerIdentifier: blockerSampleIdentifier(node) };
  };

  for (const root of roots) {
    const topLevelEdges = (edgesByIssueId.get(root.id) ?? []).filter((edge) => nodesById.get(edge.blockerIssueId)?.status !== "done");
    if (topLevelEdges.length === 0) {
      // Blocked with nothing unfinished blocking it: someone has to look.
      attentionMap.set(root.id, createIssueBlockerAttention({
        state: "needs_attention",
        reason: "attention_required",
      }));
      continue;
    }

    const classified = topLevelEdges.map((edge) => ({
      edge,
      result: classifyPath(edge.blockerIssueId, new Set<string>([root.id])),
    }));
    const coveredBlockerCount = classified.filter((entry) => entry.result.covered).length;
    const attentionBlockerCount = classified.length - coveredBlockerCount;
    const attentionEntry = classified.find((entry) => !entry.result.covered);
    const sampleEntry = attentionEntry ?? classified[0] ?? null;
    const sampleNode = sampleEntry ? nodesById.get(sampleEntry.edge.blockerIssueId) : null;

    attentionMap.set(root.id, createIssueBlockerAttention({
      state: attentionBlockerCount === 0 ? "covered" : "needs_attention",
      reason: attentionBlockerCount === 0
        ? topLevelEdges.every((edge) => nodesById.get(edge.blockerIssueId)?.parentId === root.id)
          ? "active_child"
          : "active_dependency"
        : "attention_required",
      unresolvedBlockerCount: topLevelEdges.length,
      coveredBlockerCount,
      attentionBlockerCount,
      sampleBlockerIdentifier: sampleEntry?.result.sampleBlockerIdentifier ?? blockerSampleIdentifier(sampleNode),
    }));
  }

  return attentionMap;
}
row.identifier, - title: row.title, - status: row.status as IssueRelationIssueSummary["status"], - priority: row.priority as IssueRelationIssueSummary["priority"], - assigneeAgentId: row.assigneeAgentId, - assigneeUserId: row.assigneeUserId, - }); + empty.get(row.currentIssueId)?.blocks.push(summarizeIssueRelationRow(row)); } + const terminalByRoot = await terminalExplicitBlockersByRoot( + companyId, + [...empty.values()].flatMap((relations) => relations.blockedBy), + dbOrTx, + ); + for (const relations of empty.values()) { relations.blockedBy.sort((a, b) => a.title.localeCompare(b.title)); + for (const blocker of relations.blockedBy) { + const terminalBlockers = terminalByRoot.get(blocker.id); + if (terminalBlockers && terminalBlockers.length > 0) { + blocker.terminalBlockers = terminalBlockers; + } + } relations.blocks.sort((a, b) => a.title.localeCompare(b.title)); } @@ -1519,6 +1925,7 @@ export function issueService(db: Db) { ]); const statsByIssueId = new Map(statsRows.map((row) => [row.issueId, row])); const lastActivityByIssueId = new Map(lastActivityRows.map((row) => [row.issueId, row])); + const blockerAttentionByIssueId = await listIssueBlockerAttentionMap(db, companyId, withRuns); if (!contextUserId) { return withRuns.map((row) => { @@ -1531,6 +1938,7 @@ export function issueService(db: Db) { return { ...row, lastActivityAt, + ...(blockerAttentionByIssueId.has(row.id) ? { blockerAttention: blockerAttentionByIssueId.get(row.id) } : {}), }; }); } @@ -1547,6 +1955,7 @@ export function issueService(db: Db) { return { ...row, lastActivityAt, + ...(blockerAttentionByIssueId.has(row.id) ? { blockerAttention: blockerAttentionByIssueId.get(row.id) } : {}), ...deriveIssueUserContext(row, contextUserId, { myLastCommentAt: statsByIssueId.get(row.id)?.myLastCommentAt ?? null, myLastReadAt: readByIssueId.get(row.id) ?? 
null, @@ -1690,6 +2099,14 @@ export function issueService(db: Db) { return listIssueDependencyReadinessMap(dbOrTx, companyId, issueIds); }, + listBlockerAttention: async ( + companyId: string, + issueRows: IssueBlockerAttentionInputNode[], + dbOrTx: any = db, + ) => { + return listIssueBlockerAttentionMap(dbOrTx, companyId, issueRows); + }, + listWakeableBlockedDependents: async (blockerIssueId: string) => { const blockerIssue = await db .select({ id: issues.id, companyId: issues.companyId }) diff --git a/server/src/services/recovery/index.ts b/server/src/services/recovery/index.ts new file mode 100644 index 00000000..521287f1 --- /dev/null +++ b/server/src/services/recovery/index.ts @@ -0,0 +1,43 @@ +export { + RECOVERY_KEY_PREFIXES, + RECOVERY_ORIGIN_KINDS, + RECOVERY_REASON_KINDS, + buildIssueGraphLivenessIncidentKey, + buildIssueGraphLivenessLeafKey, + parseIssueGraphLivenessIncidentKey, +} from "./origins.js"; +export type { + RecoveryKeyPrefix, + RecoveryOriginKind, + RecoveryReasonKind, +} from "./origins.js"; +export { + classifyIssueGraphLiveness, +} from "./issue-graph-liveness.js"; +export type { + IssueGraphLivenessInput, + IssueLivenessAgentInput, + IssueLivenessDependencyPathEntry, + IssueLivenessExecutionPathInput, + IssueLivenessFinding, + IssueLivenessIssueInput, + IssueLivenessOwnerCandidate, + IssueLivenessOwnerCandidateReason, + IssueLivenessRelationInput, + IssueLivenessSeverity, + IssueLivenessState, +} from "./issue-graph-liveness.js"; +export { + recoveryService, +} from "./service.js"; +export { + DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS, + RUN_LIVENESS_CONTINUATION_REASON, + buildRunLivenessContinuationIdempotencyKey, + decideRunLivenessContinuation, + findExistingRunLivenessContinuationWake, + readContinuationAttempt, +} from "./run-liveness-continuations.js"; +export type { + RunContinuationDecision, +} from "./run-liveness-continuations.js"; diff --git a/server/src/services/recovery/issue-graph-liveness.ts 
b/server/src/services/recovery/issue-graph-liveness.ts new file mode 100644 index 00000000..598d9ca3 --- /dev/null +++ b/server/src/services/recovery/issue-graph-liveness.ts @@ -0,0 +1,414 @@ +import { buildIssueGraphLivenessIncidentKey } from "./origins.js"; + +export type IssueLivenessSeverity = "warning" | "critical"; + +export type IssueLivenessState = + | "blocked_by_unassigned_issue" + | "blocked_by_uninvokable_assignee" + | "blocked_by_cancelled_issue" + | "invalid_review_participant"; + +export interface IssueLivenessIssueInput { + id: string; + companyId: string; + identifier: string | null; + title: string; + status: string; + projectId?: string | null; + goalId?: string | null; + parentId?: string | null; + assigneeAgentId?: string | null; + assigneeUserId?: string | null; + createdByAgentId?: string | null; + createdByUserId?: string | null; + executionState?: Record | null; +} + +export interface IssueLivenessRelationInput { + companyId: string; + blockerIssueId: string; + blockedIssueId: string; +} + +export interface IssueLivenessAgentInput { + id: string; + companyId: string; + name: string; + role: string; + title?: string | null; + status: string; + reportsTo?: string | null; +} + +export interface IssueLivenessExecutionPathInput { + companyId: string; + issueId: string | null; + agentId?: string | null; + status: string; +} + +export interface IssueLivenessDependencyPathEntry { + issueId: string; + identifier: string | null; + title: string; + status: string; +} + +export type IssueLivenessOwnerCandidateReason = + | "stalled_blocker_assignee" + | "assignee_reporting_chain" + | "creator_reporting_chain" + | "root_agent" + | "ordered_invokable_fallback"; + +export interface IssueLivenessOwnerCandidate { + agentId: string; + reason: IssueLivenessOwnerCandidateReason; + sourceIssueId: string; +} + +export interface IssueLivenessFinding { + issueId: string; + companyId: string; + identifier: string | null; + state: IssueLivenessState; + severity: 
IssueLivenessSeverity; + reason: string; + dependencyPath: IssueLivenessDependencyPathEntry[]; + recoveryIssueId: string; + recommendedOwnerAgentId: string | null; + recommendedOwnerCandidateAgentIds: string[]; + recommendedOwnerCandidates: IssueLivenessOwnerCandidate[]; + recommendedAction: string; + incidentKey: string; +} + +export interface IssueGraphLivenessInput { + issues: IssueLivenessIssueInput[]; + relations: IssueLivenessRelationInput[]; + agents: IssueLivenessAgentInput[]; + activeRuns?: IssueLivenessExecutionPathInput[]; + queuedWakeRequests?: IssueLivenessExecutionPathInput[]; +} + +const INVOKABLE_AGENT_STATUSES = new Set(["active", "idle", "running", "error"]); +const BLOCKING_AGENT_STATUSES = new Set(["paused", "terminated", "pending_approval"]); + +function issueLabel(issue: IssueLivenessIssueInput) { + return issue.identifier ?? issue.id; +} + +function pathEntry(issue: IssueLivenessIssueInput): IssueLivenessDependencyPathEntry { + return { + issueId: issue.id, + identifier: issue.identifier, + title: issue.title, + status: issue.status, + }; +} + +function isInvokableAgent(agent: IssueLivenessAgentInput | null | undefined) { + return Boolean(agent && INVOKABLE_AGENT_STATUSES.has(agent.status)); +} + +function hasActiveExecutionPath( + companyId: string, + issueId: string, + activeRuns: IssueLivenessExecutionPathInput[], + queuedWakeRequests: IssueLivenessExecutionPathInput[], +) { + return [...activeRuns, ...queuedWakeRequests].some( + (entry) => entry.companyId === companyId && entry.issueId === issueId, + ); +} + +function readPrincipalAgentId(principal: unknown): string | null { + if (!principal || typeof principal !== "object") return null; + const value = principal as Record; + return value.type === "agent" && typeof value.agentId === "string" && value.agentId.length > 0 + ? 
/**
 * Walks the `reportsTo` chain above `startAgentId` and offers each manager it
 * finds to `addOwnerCandidate`, nearest manager first.
 *
 * The walk ends at the first manager that is missing from `agentsById`,
 * belongs to a different company, or has already been visited. The starting
 * agent is treated as visited from the outset, so a self-reference or a cycle
 * in the reporting data can never offer the starting agent as its own manager.
 *
 * @param candidates - Output list; appended to in place via `addOwnerCandidate`.
 * @param seen - Agent ids already present in `candidates`; updated in place.
 * @param startAgentId - Agent whose managers are walked; nothing happens when it is empty or unknown.
 * @param agentsById - Lookup of agents keyed by agent id.
 * @param companyId - Company the candidates must belong to.
 * @param reason - Reason recorded on every candidate added by this walk.
 * @param sourceIssueId - Issue recorded on every candidate added by this walk.
 */
function addAgentChainCandidates(
  candidates: IssueLivenessOwnerCandidate[],
  seen: Set<string>,
  startAgentId: string | null | undefined,
  agentsById: Map<string, IssueLivenessAgentInput>,
  companyId: string,
  reason: IssueLivenessOwnerCandidateReason,
  sourceIssueId: string,
) {
  const visited = new Set<string>();
  // Seed the guard with the starting agent: without this, A -> A or
  // A -> B -> A would loop back and add A as a "manager" of itself.
  if (startAgentId) visited.add(startAgentId);

  let current = startAgentId ? agentsById.get(startAgentId) : null;

  while (current?.reportsTo) {
    const managerId = current.reportsTo;
    if (visited.has(managerId)) break;
    visited.add(managerId);

    const manager = agentsById.get(managerId);
    // A missing or foreign-company manager ends the chain rather than being skipped.
    if (!manager || manager.companyId !== companyId) break;

    addOwnerCandidate(candidates, seen, agentsById, companyId, manager.id, reason, sourceIssueId);
    current = manager;
  }
}
/**
 * Assembles an `IssueLivenessFinding` from the classifier's working values.
 *
 * - `severity` falls back to `"critical"` when not supplied.
 * - `recommendedOwnerAgentId` is the first entry of
 *   `recommendedOwnerCandidateAgentIds`, or `null` when that list is empty.
 * - `dependencyPath` issues are converted with `pathEntry`.
 * - `incidentKey` is derived from the issue, the state and the optional
 *   blocker / participant ids.
 */
function finding(input: {
  issue: IssueLivenessIssueInput;
  state: IssueLivenessState;
  severity?: IssueLivenessSeverity;
  reason: string;
  dependencyPath: IssueLivenessIssueInput[];
  recoveryIssue: IssueLivenessIssueInput;
  recommendedOwnerCandidateAgentIds: string[];
  recommendedOwnerCandidates: IssueLivenessOwnerCandidate[];
  recommendedAction: string;
  blockerIssueId?: string | null;
  participantAgentId?: string | null;
}): IssueLivenessFinding {
  const { issue, state, blockerIssueId, participantAgentId } = input;
  const ownerAgentIds = input.recommendedOwnerCandidateAgentIds;
  const [preferredOwnerAgentId = null] = ownerAgentIds;
  const path = input.dependencyPath.map(pathEntry);

  return {
    issueId: issue.id,
    companyId: issue.companyId,
    identifier: issue.identifier,
    state,
    severity: input.severity ?? "critical",
    reason: input.reason,
    dependencyPath: path,
    recoveryIssueId: input.recoveryIssue.id,
    recommendedOwnerAgentId: preferredOwnerAgentId,
    recommendedOwnerCandidateAgentIds: ownerAgentIds,
    recommendedOwnerCandidates: input.recommendedOwnerCandidates,
    recommendedAction: input.recommendedAction,
    incidentKey: incidentKey({
      companyId: issue.companyId,
      issueId: issue.id,
      state,
      blockerIssueId,
      participantAgentId,
    }),
  };
}
[]; + + for (const relation of input.relations) { + const list = blockersByBlockedIssueId.get(relation.blockedIssueId) ?? []; + list.push(relation); + blockersByBlockedIssueId.set(relation.blockedIssueId, list); + } + + for (const issue of input.issues) { + if (issue.status === "blocked") { + const relations = blockersByBlockedIssueId.get(issue.id) ?? []; + for (const relation of relations) { + if (relation.companyId !== issue.companyId) continue; + const blocker = issuesById.get(relation.blockerIssueId); + if (!blocker || blocker.companyId !== issue.companyId || blocker.status === "done") continue; + const ownerCandidates = ownerCandidatesForRecoveryIssue(blocker, input.agents, agentsById, { + includeStalledAssignee: true, + }); + + if (blocker.status === "cancelled") { + findings.push(finding({ + issue, + state: "blocked_by_cancelled_issue", + reason: `${issueLabel(issue)} is still blocked by cancelled issue ${issueLabel(blocker)}.`, + dependencyPath: [issue, blocker], + recoveryIssue: blocker, + recommendedOwnerCandidateAgentIds: ownerCandidates.map((candidate) => candidate.agentId), + recommendedOwnerCandidates: ownerCandidates, + recommendedAction: + `Inspect ${issueLabel(blocker)} and either remove it from ${issueLabel(issue)}'s blockers or replace it with an actionable unblock issue.`, + blockerIssueId: blocker.id, + })); + continue; + } + + if (!blocker.assigneeAgentId && !blocker.assigneeUserId) { + if (hasActiveExecutionPath(issue.companyId, blocker.id, activeRuns, queuedWakeRequests)) continue; + findings.push(finding({ + issue, + state: "blocked_by_unassigned_issue", + reason: `${issueLabel(issue)} is blocked by unassigned issue ${issueLabel(blocker)} with no user owner.`, + dependencyPath: [issue, blocker], + recoveryIssue: blocker, + recommendedOwnerCandidateAgentIds: ownerCandidates.map((candidate) => candidate.agentId), + recommendedOwnerCandidates: ownerCandidates, + recommendedAction: + `Assign ${issueLabel(blocker)} to an owner who can complete 
it, or remove it from ${issueLabel(issue)}'s blockers if it is no longer required.`, + blockerIssueId: blocker.id, + })); + continue; + } + + if (!blocker.assigneeAgentId) continue; + if (hasActiveExecutionPath(issue.companyId, blocker.id, activeRuns, queuedWakeRequests)) continue; + + const blockerAgent = agentsById.get(blocker.assigneeAgentId); + if (!blockerAgent || blockerAgent.companyId !== issue.companyId || BLOCKING_AGENT_STATUSES.has(blockerAgent.status)) { + findings.push(finding({ + issue, + state: "blocked_by_uninvokable_assignee", + reason: blockerAgent + ? `${issueLabel(issue)} is blocked by ${issueLabel(blocker)}, but its assignee is ${blockerAgent.status}.` + : `${issueLabel(issue)} is blocked by ${issueLabel(blocker)}, but its assignee no longer exists.`, + dependencyPath: [issue, blocker], + recoveryIssue: blocker, + recommendedOwnerCandidateAgentIds: ownerCandidates.map((candidate) => candidate.agentId), + recommendedOwnerCandidates: ownerCandidates, + recommendedAction: + `Review ${issueLabel(blocker)} and assign it to an active owner or replace the blocker with an actionable issue.`, + blockerIssueId: blocker.id, + })); + } + } + } + + if (issue.status !== "in_review" || !issue.executionState) continue; + const ownerCandidates = ownerCandidatesForRecoveryIssue(issue, input.agents, agentsById); + const participant = issue.executionState.currentParticipant; + const participantAgentId = readPrincipalAgentId(participant); + if (participantAgentId) { + const participantAgent = agentsById.get(participantAgentId); + if (!isInvokableAgent(participantAgent) || participantAgent?.companyId !== issue.companyId) { + findings.push(finding({ + issue, + state: "invalid_review_participant", + reason: participantAgent + ? 
/** String identifiers for each kind of recovery origin. */
export const RECOVERY_ORIGIN_KINDS = {
  issueGraphLivenessEscalation: "harness_liveness_escalation",
  strandedIssueRecovery: "stranded_issue_recovery",
  staleActiveRunEvaluation: "stale_active_run_evaluation",
} as const;

/** String identifiers for recovery reasons. */
export const RECOVERY_REASON_KINDS = {
  runLivenessContinuation: "run_liveness_continuation",
} as const;

/** Leading segment of each colon-joined recovery key built in this module. */
export const RECOVERY_KEY_PREFIXES = {
  issueGraphLivenessIncident: "harness_liveness",
  issueGraphLivenessLeaf: "harness_liveness_leaf",
} as const;

export type RecoveryOriginKind = typeof RECOVERY_ORIGIN_KINDS[keyof typeof RECOVERY_ORIGIN_KINDS];
export type RecoveryReasonKind = typeof RECOVERY_REASON_KINDS[keyof typeof RECOVERY_REASON_KINDS];
export type RecoveryKeyPrefix = typeof RECOVERY_KEY_PREFIXES[keyof typeof RECOVERY_KEY_PREFIXES];

/** Separator placed between the segments of every recovery key. */
const RECOVERY_KEY_SEPARATOR = ":";

/**
 * Builds the incident key for an issue-graph liveness finding.
 *
 * Layout: `harness_liveness:<companyId>:<issueId>:<state>:<subject>`, where
 * `<subject>` is the blocker issue id if given, otherwise the participant
 * agent id, otherwise the literal `none`.
 */
export function buildIssueGraphLivenessIncidentKey(input: {
  companyId: string;
  issueId: string;
  state: string;
  blockerIssueId?: string | null;
  participantAgentId?: string | null;
}) {
  const subject = input.blockerIssueId ?? input.participantAgentId ?? "none";
  const segments = [
    RECOVERY_KEY_PREFIXES.issueGraphLivenessIncident,
    input.companyId,
    input.issueId,
    input.state,
    subject,
  ];
  return segments.join(RECOVERY_KEY_SEPARATOR);
}

/**
 * Splits an incident key back into its parts.
 *
 * Returns `null` for an empty key, a key that does not have exactly five
 * colon-separated segments, a key with a different prefix, or a key with an
 * empty segment.
 *
 * The fifth segment is returned as `leafIssueId`. The builder may have written
 * a participant agent id or the literal `none` into that slot, so the value is
 * not guaranteed to be an issue id.
 */
export function parseIssueGraphLivenessIncidentKey(incidentKey: string | null | undefined) {
  if (!incidentKey) return null;

  const segments = incidentKey.split(RECOVERY_KEY_SEPARATOR);
  if (segments.length !== 5) return null;

  const [prefix, companyId, issueId, state, leafIssueId] = segments;
  if (prefix !== RECOVERY_KEY_PREFIXES.issueGraphLivenessIncident) return null;
  if (!companyId || !issueId || !state || !leafIssueId) return null;

  return { companyId, issueId, state, leafIssueId };
}

/**
 * Builds the leaf key for an issue-graph liveness finding.
 *
 * Layout: `harness_liveness_leaf:<companyId>:<state>:<leafIssueId>`.
 */
export function buildIssueGraphLivenessLeafKey(input: {
  companyId: string;
  state: string;
  leafIssueId: string;
}) {
  const { companyId, state, leafIssueId } = input;
  const segments = [RECOVERY_KEY_PREFIXES.issueGraphLivenessLeaf, companyId, state, leafIssueId];
  return segments.join(RECOVERY_KEY_SEPARATOR);
}
/**
 * Normalises a stored continuation-attempt value to a non-negative integer.
 *
 * Numbers are used as-is; anything else is stringified (with `null` and
 * `undefined` treated as the empty string) and parsed as a base-10 integer.
 * The result is floored. Values that are not finite or not greater than zero
 * yield `0`.
 *
 * @param value - Raw attempt value of unknown type.
 * @returns The attempt count, or `0` when the value is unusable.
 */
export function readContinuationAttempt(value: unknown): number {
  let attempt: number;
  if (typeof value === "number") {
    attempt = value;
  } else {
    attempt = Number.parseInt(String(value ?? ""), 10);
  }

  if (!Number.isFinite(attempt) || attempt <= 0) return 0;
  return Math.floor(attempt);
}
DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS; + + if (!livenessState || !ACTIONABLE_LIVENESS_STATES.has(livenessState)) { + return { kind: "skip", reason: "liveness state is not actionable for continuation" }; + } + if (!issue) return { kind: "skip", reason: "issue not found" }; + if (!agent) return { kind: "skip", reason: "agent not found" }; + if (issue.companyId !== run.companyId || agent.companyId !== run.companyId) { + return { kind: "skip", reason: "company scope mismatch" }; + } + if (issue.assigneeAgentId !== run.agentId) { + return { kind: "skip", reason: "issue is no longer assigned to the source run agent" }; + } + if (!CONTINUATION_ACTIVE_ISSUE_STATUSES.has(issue.status)) { + return { kind: "skip", reason: `issue status ${issue.status} is not continuable` }; + } + if (issue.executionState) { + return { kind: "skip", reason: "issue is blocked by execution policy state" }; + } + if (!CONTINUATION_AGENT_STATUSES.has(agent.status)) { + return { kind: "skip", reason: `agent status ${agent.status} is not invokable` }; + } + if (budgetBlocked) { + return { kind: "skip", reason: "budget hard stop blocks continuation" }; + } + + const currentAttempt = readContinuationAttempt(run.continuationAttempt); + if (currentAttempt >= maxAttempts) { + return { + kind: "exhausted", + attempt: currentAttempt, + maxAttempts, + comment: [ + "Bounded liveness continuation exhausted", + "", + `- Last liveness state: \`${livenessState}\``, + `- Attempts used: ${currentAttempt}/${maxAttempts}`, + `- Reason: ${livenessReason ?? 
"Run ended without concrete progress"}`, + "- Next action: a human or manager should inspect the run and either clarify the task, mark it blocked, or assign a concrete follow-up.", + ].join("\n"), + }; + } + + const nextAttempt = currentAttempt + 1; + const idempotencyKey = buildRunLivenessContinuationIdempotencyKey({ + issueId: issue.id, + sourceRunId: run.id, + livenessState, + nextAttempt, + }); + if (idempotentWakeExists) { + return { kind: "skip", reason: "continuation wake already exists for this source run and attempt" }; + } + + const payload = { + issueId: issue.id, + sourceRunId: run.id, + livenessState, + livenessReason, + continuationAttempt: nextAttempt, + maxContinuationAttempts: maxAttempts, + instruction: + nextAction ?? + "The previous run ended without concrete progress. Take the first concrete action now or mark the issue blocked with a specific unblock request.", + }; + + return { + kind: "enqueue", + nextAttempt, + idempotencyKey, + payload, + contextSnapshot: { + issueId: issue.id, + taskId: issue.id, + taskKey: issue.id, + wakeReason: RUN_LIVENESS_CONTINUATION_REASON, + livenessContinuationAttempt: nextAttempt, + livenessContinuationMaxAttempts: maxAttempts, + livenessContinuationSourceRunId: run.id, + livenessContinuationState: livenessState, + livenessContinuationReason: livenessReason, + livenessContinuationInstruction: payload.instruction, + }, + }; +} diff --git a/server/src/services/recovery/service.ts b/server/src/services/recovery/service.ts new file mode 100644 index 00000000..f2c2f58d --- /dev/null +++ b/server/src/services/recovery/service.ts @@ -0,0 +1,2143 @@ +import { and, asc, desc, eq, gt, inArray, isNull, notInArray, sql } from "drizzle-orm"; +import type { Db } from "@paperclipai/db"; +import { + agents, + agentWakeupRequests, + companies, + heartbeatRunEvents, + heartbeatRunWatchdogDecisions, + heartbeatRuns, + issueRelations, + issues, +} from "@paperclipai/db"; +import { parseObject, asBoolean, asNumber } from 
"../../adapters/utils.js"; +import { runningProcesses } from "../../adapters/index.js"; +import { forbidden, notFound } from "../../errors.js"; +import { logger } from "../../middleware/logger.js"; +import { redactCurrentUserText } from "../../log-redaction.js"; +import { redactSensitiveText } from "../../redaction.js"; +import { logActivity } from "../activity-log.js"; +import { budgetService } from "../budgets.js"; +import { instanceSettingsService } from "../instance-settings.js"; +import { issueTreeControlService } from "../issue-tree-control.js"; +import { issueService } from "../issues.js"; +import { getRunLogStore } from "../run-log-store.js"; +import { + RECOVERY_ORIGIN_KINDS, + buildIssueGraphLivenessLeafKey, + parseIssueGraphLivenessIncidentKey, +} from "./origins.js"; +import { + classifyIssueGraphLiveness, + type IssueLivenessFinding, +} from "./issue-graph-liveness.js"; +import { isAutomaticRecoverySuppressedByPauseHold } from "./pause-hold-guard.js"; + +const EXECUTION_PATH_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const; +const UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES = ["failed", "cancelled", "timed_out"] as const; +const ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_MIN_STALE_MS = 24 * 60 * 60 * 1000; +export const ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS = 60 * 60 * 1000; +export const ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS = 4 * 60 * 60 * 1000; +export const ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS = 30 * 60 * 1000; +const ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES = 8 * 1024; +const STRANDED_ISSUE_RECOVERY_ORIGIN_KIND = RECOVERY_ORIGIN_KINDS.strandedIssueRecovery; +const STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND = RECOVERY_ORIGIN_KINDS.staleActiveRunEvaluation; +const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext"; + +type RecoveryWakeupOptions = { + source?: "timer" | "assignment" | "on_demand" | "automation"; + triggerDetail?: "manual" | "ping" | "callback" | "system"; + reason?: string | null; + payload?: Record | null; + 
/**
 * Returns `value` when it is a string containing at least one
 * non-whitespace character; otherwise returns `null`.
 *
 * The whitespace check only gates the result: the string is returned exactly
 * as received, without trimming.
 */
function readNonEmptyString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  return value.trim() === "" ? null : value;
}
/**
 * Formats a millisecond duration as whole minutes, or hours and minutes.
 *
 * - `null` or a non-finite number -> `"unknown"`
 * - under one hour -> `"<m>m"`
 * - one hour or more -> `"<h>h"` or `"<h>h <m>m"`
 *
 * Seconds are dropped (the value is floored to whole minutes). Negative
 * durations are clamped to zero so they render as `"0m"` rather than as a
 * negative minute count.
 *
 * @param ms - Duration in milliseconds, or `null` when not known.
 * @returns A short human-readable duration string.
 */
function formatDuration(ms: number | null) {
  // NaN / Infinity would otherwise render as "NaNh" / "Infinityh".
  if (ms === null || !Number.isFinite(ms)) return "unknown";

  const minutes = Math.floor(Math.max(0, ms) / 60_000);
  if (minutes < 60) return `${minutes}m`;

  const hours = Math.floor(minutes / 60);
  const remainingMinutes = minutes % 60;
  return remainingMinutes > 0 ? `${hours}h ${remainingMinutes}m` : `${hours}h`;
}
/**
 * Renders a finding's dependency path as a single line such as
 * `PAP-1 -> PAP-2`.
 *
 * Each entry is shown by its `identifier`, falling back to its `issueId` when
 * the identifier is null or undefined. An empty path yields an empty string.
 */
function formatDependencyPath(finding: IssueLivenessFinding) {
  const labels: string[] = [];
  for (const entry of finding.dependencyPath) {
    labels.push(entry.identifier ?? entry.issueId);
  }
  return labels.join(" -> ");
}
escalation.id}`, + `- Incident key: \`${finding.incidentKey}\``, + `- Finding: \`${finding.state}\``, + `- Dependency path: ${formatDependencyPath(finding)}`, + `- Reason: ${finding.reason}`, + `- Manager action requested: ${finding.recommendedAction}`, + "", + "This issue now keeps its existing blockers and is also blocked by the escalation issue so dependency wakeups remain explicit.", + ].join("\n"); +} + +export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup }) { + const issuesSvc = issueService(db); + const treeControlSvc = issueTreeControlService(db); + const budgets = budgetService(db); + const instanceSettings = instanceSettingsService(db); + const runLogStore = getRunLogStore(); + + const getCurrentUserRedactionOptions = async () => ({ + enabled: (await instanceSettings.getGeneral()).censorUsernameInLogs, + }); + + async function getAgent(agentId: string) { + return db.select().from(agents).where(eq(agents.id, agentId)).then((rows) => rows[0] ?? null); + } + + async function getLatestIssueRun(companyId: string, issueId: string): Promise { + return db + .select({ + id: heartbeatRuns.id, + agentId: heartbeatRuns.agentId, + status: heartbeatRuns.status, + error: heartbeatRuns.error, + errorCode: heartbeatRuns.errorCode, + contextSnapshot: heartbeatRuns.contextSnapshot, + }) + .from(heartbeatRuns) + .where( + and( + eq(heartbeatRuns.companyId, companyId), + sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`, + ), + ) + .orderBy(desc(heartbeatRuns.createdAt), desc(heartbeatRuns.id)) + .limit(1) + .then((rows) => rows[0] ?? 
/**
 * Checks the database for an execution path that already covers an issue.
 *
 * Two lookups run concurrently, both scoped to `companyId`:
 * - a heartbeat run whose status is one of
 *   `EXECUTION_PATH_HEARTBEAT_RUN_STATUSES` and whose `contextSnapshot` has a
 *   top-level `issueId` equal to `issueId`;
 * - a wakeup request in status `deferred_issue_execution` whose `payload` has
 *   a top-level `issueId` equal to `issueId`.
 *
 * NOTE(review): both filters read only the top-level `issueId` key, whereas
 * `issueIdFromRunContext` and `issueIdFromWakePayload` in this file also
 * accept `taskId` and the nested `_paperclipWakeContext` object. Confirm that
 * rows identifiable only through those fallbacks are not meant to count here.
 *
 * @returns `true` when either lookup finds at least one row.
 */
async function hasActiveExecutionPath(companyId: string, issueId: string) {
  const activeRunLookup = db
    .select({ id: heartbeatRuns.id })
    .from(heartbeatRuns)
    .where(
      and(
        eq(heartbeatRuns.companyId, companyId),
        inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
        sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
      ),
    )
    .limit(1);

  const deferredWakeLookup = db
    .select({ id: agentWakeupRequests.id })
    .from(agentWakeupRequests)
    .where(
      and(
        eq(agentWakeupRequests.companyId, companyId),
        eq(agentWakeupRequests.status, "deferred_issue_execution"),
        sql`${agentWakeupRequests.payload} ->> 'issueId' = ${issueId}`,
      ),
    )
    .limit(1);

  // Existence is all that matters, so only the row counts are inspected.
  const [activeRunRows, deferredWakeRows] = await Promise.all([activeRunLookup, deferredWakeLookup]);
  return activeRunRows.length > 0 || deferredWakeRows.length > 0;
}
queued); + } + + return queued; + } + + async function reconcileUnassignedBlockingIssues() { + const candidates = await db + .select({ + id: issues.id, + companyId: issues.companyId, + identifier: issues.identifier, + status: issues.status, + createdByAgentId: issues.createdByAgentId, + }) + .from(issueRelations) + .innerJoin(issues, eq(issueRelations.issueId, issues.id)) + .where( + and( + eq(issueRelations.type, "blocks"), + inArray(issues.status, ["todo", "blocked"]), + isNull(issues.assigneeAgentId), + isNull(issues.assigneeUserId), + sql`${issues.createdByAgentId} is not null`, + sql`exists ( + select 1 + from issues blocked_issue + where blocked_issue.id = ${issueRelations.relatedIssueId} + and blocked_issue.company_id = ${issues.companyId} + and blocked_issue.status not in ('done', 'cancelled') + )`, + ), + ); + + let assigned = 0; + let skipped = 0; + const issueIds: string[] = []; + const seen = new Set(); + + for (const candidate of candidates) { + if (seen.has(candidate.id)) continue; + seen.add(candidate.id); + + const creatorAgentId = candidate.createdByAgentId; + if (!creatorAgentId) { + skipped += 1; + continue; + } + const creatorAgent = await getAgent(creatorAgentId); + if (!creatorAgent || creatorAgent.companyId !== candidate.companyId || !isAgentInvokable(creatorAgent)) { + skipped += 1; + continue; + } + + const relations = await issuesSvc.getRelationSummaries(candidate.id); + const blockingLinks = formatIssueLinksForComment(relations.blocks); + const updated = await issuesSvc.update(candidate.id, { + assigneeAgentId: creatorAgent.id, + assigneeUserId: null, + }); + if (!updated) { + skipped += 1; + continue; + } + + await issuesSvc.addComment( + candidate.id, + [ + "## Assigned Orphan Blocker", + "", + `Paperclip found this issue is blocking ${blockingLinks} but had no assignee, so no heartbeat could pick it up.`, + "", + "- Assigned it back to the agent that created the blocker.", + "- Next action: resolve this blocker or reassign it to the 
right owner.", + ].join("\n"), + {}, + ); + + await logActivity(db, { + companyId: candidate.companyId, + actorType: "system", + actorId: "system", + agentId: null, + runId: null, + action: "issue.updated", + entityType: "issue", + entityId: candidate.id, + details: { + identifier: candidate.identifier, + assigneeAgentId: creatorAgent.id, + source: "recovery.reconcile_unassigned_blocking_issue", + }, + }); + + const queued = await deps.enqueueWakeup(creatorAgent.id, { + source: "automation", + triggerDetail: "system", + reason: "issue_assigned", + payload: { + issueId: candidate.id, + mutation: "unassigned_blocker_recovery", + }, + requestedByActorType: "system", + requestedByActorId: null, + contextSnapshot: { + issueId: candidate.id, + taskId: candidate.id, + wakeReason: "issue_assigned", + source: "issue.unassigned_blocker_recovery", + }, + }); + + /* the issue is counted as assigned only when the wakeup was queued; the assignee update above has already been applied either way */ if (queued) { + assigned += 1; + issueIds.push(candidate.id); + } else { + skipped += 1; + } + } + + return { assigned, skipped, issueIds }; + } + + /** Looks up the company's issuePrefix, falling back to "PAP" when the company row or its prefix is missing. */ async function getCompanyIssuePrefix(companyId: string) { + return db + .select({ issuePrefix: companies.issuePrefix }) + .from(companies) + .where(eq(companies.id, companyId)) + .then((rows) => rows[0]?.issuePrefix ?? "PAP"); + } + + /** Builds the "stale_active_run:companyId:runId" fingerprint string for a run. */ function staleActiveRunOriginFingerprint(companyId: string, runId: string) { + return `stale_active_run:${companyId}:${runId}`; + } + + /** Picks the timestamp silence is measured from: the first non-nullish of lastOutputAt, processStartedAt, startedAt and createdAt, or null when all are missing. */ function silenceStartedAtForRun(run: Pick) { + return run.lastOutputAt ?? run.processStartedAt ?? run.startedAt ?? run.createdAt ?? null; + } + + /** Milliseconds between the run's silence start and now, never negative; null when the run has no usable timestamp. */ function silenceAgeMsForRun(run: Pick, now = new Date()) { + const startedAt = silenceStartedAtForRun(run); + return startedAt ? 
Math.max(0, now.getTime() - startedAt.getTime()) : null; + } + + /** Returns the most recently created "snooze" or "continue" watchdog decision for the run whose snoozedUntil is still later than now, or null when no such decision is in effect. */ async function latestActiveOutputQuietUntilDecision(companyId: string, runId: string, now = new Date()) { + const [row] = await db + .select() + .from(heartbeatRunWatchdogDecisions) + .where( + and( + eq(heartbeatRunWatchdogDecisions.companyId, companyId), + eq(heartbeatRunWatchdogDecisions.runId, runId), + inArray(heartbeatRunWatchdogDecisions.decision, ["snooze", "continue"]), + gt(heartbeatRunWatchdogDecisions.snoozedUntil, now), + ), + ) + .orderBy(desc(heartbeatRunWatchdogDecisions.createdAt)) + .limit(1); + return row ?? null; + } + + /** Returns one stale-run evaluation issue for the run that is not hidden and not done/cancelled, or null when there is none. */ async function findOpenStaleRunEvaluation(companyId: string, runId: string) { + const [row] = await db + .select({ + id: issues.id, + identifier: issues.identifier, + status: issues.status, + priority: issues.priority, + assigneeAgentId: issues.assigneeAgentId, + updatedAt: issues.updatedAt, + }) + .from(issues) + .where( + and( + eq(issues.companyId, companyId), + eq(issues.originKind, STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND), + eq(issues.originId, runId), + isNull(issues.hiddenAt), + notInArray(issues.status, ["done", "cancelled"]), + ), + ) + .limit(1); + return row ?? null; + } + + /** Summarizes output silence for a run. The level is "not_applicable" unless the run status is "running"; for running runs it is "snoozed" while a quiet-until decision is in effect, otherwise "critical", "suspicious" or "ok" depending on how the silence age compares with the two thresholds. The result also carries the last-output fields, both thresholds, the snooze deadline and the open evaluation issue (when present). */ async function buildRunOutputSilence( + run: Pick< + typeof heartbeatRuns.$inferSelect, + "id" | "companyId" | "status" | "lastOutputAt" | "lastOutputSeq" | "lastOutputStream" | "processStartedAt" | "startedAt" | "createdAt" + >, + now = new Date(), + ): Promise { + const [quietUntilDecision, evaluation] = await Promise.all([ + latestActiveOutputQuietUntilDecision(run.companyId, run.id, now), + findOpenStaleRunEvaluation(run.companyId, run.id), + ]); + const silenceStartedAt = silenceStartedAtForRun(run); + const silenceAgeMs = run.status === "running" ? silenceAgeMsForRun(run, now) : null; + const level = run.status !== "running" + ? "not_applicable" + : quietUntilDecision + ? "snoozed" + : (silenceAgeMs ?? 0) >= ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + ? 
"critical" + : (silenceAgeMs ?? 0) >= ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + ? "suspicious" + : "ok"; + return { + lastOutputAt: run.lastOutputAt ?? null, + lastOutputSeq: run.lastOutputSeq ?? 0, + lastOutputStream: (run.lastOutputStream === "stdout" || run.lastOutputStream === "stderr") + ? run.lastOutputStream + : null, + silenceStartedAt, + silenceAgeMs, + level, + suspicionThresholdMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS, + criticalThresholdMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS, + snoozedUntil: quietUntilDecision?.snoozedUntil ?? null, + evaluationIssueId: evaluation?.id ?? null, + evaluationIssueIdentifier: evaluation?.identifier ?? null, + }; + } + + /** Passes the text through redactCurrentUserText and then redactSensitiveText. */ function redactWatchdogEvidenceText(value: string, currentUserRedactionOptions: Awaited>) { + return redactSensitiveText(redactCurrentUserText(value, currentUserRedactionOptions)); + } + + /** Returns value unchanged when it fits in maxChars; otherwise keeps only the last maxChars characters and appends a marker line saying earlier evidence was truncated. */ function truncateEvidenceText(value: string, maxChars = 4000) { + if (value.length <= maxChars) return value; + return `${value.slice(value.length - maxChars)}\n[truncated earlier evidence]`; + } + + /** Reads up to ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES from the end of the run's log. Returns "" when the run has no logStore, logRef or logBytes, and also when the read throws (the error is logged as a warning instead of propagating). */ async function readRunLogTailForEvidence(run: typeof heartbeatRuns.$inferSelect) { + if (!run.logStore || !run.logRef || !run.logBytes) return ""; + try { + const offset = Math.max(0, run.logBytes - ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES); + const result = await runLogStore.read( + { store: run.logStore as "local_file", logRef: run.logRef }, + { offset, limitBytes: ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES }, + ); + return result.content; + } catch (err) { + logger.warn({ err, runId: run.id }, "failed to read stale-run watchdog evidence tail"); + return ""; + } + } + + /** Loads the non-hidden issue referenced by the run's contextSnapshot, scoped to the run's company. Returns null when the snapshot yields no issue id or no matching issue exists. */ async function resolveStaleRunSourceIssue(run: typeof heartbeatRuns.$inferSelect) { + const issueId = issueIdFromRunContext(run.contextSnapshot); + if (!issueId) return null; + const [issue] = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, run.companyId), eq(issues.id, issueId), isNull(issues.hiddenAt))) + .limit(1); + return issue 
?? null; + } + + /** Chooses the agent that should own a stale-run evaluation. Candidates are tried in this order: the manager (reportsTo) of the source issue's assignee, the manager of the running agent, then agents of the company with role "cto" followed by "ceo", each group ordered by createdAt. Returns the id of the first candidate that belongs to the run's company, is invokable and has no budget invocation block; null when none qualifies. */ async function resolveStaleRunOwnerAgentId(input: { + run: typeof heartbeatRuns.$inferSelect; + runningAgent: typeof agents.$inferSelect; + sourceIssue: typeof issues.$inferSelect | null; + }) { + const candidateIds: string[] = []; + if (input.sourceIssue?.assigneeAgentId) { + const sourceAssignee = await getAgent(input.sourceIssue.assigneeAgentId); + if (sourceAssignee?.reportsTo) candidateIds.push(sourceAssignee.reportsTo); + } + if (input.runningAgent.reportsTo) candidateIds.push(input.runningAgent.reportsTo); + const roleCandidates = await db + .select() + .from(agents) + .where(and(eq(agents.companyId, input.run.companyId), inArray(agents.role, ["cto", "ceo"]))) + .orderBy(sql`case when ${agents.role} = 'cto' then 0 else 1 end`, asc(agents.createdAt)); + candidateIds.push(...roleCandidates.map((agent) => agent.id)); + + /* the same agent can be reached through several routes; evaluate each id once, in priority order */ const seen = new Set(); + for (const agentId of candidateIds) { + if (seen.has(agentId)) continue; + seen.add(agentId); + const candidate = await getAgent(agentId); + if (!candidate || candidate.companyId !== input.run.companyId) continue; + const budgetBlock = await budgets.getInvocationBlock(input.run.companyId, candidate.id, { + issueId: input.sourceIssue?.id ?? null, + projectId: input.sourceIssue?.projectId ?? 
null, + }); + if (isAgentInvokable(candidate) && !budgetBlock) return candidate.id; + } + + return null; + } + + /** Gathers the evidence shown in a stale-run evaluation. Four reads run in parallel: the run-log tail, the 8 run events with the highest ids, up to 8 non-hidden child issues of the source issue (most recently updated first) and up to 8 issues linked as blocking the source issue; the last two are empty when there is no source issue. The log tail and event messages are redacted and truncated (event messages to 300 characters), and events are returned in ascending order. */ async function collectStaleRunEvidence(input: { + run: typeof heartbeatRuns.$inferSelect; + runningAgent: typeof agents.$inferSelect; + sourceIssue: typeof issues.$inferSelect | null; + prefix: string; + now: Date; + }) { + const [tail, recentEvents, childIssues, blockers] = await Promise.all([ + readRunLogTailForEvidence(input.run), + db + .select({ + eventType: heartbeatRunEvents.eventType, + level: heartbeatRunEvents.level, + message: heartbeatRunEvents.message, + createdAt: heartbeatRunEvents.createdAt, + }) + .from(heartbeatRunEvents) + .where(and(eq(heartbeatRunEvents.companyId, input.run.companyId), eq(heartbeatRunEvents.runId, input.run.id))) + .orderBy(desc(heartbeatRunEvents.id)) + .limit(8), + input.sourceIssue + ? db + .select({ id: issues.id, identifier: issues.identifier, title: issues.title, status: issues.status }) + .from(issues) + .where(and(eq(issues.companyId, input.run.companyId), eq(issues.parentId, input.sourceIssue.id), isNull(issues.hiddenAt))) + .orderBy(desc(issues.updatedAt)) + .limit(8) + : Promise.resolve([]), + input.sourceIssue + ? 
db + .select({ id: issues.id, identifier: issues.identifier, title: issues.title, status: issues.status }) + .from(issueRelations) + .innerJoin(issues, eq(issueRelations.issueId, issues.id)) + .where( + and( + eq(issueRelations.companyId, input.run.companyId), + eq(issueRelations.relatedIssueId, input.sourceIssue.id), + eq(issueRelations.type, "blocks"), + ), + ) + .limit(8) + : Promise.resolve([]), + ]); + const currentUserRedactionOptions = await getCurrentUserRedactionOptions(); + const safeTail = truncateEvidenceText(redactWatchdogEvidenceText(tail, currentUserRedactionOptions)); + const silenceAgeMs = silenceAgeMsForRun(input.run, input.now); + return { + safeTail, + silenceAgeMs, + /* events were fetched newest-first; reverse them so the list reads chronologically */ recentEvents: recentEvents.reverse().map((event) => ({ + eventType: event.eventType, + level: event.level, + createdAt: event.createdAt.toISOString(), + message: event.message ? truncateEvidenceText(redactWatchdogEvidenceText(event.message, currentUserRedactionOptions), 300) : null, + })), + childIssues, + blockers, + }; + } + + /** Renders the markdown description of a stale-run evaluation issue: a "Run" section with run, agent, timing and process details, the last output excerpt, recent run events, related child issues and blockers, and a decision checklist. Empty event/issue lists are rendered as a "none" bullet. */ function buildStaleRunEvaluationDescription(input: { + run: typeof heartbeatRuns.$inferSelect; + runningAgent: typeof agents.$inferSelect; + sourceIssue: typeof issues.$inferSelect | null; + prefix: string; + evidence: Awaited>; + level: "suspicious" | "critical"; + now: Date; + }) { + const sourceIssue = input.sourceIssue + ? issueUiLink({ identifier: input.sourceIssue.identifier, id: input.sourceIssue.id }, input.prefix) + : "none"; + const recentEvents = input.evidence.recentEvents.length > 0 + ? input.evidence.recentEvents.map((event) => + `- ${event.createdAt} \`${event.eventType}\`${event.level ? ` ${event.level}` : ""}: ${event.message ?? "(no message)"}`, + ).join("\n") + : "- none"; + const childIssues = input.evidence.childIssues.length > 0 + ? 
input.evidence.childIssues.map((issue) => + `- ${issueUiLink({ identifier: issue.identifier, id: issue.id }, input.prefix)} \`${issue.status}\`: ${issue.title}`, + ).join("\n") + : "- none detected"; + const blockers = input.evidence.blockers.length > 0 + ? input.evidence.blockers.map((issue) => + `- ${issueUiLink({ identifier: issue.identifier, id: issue.id }, input.prefix)} \`${issue.status}\`: ${issue.title}`, + ).join("\n") + : "- none detected"; + /* assemble the sections line by line; the array is joined with newlines at the end */ return [ + `Paperclip detected ${input.level} output silence on an active heartbeat run.`, + "", + "## Run", + "", + `- Run: ${runUiLink(input.run, input.prefix)}`, + `- Agent: ${input.runningAgent.name} (${input.runningAgent.adapterType})`, + `- Invocation: ${input.run.invocationSource}${input.run.triggerDetail ? ` / ${input.run.triggerDetail}` : ""}`, + `- Source issue: ${sourceIssue}`, + `- Started at: ${input.run.startedAt?.toISOString() ?? "unknown"}`, + `- Process started at: ${input.run.processStartedAt?.toISOString() ?? "unknown"}`, + `- Last output at: ${input.run.lastOutputAt?.toISOString() ?? "none recorded"}`, + `- Last output sequence: ${input.run.lastOutputSeq ?? 0}`, + `- Silent for: ${formatDuration(input.evidence.silenceAgeMs)}`, + `- Thresholds: suspicious after ${formatDuration(ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS)}, critical after ${formatDuration(ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS)}`, + `- Process metadata: pid \`${input.run.processPid ?? "unknown"}\`, process group \`${input.run.processGroupId ?? "unknown"}\`, in-memory handle \`${runningProcesses.has(input.run.id) ? "yes" : "no"}\``, + "", + "## Last Output Excerpt", + "", + input.evidence.safeTail ? 
`\`\`\`text\n${input.evidence.safeTail}\n\`\`\`` : "_No run-log tail was available._", + "", + "## Recent Run Events", + "", + recentEvents, + "", + "## Related Work", + "", + "Active child issues:", + childIssues, + "", + "Current source blockers:", + blockers, + "", + "## Decision Checklist", + "", + "- Continue or snooze if the run is intentionally quiet.", + "- Ask the run owner for context if work may be delegated outside the transcript.", + "- Preserve artifacts, branch state, and useful output before cancellation.", + "- Cancel or recover through the explicit run recovery controls when authorized.", + "- Close this issue as a false positive only after recording the reason.", + ].join("\n"); + } + + /** True when the error is an object with code "23505" (presumably the database's unique-violation code; confirm against the driver) that names the issues_active_stale_run_evaluation_uq constraint, either in its constraint field or inside its message. */ function isUniqueStaleRunEvaluationConflict(error: unknown) { + if (!error || typeof error !== "object") return false; + const maybe = error as { code?: string; constraint?: string; message?: string }; + return maybe.code === "23505" && + ( + maybe.constraint === "issues_active_stale_run_evaluation_uq" || + typeof maybe.message === "string" && maybe.message.includes("issues_active_stale_run_evaluation_uq") + ); + } + + /** Blocks the source issue on the evaluation issue: appends the evaluation id to the source issue's blocker ids (also setting status "blocked" when it is not already), comments on the source issue and logs "heartbeat.output_stale_escalated". Returns false without touching anything when there is no source issue, the source issue is done/cancelled, or the evaluation is already one of its blockers; returns true after applying the change. */ async function ensureSourceIssueBlockedByStaleEvaluation(input: { + sourceIssue: typeof issues.$inferSelect | null; + evaluationIssue: { id: string; identifier: string | null }; + run: typeof heartbeatRuns.$inferSelect; + }) { + if (!input.sourceIssue || ["done", "cancelled"].includes(input.sourceIssue.status)) return false; + const blockerIds = await existingBlockerIssueIds(input.sourceIssue.companyId, input.sourceIssue.id); + if (blockerIds.includes(input.evaluationIssue.id)) return false; + const nextBlockerIds = [...blockerIds, input.evaluationIssue.id]; + await issuesSvc.update(input.sourceIssue.id, { + ...(input.sourceIssue.status === "blocked" ? 
{} : { status: "blocked" }), + blockedByIssueIds: nextBlockerIds, + }); + await issuesSvc.addComment(input.sourceIssue.id, [ + "Paperclip detected critical output silence on this issue's active run.", + "", + `- Evaluation issue: ${input.evaluationIssue.identifier ?? input.evaluationIssue.id}`, + `- Run: \`${input.run.id}\``, + "", + "This blocks the source issue on the explicit review task without cancelling the active process.", + ].join("\n"), { runId: input.run.id }); + await logActivity(db, { + companyId: input.sourceIssue.companyId, + actorType: "system", + actorId: "system", + agentId: null, + runId: input.run.id, + action: "heartbeat.output_stale_escalated", + entityType: "issue", + entityId: input.sourceIssue.id, + details: { + source: "recovery.scan_silent_active_runs", + evaluationIssueId: input.evaluationIssue.id, + blockerIssueIds: nextBlockerIds, + }, + }); + return true; + } + + /** Ensures an evaluation issue exists for a silent run and reports what happened via the returned kind. "skipped": the running agent is missing or belongs to another company. "escalated": an open evaluation existed, the silence is critical and its priority was not yet "high", so it was raised, commented on and the source issue blocked. "existing": an open evaluation existed and needed no priority change (the source issue is still blocked when the level is critical), or a concurrent creation was detected. "created": a new evaluation issue was created, logged, the source issue blocked when critical, and the resolved owner (if any) woken. */ async function createOrUpdateStaleRunEvaluation(input: { + run: typeof heartbeatRuns.$inferSelect; + now: Date; + }) { + const runningAgent = await getAgent(input.run.agentId); + if (!runningAgent || runningAgent.companyId !== input.run.companyId) return { kind: "skipped" as const }; + const sourceIssue = await resolveStaleRunSourceIssue(input.run); + const prefix = await getCompanyIssuePrefix(input.run.companyId); + const evidence = await collectStaleRunEvidence({ + run: input.run, + runningAgent, + sourceIssue, + prefix, + now: input.now, + }); + const level = (evidence.silenceAgeMs ?? 0) >= ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS ? 
"critical" : "suspicious"; + const existing = await findOpenStaleRunEvaluation(input.run.companyId, input.run.id); + if (existing) { + /* first time the critical threshold is seen for this evaluation: raise priority and announce it */ if (level === "critical" && existing.priority !== "high") { + await issuesSvc.update(existing.id, { + priority: "high", + }); + await issuesSvc.addComment(existing.id, [ + "Critical output silence threshold crossed.", + "", + `- Run: \`${input.run.id}\``, + `- Silent for: ${formatDuration(evidence.silenceAgeMs)}`, + `- Last output at: ${input.run.lastOutputAt?.toISOString() ?? "none recorded"}`, + ].join("\n"), { runId: input.run.id }); + await ensureSourceIssueBlockedByStaleEvaluation({ + sourceIssue, + evaluationIssue: existing, + run: input.run, + }); + return { kind: "escalated" as const, evaluationIssueId: existing.id }; + } + /* already high priority: still make sure the source issue is blocked on the evaluation */ if (level === "critical") { + await ensureSourceIssueBlockedByStaleEvaluation({ + sourceIssue, + evaluationIssue: existing, + run: input.run, + }); + } + return { kind: "existing" as const, evaluationIssueId: existing.id }; + } + + const ownerAgentId = await resolveStaleRunOwnerAgentId({ run: input.run, runningAgent, sourceIssue }); + const description = buildStaleRunEvaluationDescription({ + run: input.run, + runningAgent, + sourceIssue, + prefix, + evidence, + level, + now: input.now, + }); + let evaluation: Awaited>; + try { + evaluation = await issuesSvc.create(input.run.companyId, { + title: `Review silent active run for ${runningAgent.name}`, + description, + status: "todo", + priority: level === "critical" ? "high" : "medium", + parentId: sourceIssue && !["done", "cancelled"].includes(sourceIssue.status) ? sourceIssue.id : null, + projectId: sourceIssue?.projectId ?? null, + goalId: sourceIssue?.goalId ?? null, + billingCode: sourceIssue?.billingCode ?? 
null, + assigneeAgentId: ownerAgentId, + originKind: STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND, + originId: input.run.id, + originRunId: input.run.id, + originFingerprint: staleActiveRunOriginFingerprint(input.run.companyId, input.run.id), + }); + } catch (error) { + /* a unique-constraint conflict means another caller created the evaluation in the meantime; report it as existing, rethrow anything else */ if (!isUniqueStaleRunEvaluationConflict(error)) throw error; + const raced = await findOpenStaleRunEvaluation(input.run.companyId, input.run.id); + if (!raced) throw error; + return { kind: "existing" as const, evaluationIssueId: raced.id }; + } + + await logActivity(db, { + companyId: input.run.companyId, + actorType: "system", + actorId: "system", + agentId: ownerAgentId, + runId: input.run.id, + action: "heartbeat.output_stale_detected", + entityType: "issue", + entityId: evaluation.id, + details: { + source: "recovery.scan_silent_active_runs", + level, + sourceIssueId: sourceIssue?.id ?? null, + silenceAgeMs: evidence.silenceAgeMs, + lastOutputAt: input.run.lastOutputAt?.toISOString() ?? null, + }, + }); + if (level === "critical") { + await ensureSourceIssueBlockedByStaleEvaluation({ + sourceIssue, + evaluationIssue: evaluation, + run: input.run, + }); + } + if (ownerAgentId) { + await deps.enqueueWakeup(ownerAgentId, { + source: "assignment", + triggerDetail: "system", + reason: "issue_assigned", + payload: { + issueId: evaluation.id, + staleRunId: input.run.id, + sourceIssueId: sourceIssue?.id ?? null, + }, + requestedByActorType: "system", + requestedByActorId: null, + contextSnapshot: { + issueId: evaluation.id, + taskId: evaluation.id, + wakeReason: "issue_assigned", + source: STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND, + staleRunId: input.run.id, + sourceIssueId: sourceIssue?.id ?? null, + }, + }); + } + return { kind: "created" as const, evaluationIssueId: evaluation.id }; + } + + /** Scans "running" heartbeat runs whose silence start (first non-null of lastOutputAt, processStartedAt, startedAt, createdAt) is at or before now minus the suspicion threshold. At most 100 runs are taken per call, oldest createdAt first, optionally restricted to one company. Runs with a snooze/continue decision in effect are counted as snoozed; every other run goes through createOrUpdateStaleRunEvaluation. Returns per-outcome counters plus the evaluation issue ids that were created or found. */ async function scanSilentActiveRuns(opts?: { now?: Date; companyId?: string }) { + const now = opts?.now ?? 
new Date(); + const suspicionBefore = new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS); + const candidates = await db + .select() + .from(heartbeatRuns) + .where( + and( + opts?.companyId ? eq(heartbeatRuns.companyId, opts.companyId) : undefined, + eq(heartbeatRuns.status, "running"), + sql`coalesce(${heartbeatRuns.lastOutputAt}, ${heartbeatRuns.processStartedAt}, ${heartbeatRuns.startedAt}, ${heartbeatRuns.createdAt}) <= ${suspicionBefore.toISOString()}::timestamptz`, + ), + ) + .orderBy(asc(heartbeatRuns.createdAt)) + .limit(100); + + const result = { + scanned: candidates.length, + created: 0, + existing: 0, + escalated: 0, + snoozed: 0, + skipped: 0, + evaluationIssueIds: [] as string[], + }; + + for (const run of candidates) { + if (await latestActiveOutputQuietUntilDecision(run.companyId, run.id, now)) { + result.snoozed += 1; + continue; + } + const outcome = await createOrUpdateStaleRunEvaluation({ run, now }); + if (outcome.kind === "created") result.created += 1; + else if (outcome.kind === "existing") result.existing += 1; + else if (outcome.kind === "escalated") result.escalated += 1; + else result.skipped += 1; + if ("evaluationIssueId" in outcome && outcome.evaluationIssueId) { + result.evaluationIssueIds.push(outcome.evaluationIssueId); + } + } + + return result; + } + + /** Records a watchdog decision ("snooze", "continue" or "dismissed_false_positive") for a heartbeat run and returns the inserted decision row. Throws notFound when the run, or the supplied evaluation issue within the run's company, does not exist. Throws forbidden unless the actor is the board or the agent assigned to an open, non-hidden stale-run evaluation issue bound to this run, and also when a resolved createdByRunId does not belong to the run's company (or, for agent actors, to that agent). The decision is also written to the activity log. */ async function recordWatchdogDecision(input: { + runId: string; + actor: WatchdogDecisionActor; + decision: "snooze" | "continue" | "dismissed_false_positive"; + evaluationIssueId?: string | null; + reason?: string | null; + snoozedUntil?: Date | null; + createdByRunId?: string | null; + now?: Date; + }) { + const [run] = await db + .select() + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, input.runId)) + .limit(1); + if (!run) throw notFound("Heartbeat run not found"); + + let evaluationIssue: { + id: string; + assigneeAgentId: string | null; + companyId: string; + originKind: string; + originId: string | null; + hiddenAt: Date | null; + status: string; + } | 
null = null; + if (input.evaluationIssueId) { + evaluationIssue = await db + .select({ + id: issues.id, + assigneeAgentId: issues.assigneeAgentId, + companyId: issues.companyId, + originKind: issues.originKind, + originId: issues.originId, + hiddenAt: issues.hiddenAt, + status: issues.status, + }) + .from(issues) + .where(and(eq(issues.id, input.evaluationIssueId), eq(issues.companyId, run.companyId))) + .then((rows) => rows[0] ?? null); + if (!evaluationIssue) throw notFound("Evaluation issue not found"); + } + + /* authorization: board actors pass unconditionally; an agent passes only as the assignee of an open, non-hidden stale-run evaluation issue whose originId is this run */ const boardActor = input.actor.type === "board"; + const assignedRecoveryOwner = + input.actor.type === "agent" && + Boolean(input.actor.agentId) && + evaluationIssue !== null && + evaluationIssue.originKind === STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND && + evaluationIssue.originId === run.id && + evaluationIssue.hiddenAt === null && + !["done", "cancelled"].includes(evaluationIssue.status) && + evaluationIssue?.assigneeAgentId === input.actor.agentId; + if (!boardActor && !assignedRecoveryOwner) { + throw forbidden("Only the board or the assigned recovery owner can record watchdog decisions"); + } + + /* applies to board actors as well: a supplied evaluation issue must be a stale-run evaluation bound to the target run */ if (evaluationIssue && ( + evaluationIssue.originKind !== STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND || + evaluationIssue.originId !== run.id + )) { + throw forbidden("Watchdog decision evaluation issue is not bound to the target run"); + } + + if (input.actor.type === "agent" && !evaluationIssue) { + throw forbidden("Agent watchdog decisions require the target evaluation issue"); + } + + /* the actor's own runId takes precedence over the createdByRunId passed in the input */ const createdByRunId = input.actor.type === "agent" + ? input.actor.runId ?? input.createdByRunId ?? null + : input.actor.type === "board" + ? input.actor.runId ?? input.createdByRunId ?? 
null + : null; + if (createdByRunId) { + const [creatorRun] = await db + .select({ id: heartbeatRuns.id, companyId: heartbeatRuns.companyId, agentId: heartbeatRuns.agentId }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, createdByRunId)) + .limit(1); + const sameCompany = creatorRun?.companyId === run.companyId; + const sameAgent = input.actor.type !== "agent" || creatorRun?.agentId === input.actor.agentId; + if (!creatorRun || !sameCompany || !sameAgent) { + throw forbidden("createdByRunId is not valid for this watchdog decision actor"); + } + } + + const decisionNow = input.now ?? new Date(); + /* "snooze" stores the caller-supplied time as given (possibly null); "continue" stores a supplied time only when it lies in the future and otherwise now + ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS; any other decision stores null */ const effectiveSnoozedUntil = input.decision === "snooze" + ? input.snoozedUntil ?? null + : input.decision === "continue" + ? input.snoozedUntil && input.snoozedUntil > decisionNow + ? input.snoozedUntil + : new Date(decisionNow.getTime() + ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS) + : null; + + const [row] = await db + .insert(heartbeatRunWatchdogDecisions) + .values({ + companyId: run.companyId, + runId: run.id, + evaluationIssueId: input.evaluationIssueId ?? null, + decision: input.decision, + snoozedUntil: effectiveSnoozedUntil, + reason: input.reason ?? null, + createdByAgentId: input.actor.type === "agent" ? input.actor.agentId ?? null : null, + createdByUserId: input.actor.type === "board" ? input.actor.userId ?? null : null, + createdByRunId, + }) + .returning(); + + await logActivity(db, { + companyId: run.companyId, + actorType: input.actor.type === "agent" ? "agent" : "user", + actorId: input.actor.type === "agent" + ? input.actor.agentId ?? "agent" + : input.actor.type === "board" + ? input.actor.userId ?? "board" + : "unknown", + agentId: input.actor.type === "agent" ? input.actor.agentId ?? null : null, + runId: run.id, + action: input.decision === "snooze" ? 
"heartbeat.watchdog_snoozed" : "heartbeat.watchdog_decision_recorded", + entityType: "heartbeat_run", + entityId: run.id, + details: { + source: "recovery.record_watchdog_decision", + decision: input.decision, + evaluationIssueId: input.evaluationIssueId ?? null, + snoozedUntil: effectiveSnoozedUntil?.toISOString() ?? null, + reason: input.reason ?? null, + }, + }); + + return row; + } + + /** Returns the most recently created stranded-issue recovery issue for the source issue that is not hidden and not done/cancelled, or null when there is none. */ async function findOpenStrandedIssueRecoveryIssue(companyId: string, sourceIssueId: string) { + return db + .select() + .from(issues) + .where( + and( + eq(issues.companyId, companyId), + eq(issues.originKind, STRANDED_ISSUE_RECOVERY_ORIGIN_KIND), + eq(issues.originId, sourceIssueId), + isNull(issues.hiddenAt), + notInArray(issues.status, ["done", "cancelled"]), + ), + ) + .orderBy(desc(issues.createdAt)) + .limit(1) + .then((rows) => rows[0] ?? null); + } + + /** Chooses the agent that should own the recovery of a stranded issue. Candidates are tried in this order: the assignee's manager (reportsTo), the creator's manager, the creator itself, agents with role "cto" then "ceo" (each ordered by createdAt), and finally the assignee. Returns the id of the first candidate that belongs to the issue's company, is invokable and has no budget invocation block; null when none qualifies. */ async function resolveStrandedIssueRecoveryOwnerAgentId(issue: typeof issues.$inferSelect) { + const candidateIds: string[] = []; + if (issue.assigneeAgentId) { + const assignee = await getAgent(issue.assigneeAgentId); + if (assignee?.reportsTo) candidateIds.push(assignee.reportsTo); + } + if (issue.createdByAgentId) { + const creator = await getAgent(issue.createdByAgentId); + if (creator?.reportsTo) candidateIds.push(creator.reportsTo); + candidateIds.push(issue.createdByAgentId); + } + + const roleCandidates = await db + .select() + .from(agents) + .where(and(eq(agents.companyId, issue.companyId), inArray(agents.role, ["cto", "ceo"]))) + .orderBy(sql`case when ${agents.role} = 'cto' then 0 else 1 end`, asc(agents.createdAt)); + candidateIds.push(...roleCandidates.map((agent) => agent.id)); + /* the assignee is appended last, so it is chosen only when no earlier candidate qualifies */ if (issue.assigneeAgentId) candidateIds.push(issue.assigneeAgentId); + + const seen = new Set(); + for (const agentId of candidateIds) { + if (seen.has(agentId)) continue; + seen.add(agentId); + const candidate = await getAgent(agentId); + if (!candidate || candidate.companyId !== issue.companyId) continue; + const budgetBlock = 
await budgets.getInvocationBlock(issue.companyId, candidate.id, { + issueId: issue.id, + projectId: issue.projectId, + }); + if (isAgentInvokable(candidate) && !budgetBlock) return candidate.id; + } + + return null; + } + + /** Renders the markdown description of a stranded-issue recovery task: a "Source" section (source issue link, previous status, latest retry run and its status, retry reason read from the run's contextSnapshot, failure summary), an "Ownership" note and the "Required Action" list. Missing run data is rendered as "none"/"unknown". */ function buildStrandedIssueRecoveryDescription(input: { + issue: typeof issues.$inferSelect; + latestRun: LatestIssueRun; + previousStatus: "todo" | "in_progress"; + prefix: string; + }) { + const sourceIssue = issueUiLink({ identifier: input.issue.identifier, id: input.issue.id }, input.prefix); + const runLink = input.latestRun + ? `[\`${input.latestRun.id}\`](/${input.prefix}/agents/${input.latestRun.agentId}/runs/${input.latestRun.id})` + : "none"; + const retryReason = readNonEmptyString(parseObject(input.latestRun?.contextSnapshot)?.retryReason) ?? "unknown"; + const failureSummary = summarizeRunFailureForIssueComment(input.latestRun); + + return [ + "Paperclip exhausted automatic recovery for an assigned issue and created this explicit recovery task.", + "", + "## Source", + "", + `- Source issue: ${sourceIssue}`, + `- Previous source status: \`${input.previousStatus}\``, + `- Latest retry run: ${runLink}`, + `- Latest retry status: \`${input.latestRun?.status ?? "unknown"}\``, + `- Detected invariant: \`stranded_assigned_issue\``, + `- Retry reason: \`${retryReason}\``, + failureSummary ? 
`- Failure: ${failureSummary.trim()}` : "- Failure: none recorded", + "", + "## Ownership", + "", + "- Selected owner: the first invokable manager/creator/executive candidate with budget available.", + "", + "## Required Action", + "", + "- Inspect the latest run and source issue state.", + "- Fix the runtime/adapter problem, reassign the source issue, or convert the source issue into a clear manual-review state.", + "- When the source issue has a live execution path or has been intentionally resolved, mark this recovery issue done.", + ].join("\n"); + } + + /** Returns the open recovery issue for the source issue, creating it when none exists. A new recovery issue is a "todo" child of the source issue that copies its priority, project, goal and billing code, is assigned to the resolved owner, and the owner is woken with reason "issue_assigned". Returns null, creating nothing, when no eligible owner can be resolved. */ async function ensureStrandedIssueRecoveryIssue(input: { + issue: typeof issues.$inferSelect; + latestRun: LatestIssueRun; + previousStatus: "todo" | "in_progress"; + }) { + const existing = await findOpenStrandedIssueRecoveryIssue(input.issue.companyId, input.issue.id); + if (existing) return existing; + + const ownerAgentId = await resolveStrandedIssueRecoveryOwnerAgentId(input.issue); + if (!ownerAgentId) return null; + + const prefix = await getCompanyIssuePrefix(input.issue.companyId); + const recovery = await issuesSvc.create(input.issue.companyId, { + title: `Recover stalled issue ${input.issue.identifier ?? input.issue.title}`, + description: buildStrandedIssueRecoveryDescription({ + issue: input.issue, + latestRun: input.latestRun, + previousStatus: input.previousStatus, + prefix, + }), + status: "todo", + priority: input.issue.priority, + parentId: input.issue.id, + projectId: input.issue.projectId, + goalId: input.issue.goalId, + assigneeAgentId: ownerAgentId, + originKind: STRANDED_ISSUE_RECOVERY_ORIGIN_KIND, + originId: input.issue.id, + originRunId: input.latestRun?.id ?? null, + originFingerprint: [ + STRANDED_ISSUE_RECOVERY_ORIGIN_KIND, + input.issue.companyId, + input.issue.id, + input.latestRun?.id ?? 
"no-run", + ].join(":"), + billingCode: input.issue.billingCode, + inheritExecutionWorkspaceFromIssueId: input.issue.id, + }); + + await deps.enqueueWakeup(ownerAgentId, { + source: "assignment", + triggerDetail: "system", + reason: "issue_assigned", + payload: { + issueId: recovery.id, + sourceIssueId: input.issue.id, + strandedRunId: input.latestRun?.id ?? null, + }, + requestedByActorType: "system", + requestedByActorId: null, + contextSnapshot: { + issueId: recovery.id, + taskId: recovery.id, + wakeReason: "issue_assigned", + source: STRANDED_ISSUE_RECOVERY_ORIGIN_KIND, + sourceIssueId: input.issue.id, + strandedRunId: input.latestRun?.id ?? null, + }, + }); + + return recovery; + } + + /** Ids of all issues linked as blocking the given issue ("blocks" relations whose relatedIssueId is the issue), regardless of the blockers' status. */ async function existingBlockerIssueIds(companyId: string, issueId: string) { + return db + .select({ blockerIssueId: issueRelations.issueId }) + .from(issueRelations) + .where( + and( + eq(issueRelations.companyId, companyId), + eq(issueRelations.relatedIssueId, issueId), + eq(issueRelations.type, "blocks"), + ), + ) + .then((rows) => rows.map((row) => row.blockerIssueId)); + } + + /** Same lookup as existingBlockerIssueIds, but joined to the blocker issues so that only blockers whose status is not done/cancelled are returned. */ async function existingUnresolvedBlockerIssueIds(companyId: string, issueId: string) { + return db + .select({ blockerIssueId: issueRelations.issueId }) + .from(issueRelations) + .innerJoin( + issues, + and( + eq(issues.companyId, issueRelations.companyId), + eq(issues.id, issueRelations.issueId), + ), + ) + .where( + and( + eq(issueRelations.companyId, companyId), + eq(issueRelations.relatedIssueId, issueId), + eq(issueRelations.type, "blocks"), + notInArray(issues.status, ["done", "cancelled"]), + ), + ) + .then((rows) => rows.map((row) => row.blockerIssueId)); + } + + /** Escalates a stranded assigned issue: ensures a recovery issue exists (when an owner can be found), sets the source issue to "blocked" with its unresolved blockers plus the recovery issue, posts the supplied comment followed by recovery guidance, and logs the change. Returns the updated issue, or null when the update returned nothing (in which case no comment or log entry is written). */ async function escalateStrandedAssignedIssue(input: { + issue: typeof issues.$inferSelect; + previousStatus: "todo" | "in_progress"; + latestRun: LatestIssueRun; + comment: string; + }) { + const recoveryIssue = await ensureStrandedIssueRecoveryIssue({ + issue: input.issue, + previousStatus: input.previousStatus, + latestRun: 
input.latestRun, + }); + /* only unresolved blockers are carried over; the Set keeps the recovery issue from being listed twice when it is already linked */ const blockerIds = await existingUnresolvedBlockerIssueIds(input.issue.companyId, input.issue.id); + const nextBlockerIds = recoveryIssue + ? [...new Set([...blockerIds, recoveryIssue.id])] + : blockerIds; + const updated = await issuesSvc.update(input.issue.id, { + status: "blocked", + blockedByIssueIds: nextBlockerIds, + }); + if (!updated) return null; + + const prefix = await getCompanyIssuePrefix(input.issue.companyId); + /* the text appended to the comment differs depending on whether a recovery issue could be created */ const recoveryLine = recoveryIssue + ? [ + "", + `- Recovery issue: ${issueUiLink({ identifier: recoveryIssue.identifier, id: recoveryIssue.id }, prefix)}`, + "- Next action: the recovery owner should either restore a live execution path or record the manual resolution, then mark the recovery issue done.", + ].join("\n") + : [ + "", + "- Recovery issue: none created because Paperclip could not find an invokable manager, creator, or executive owner with budget available.", + "- Next action: a board operator should assign an invokable recovery owner, fix the agent/runtime state, or record an intentional manual resolution.", + ].join("\n"); + + await issuesSvc.addComment(input.issue.id, `${input.comment}${recoveryLine}`, {}); + + await logActivity(db, { + companyId: input.issue.companyId, + actorType: "system", + actorId: "system", + agentId: null, + runId: null, + action: "issue.updated", + entityType: "issue", + entityId: input.issue.id, + details: { + identifier: input.issue.identifier, + status: "blocked", + previousStatus: input.previousStatus, + source: "recovery.reconcile_stranded_assigned_issue", + latestRunId: input.latestRun?.id ?? null, + latestRunStatus: input.latestRun?.status ?? null, + latestRunErrorCode: input.latestRun?.errorCode ?? null, + recoveryIssueId: recoveryIssue?.id ?? 
null, + blockerIssueIds: nextBlockerIds, + }, + }); + + return updated; + } + + async function reconcileStrandedAssignedIssues() { + const candidates = await db + .select() + .from(issues) + .where( + and( + isNull(issues.assigneeUserId), + inArray(issues.status, ["todo", "in_progress"]), + sql`${issues.assigneeAgentId} is not null`, + ), + ); + + const result = { + dispatchRequeued: 0, + continuationRequeued: 0, + orphanBlockersAssigned: 0, + escalated: 0, + skipped: 0, + issueIds: [] as string[], + }; + + for (const issue of candidates) { + const agentId = issue.assigneeAgentId; + if (!agentId) { + result.skipped += 1; + continue; + } + + const agent = await getAgent(agentId); + if (!agent || agent.companyId !== issue.companyId || !isAgentInvokable(agent)) { + result.skipped += 1; + continue; + } + + if (await hasActiveExecutionPath(issue.companyId, issue.id)) { + result.skipped += 1; + continue; + } + + if (await isAutomaticRecoverySuppressedByPauseHold(db, issue.companyId, issue.id, treeControlSvc)) { + result.skipped += 1; + continue; + } + + const latestRun = await getLatestIssueRun(issue.companyId, issue.id); + if (issue.status === "todo") { + if (!latestRun || latestRun.status === "succeeded") { + result.skipped += 1; + continue; + } + + if (didAutomaticRecoveryFail(latestRun, "assignment_recovery")) { + const failureSummary = summarizeRunFailureForIssueComment(latestRun); + const updated = await escalateStrandedAssignedIssue({ + issue, + previousStatus: "todo", + latestRun, + comment: + "Paperclip automatically retried dispatch for this assigned `todo` issue after a lost wake/run, " + + `but it still has no live execution path.${failureSummary ?? 
""} ` + + "Moving it to `blocked` so it is visible for intervention.", + }); + if (updated) { + result.escalated += 1; + result.issueIds.push(issue.id); + } else { + result.skipped += 1; + } + continue; + } + + const queued = await enqueueStrandedIssueRecovery({ + issueId: issue.id, + agentId, + reason: "issue_assignment_recovery", + retryReason: "assignment_recovery", + source: "issue.assignment_recovery", + retryOfRunId: latestRun.id, + }); + if (queued) { + result.dispatchRequeued += 1; + result.issueIds.push(issue.id); + } else { + result.skipped += 1; + } + continue; + } + + if (!latestRun && !issue.checkoutRunId && !issue.executionRunId) { + result.skipped += 1; + continue; + } + if (didAutomaticRecoveryFail(latestRun, "issue_continuation_needed")) { + const failureSummary = summarizeRunFailureForIssueComment(latestRun); + const updated = await escalateStrandedAssignedIssue({ + issue, + previousStatus: "in_progress", + latestRun, + comment: + "Paperclip automatically retried continuation for this assigned `in_progress` issue after its live " + + `execution disappeared, but it still has no live execution path.${failureSummary ?? ""} ` + + "Moving it to `blocked` so it is visible for intervention.", + }); + if (updated) { + result.escalated += 1; + result.issueIds.push(issue.id); + } else { + result.skipped += 1; + } + continue; + } + + const queued = await enqueueStrandedIssueRecovery({ + issueId: issue.id, + agentId, + reason: "issue_continuation_needed", + retryReason: "issue_continuation_needed", + source: "issue.continuation_recovery", + retryOfRunId: latestRun?.id ?? issue.checkoutRunId ?? 
null, + }); + if (queued) { + result.continuationRequeued += 1; + result.issueIds.push(issue.id); + } else { + result.skipped += 1; + } + } + + const orphanBlockerRecovery = await reconcileUnassignedBlockingIssues(); + result.orphanBlockersAssigned = orphanBlockerRecovery.assigned; + result.skipped += orphanBlockerRecovery.skipped; + result.issueIds.push(...orphanBlockerRecovery.issueIds); + + return result; + } + + async function collectIssueGraphLivenessFindings() { + const [issueRows, relationRows, agentRows, activeRunRows, activeIssueRunRows, wakeRows] = await Promise.all([ + db + .select({ + id: issues.id, + companyId: issues.companyId, + identifier: issues.identifier, + title: issues.title, + status: issues.status, + projectId: issues.projectId, + goalId: issues.goalId, + parentId: issues.parentId, + assigneeAgentId: issues.assigneeAgentId, + assigneeUserId: issues.assigneeUserId, + createdByAgentId: issues.createdByAgentId, + createdByUserId: issues.createdByUserId, + executionState: issues.executionState, + }) + .from(issues) + .where( + and( + isNull(issues.hiddenAt), + notInArray(issues.originKind, [RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation]), + ), + ), + db + .select({ + companyId: issueRelations.companyId, + blockerIssueId: issueRelations.issueId, + blockedIssueId: issueRelations.relatedIssueId, + }) + .from(issueRelations) + .where(eq(issueRelations.type, "blocks")), + db + .select({ + id: agents.id, + companyId: agents.companyId, + name: agents.name, + role: agents.role, + title: agents.title, + status: agents.status, + reportsTo: agents.reportsTo, + }) + .from(agents), + db + .select({ + companyId: heartbeatRuns.companyId, + agentId: heartbeatRuns.agentId, + status: heartbeatRuns.status, + contextSnapshot: heartbeatRuns.contextSnapshot, + }) + .from(heartbeatRuns) + .where(inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES])), + db + .select({ + companyId: issues.companyId, + agentId: heartbeatRuns.agentId, + status: 
heartbeatRuns.status, + issueId: issues.id, + }) + .from(issues) + .innerJoin(heartbeatRuns, eq(issues.executionRunId, heartbeatRuns.id)) + .where( + and( + isNull(issues.hiddenAt), + notInArray(issues.originKind, [RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation]), + inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]), + ), + ), + db + .select({ + companyId: agentWakeupRequests.companyId, + agentId: agentWakeupRequests.agentId, + status: agentWakeupRequests.status, + payload: agentWakeupRequests.payload, + }) + .from(agentWakeupRequests) + .where(inArray(agentWakeupRequests.status, ["queued", "deferred_issue_execution"])), + ]); + + return classifyIssueGraphLiveness({ + issues: issueRows, + relations: relationRows, + agents: agentRows, + activeRuns: activeRunRows.map((row) => ({ + companyId: row.companyId, + agentId: row.agentId, + status: row.status, + issueId: issueIdFromRunContext(row.contextSnapshot), + })).concat(activeIssueRunRows.map((row) => ({ + companyId: row.companyId, + agentId: row.agentId, + status: row.status, + issueId: row.issueId, + }))), + queuedWakeRequests: wakeRows.map((row) => ({ + companyId: row.companyId, + agentId: row.agentId, + status: row.status, + issueId: issueIdFromWakePayload(row.payload), + })), + }); + } + + async function findOpenLivenessEscalation(companyId: string, incidentKey: string) { + return db + .select() + .from(issues) + .where( + and( + eq(issues.companyId, companyId), + eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation), + eq(issues.originId, incidentKey), + isNull(issues.hiddenAt), + notInArray(issues.status, ["done", "cancelled"]), + ), + ) + .limit(1) + .then((rows) => rows[0] ?? 
null); + } + + async function findOpenLivenessRecoveryIssueForLeaf(finding: IssueLivenessFinding) { + const byFingerprint = await db + .select() + .from(issues) + .where( + and( + eq(issues.companyId, finding.companyId), + eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation), + eq(issues.originFingerprint, livenessRecoveryLeafFingerprint(finding)), + isNull(issues.hiddenAt), + notInArray(issues.status, ["done", "cancelled"]), + ), + ) + .limit(1) + .then((rows) => rows[0] ?? null); + if (byFingerprint) return byFingerprint; + + const leafIssueId = livenessRecoveryLeafIssueId(finding); + const openRecoveries = await db + .select() + .from(issues) + .where( + and( + eq(issues.companyId, finding.companyId), + eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation), + isNull(issues.hiddenAt), + notInArray(issues.status, ["done", "cancelled"]), + ), + ); + return openRecoveries.find((row) => { + const parsed = parseLivenessIncidentKey(row.originId); + return parsed?.state === finding.state && parsed.leafIssueId === leafIssueId; + }) ?? null; + } + + async function removeRecoveryBlockerFromSource(recovery: typeof issues.$inferSelect) { + const parsed = parseLivenessIncidentKey(recovery.originId); + if (!parsed) return false; + const sourceIssue = await db + .select() + .from(issues) + .where(and(eq(issues.companyId, recovery.companyId), eq(issues.id, parsed.issueId))) + .then((rows) => rows[0] ?? 
null); + if (!sourceIssue) return false; + + const blockerIds = await existingBlockerIssueIds(sourceIssue.companyId, sourceIssue.id); + if (!blockerIds.includes(recovery.id)) return false; + await issuesSvc.update(sourceIssue.id, { + blockedByIssueIds: blockerIds.filter((blockerId) => blockerId !== recovery.id), + }); + return true; + } + + async function hasActiveRunForIssueId(companyId: string, issueId: string) { + const [contextRun, issueRun] = await Promise.all([ + db + .select({ id: heartbeatRuns.id }) + .from(heartbeatRuns) + .where( + and( + eq(heartbeatRuns.companyId, companyId), + inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]), + sql`(${heartbeatRuns.contextSnapshot}->>'issueId' = ${issueId} + OR ${heartbeatRuns.contextSnapshot}->>'taskId' = ${issueId})`, + ), + ) + .limit(1) + .then((rows) => rows[0] ?? null), + db + .select({ id: heartbeatRuns.id }) + .from(issues) + .innerJoin(heartbeatRuns, eq(issues.executionRunId, heartbeatRuns.id)) + .where( + and( + eq(issues.companyId, companyId), + eq(issues.id, issueId), + inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]), + ), + ) + .limit(1) + .then((rows) => rows[0] ?? 
null), + ]); + return Boolean(contextRun || issueRun); + } + + async function retireObsoleteLivenessRecoveryIssues(findings: IssueLivenessFinding[]) { + const currentIncidentKeys = new Set(findings.map((finding) => finding.incidentKey)); + const currentLeafKeys = new Set( + findings.map((finding) => + livenessRecoveryLeafKey( + finding.companyId, + finding.state, + livenessRecoveryLeafIssueId(finding), + ), + ), + ); + const openRecoveries = await db + .select() + .from(issues) + .where( + and( + eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation), + isNull(issues.hiddenAt), + notInArray(issues.status, ["done", "cancelled"]), + ), + ); + const result = { + retired: 0, + activeSkipped: 0, + blockerRelationsRemoved: 0, + retiredIssueIds: [] as string[], + }; + + for (const recovery of openRecoveries) { + if (recovery.originId && currentIncidentKeys.has(recovery.originId)) continue; + const parsed = parseLivenessIncidentKey(recovery.originId); + if (!parsed) continue; + if ( + currentLeafKeys.has( + livenessRecoveryLeafKey(parsed.companyId, parsed.state, parsed.leafIssueId), + ) + ) { + continue; + } + if (await removeRecoveryBlockerFromSource(recovery)) { + result.blockerRelationsRemoved += 1; + } + if (await hasActiveRunForIssueId(recovery.companyId, recovery.id)) { + result.activeSkipped += 1; + continue; + } + await issuesSvc.update(recovery.id, { status: "cancelled" }); + result.retired += 1; + result.retiredIssueIds.push(recovery.id); + } + + return result; + } + + async function isLivenessFindingOldEnoughForAutoRecovery(finding: IssueLivenessFinding, now = new Date()) { + const issueIds = [...new Set(finding.dependencyPath.map((entry) => entry.issueId))]; + if (issueIds.length === 0) return false; + const rows = await db + .select({ id: issues.id, updatedAt: issues.updatedAt }) + .from(issues) + .where(and(eq(issues.companyId, finding.companyId), inArray(issues.id, issueIds))); + if (rows.length !== issueIds.length) return false; + const 
latestUpdatedAt = rows.reduce((latest, row) => + row.updatedAt > latest ? row.updatedAt : latest, + rows[0]!.updatedAt); + return now.getTime() - latestUpdatedAt.getTime() >= ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_MIN_STALE_MS; + } + + async function resolveEscalationOwnerAgentId( + finding: IssueLivenessFinding, + issue: typeof issues.$inferSelect, + ) { + const detailedCandidates = finding.recommendedOwnerCandidates.length > 0 + ? finding.recommendedOwnerCandidates + : finding.recommendedOwnerCandidateAgentIds.map((agentId) => ({ + agentId, + reason: "ordered_invokable_fallback" as const, + sourceIssueId: finding.recoveryIssueId, + })); + const seenCandidates = new Set(); + const candidates = detailedCandidates.filter((candidate) => { + if (seenCandidates.has(candidate.agentId)) return false; + seenCandidates.add(candidate.agentId); + return true; + }); + const budgetBlockedCandidateAgentIds: string[] = []; + + for (const candidate of candidates) { + const budgetBlock = await budgets.getInvocationBlock(issue.companyId, candidate.agentId, { + issueId: issue.id, + projectId: issue.projectId, + }); + if (!budgetBlock) { + return { + agentId: candidate.agentId, + reason: candidate.reason, + sourceIssueId: candidate.sourceIssueId, + candidateAgentIds: candidates.map((entry) => entry.agentId), + candidateReasons: candidates.map((entry) => ({ + agentId: entry.agentId, + reason: entry.reason, + sourceIssueId: entry.sourceIssueId, + })), + budgetBlockedCandidateAgentIds, + }; + } + budgetBlockedCandidateAgentIds.push(candidate.agentId); + } + + return null; + } + + function shouldReuseRecoveryExecutionWorkspace(input: { + finding: IssueLivenessFinding; + recoveryIssue: typeof issues.$inferSelect; + ownerAgentId: string; + }) { + if (input.finding.recoveryIssueId === input.finding.issueId) return false; + return input.recoveryIssue.assigneeAgentId === input.ownerAgentId; + } + + async function ensureIssueBlockedByEscalation(input: { + issue: typeof issues.$inferSelect; + 
escalationIssueId: string; + finding: IssueLivenessFinding; + runId?: string | null; + }) { + const blockerIds = await existingBlockerIssueIds(input.issue.companyId, input.issue.id); + const nextBlockerIds = [...new Set([...blockerIds, input.escalationIssueId])]; + const update: Partial & { blockedByIssueIds: string[] } = { + blockedByIssueIds: nextBlockerIds, + }; + if (input.issue.status !== "blocked") { + update.status = "blocked"; + } + + const updated = await issuesSvc.update(input.issue.id, update); + if (!updated) return null; + + await logActivity(db, { + companyId: input.issue.companyId, + actorType: "system", + actorId: "system", + agentId: null, + runId: input.runId ?? null, + action: "issue.blockers.updated", + entityType: "issue", + entityId: input.issue.id, + details: { + source: "recovery.reconcile_issue_graph_liveness", + incidentKey: input.finding.incidentKey, + findingState: input.finding.state, + blockerIssueIds: nextBlockerIds, + escalationIssueId: input.escalationIssueId, + status: update.status ?? input.issue.status, + previousStatus: input.issue.status, + }, + }); + + return updated; + } + + async function createIssueGraphLivenessEscalation(input: { + finding: IssueLivenessFinding; + runId?: string | null; + }) { + const issue = await db + .select() + .from(issues) + .where(eq(issues.id, input.finding.issueId)) + .then((rows) => rows[0] ?? null); + if (!issue || issue.companyId !== input.finding.companyId) return { kind: "skipped" as const }; + if (await isAutomaticRecoverySuppressedByPauseHold(db, issue.companyId, issue.id, treeControlSvc)) { + return { kind: "skipped" as const }; + } + + const recoveryIssue = await db + .select() + .from(issues) + .where(and(eq(issues.id, input.finding.recoveryIssueId), eq(issues.companyId, issue.companyId))) + .then((rows) => rows[0] ?? null); + if (!recoveryIssue) return { kind: "skipped" as const }; + + const existing = + await findOpenLivenessEscalation(issue.companyId, input.finding.incidentKey) ?? 
+ await findOpenLivenessRecoveryIssueForLeaf(input.finding); + if (existing) { + await ensureIssueBlockedByEscalation({ + issue, + escalationIssueId: existing.id, + finding: input.finding, + runId: input.runId ?? null, + }); + return { kind: "existing" as const, escalationIssueId: existing.id }; + } + + const ownerSelection = await resolveEscalationOwnerAgentId(input.finding, recoveryIssue); + if (!ownerSelection) return { kind: "skipped" as const }; + const reuseRecoveryExecutionWorkspace = shouldReuseRecoveryExecutionWorkspace({ + finding: input.finding, + recoveryIssue, + ownerAgentId: ownerSelection.agentId, + }); + + let escalation: Awaited>; + try { + escalation = await issuesSvc.create(issue.companyId, { + title: `Unblock liveness incident for ${recoveryIssue.identifier ?? recoveryIssue.title}`, + description: buildLivenessEscalationDescription(input.finding), + status: "todo", + priority: "high", + parentId: recoveryIssue.id, + projectId: recoveryIssue.projectId, + goalId: recoveryIssue.goalId, + assigneeAgentId: ownerSelection.agentId, + originKind: RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation, + originId: input.finding.incidentKey, + originFingerprint: livenessRecoveryLeafFingerprint(input.finding), + billingCode: recoveryIssue.billingCode, + ...(reuseRecoveryExecutionWorkspace + ? { inheritExecutionWorkspaceFromIssueId: recoveryIssue.id } + : { + executionWorkspaceId: null, + executionWorkspacePreference: null, + executionWorkspaceSettings: null, + }), + }); + } catch (error) { + if (!isUniqueLivenessRecoveryConflict(error)) throw error; + const raced = + await findOpenLivenessEscalation(issue.companyId, input.finding.incidentKey) ?? + await findOpenLivenessRecoveryIssueForLeaf(input.finding); + if (!raced) throw error; + await ensureIssueBlockedByEscalation({ + issue, + escalationIssueId: raced.id, + finding: input.finding, + runId: input.runId ?? 
null, + }); + return { kind: "existing" as const, escalationIssueId: raced.id }; + } + + await ensureIssueBlockedByEscalation({ + issue, + escalationIssueId: escalation.id, + finding: input.finding, + runId: input.runId ?? null, + }); + + await issuesSvc.addComment( + issue.id, + buildLivenessOriginalIssueComment(input.finding, escalation), + { runId: input.runId ?? null }, + ); + + await logActivity(db, { + companyId: issue.companyId, + actorType: "system", + actorId: "system", + agentId: ownerSelection.agentId, + runId: input.runId ?? null, + action: "issue.harness_liveness_escalation_created", + entityType: "issue", + entityId: escalation.id, + details: { + source: "recovery.reconcile_issue_graph_liveness", + incidentKey: input.finding.incidentKey, + findingState: input.finding.state, + sourceIssueId: issue.id, + sourceIdentifier: issue.identifier, + recoveryIssueId: recoveryIssue.id, + recoveryIdentifier: recoveryIssue.identifier, + escalationIssueId: escalation.id, + escalationIdentifier: escalation.identifier, + dependencyPath: input.finding.dependencyPath, + ownerSelection: { + selectedAgentId: ownerSelection.agentId, + selectedReason: ownerSelection.reason, + selectedSourceIssueId: ownerSelection.sourceIssueId, + candidateAgentIds: ownerSelection.candidateAgentIds, + candidateReasons: ownerSelection.candidateReasons, + budgetBlockedCandidateAgentIds: ownerSelection.budgetBlockedCandidateAgentIds, + }, + workspaceSelection: { + reuseRecoveryExecutionWorkspace, + inheritedExecutionWorkspaceFromIssueId: reuseRecoveryExecutionWorkspace ? 
recoveryIssue.id : null, + projectWorkspaceSourceIssueId: recoveryIssue.id, + }, + }, + }); + + const wake = await deps.enqueueWakeup(ownerSelection.agentId, { + source: "assignment", + triggerDetail: "system", + reason: "issue_assigned", + payload: { + issueId: escalation.id, + sourceIssueId: issue.id, + recoveryIssueId: recoveryIssue.id, + incidentKey: input.finding.incidentKey, + }, + requestedByActorType: "system", + requestedByActorId: null, + contextSnapshot: { + issueId: escalation.id, + taskId: escalation.id, + wakeReason: "issue_assigned", + source: RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation, + sourceIssueId: issue.id, + recoveryIssueId: recoveryIssue.id, + incidentKey: input.finding.incidentKey, + }, + }); + + logger.warn({ + incidentKey: input.finding.incidentKey, + findingState: input.finding.state, + sourceIssueId: issue.id, + recoveryIssueId: recoveryIssue.id, + escalationIssueId: escalation.id, + ownerAgentId: ownerSelection.agentId, + ownerSelectionReason: ownerSelection.reason, + wakeupRunId: wake?.id ?? 
null, + }, "created issue graph liveness escalation"); + + return { kind: "created" as const, escalationIssueId: escalation.id }; + } + + async function reconcileIssueGraphLiveness(opts?: { runId?: string | null }) { + const findings = await collectIssueGraphLivenessFindings(); + const experimentalSettings = await instanceSettings.getExperimental(); + const autoRecoveryEnabled = asBoolean( + experimentalSettings.enableIssueGraphLivenessAutoRecovery, + false, + ); + const obsoleteRecoveryCleanup = await retireObsoleteLivenessRecoveryIssues(findings); + const result = { + findings: findings.length, + autoRecoveryEnabled, + escalationsCreated: 0, + existingEscalations: 0, + skipped: 0, + skippedAutoRecoveryDisabled: 0, + skippedAutoRecoveryTooYoung: 0, + obsoleteRecoveriesRetired: obsoleteRecoveryCleanup.retired, + obsoleteRecoveriesActiveSkipped: obsoleteRecoveryCleanup.activeSkipped, + obsoleteRecoveryBlockerRelationsRemoved: obsoleteRecoveryCleanup.blockerRelationsRemoved, + issueIds: [] as string[], + escalationIssueIds: [] as string[], + retiredRecoveryIssueIds: obsoleteRecoveryCleanup.retiredIssueIds, + }; + + if (!autoRecoveryEnabled) { + result.skippedAutoRecoveryDisabled = findings.length; + return result; + } + + const now = new Date(); + for (const finding of findings) { + if (!await isLivenessFindingOldEnoughForAutoRecovery(finding, now)) { + result.skippedAutoRecoveryTooYoung += 1; + result.skipped += 1; + continue; + } + const escalation = await createIssueGraphLivenessEscalation({ + finding, + runId: opts?.runId ?? 
null, + }); + if (escalation.kind === "created") { + result.escalationsCreated += 1; + result.issueIds.push(finding.issueId); + result.escalationIssueIds.push(escalation.escalationIssueId); + } else if (escalation.kind === "existing") { + result.existingEscalations += 1; + result.issueIds.push(finding.issueId); + result.escalationIssueIds.push(escalation.escalationIssueId); + } else { + result.skipped += 1; + } + } + + return result; + } + + function readRecoveryTimerIntervalMs(raw: unknown, fallback: number) { + return Math.max(1, Math.floor(asNumber(raw, fallback))); + } + + return { + buildRunOutputSilence, + escalateStrandedAssignedIssue, + recordWatchdogDecision, + scanSilentActiveRuns, + reconcileStrandedAssignedIssues, + reconcileIssueGraphLiveness, + readRecoveryTimerIntervalMs, + }; +} diff --git a/server/src/services/run-continuations.ts b/server/src/services/run-continuations.ts index 5269ad5a..c28d9a37 100644 --- a/server/src/services/run-continuations.ts +++ b/server/src/services/run-continuations.ts @@ -1,188 +1,11 @@ -import { and, eq, inArray } from "drizzle-orm"; -import type { Db } from "@paperclipai/db"; -import { agentWakeupRequests, agents, heartbeatRuns, issues } from "@paperclipai/db"; -import type { RunLivenessState } from "@paperclipai/shared"; - -export const RUN_LIVENESS_CONTINUATION_REASON = "run_liveness_continuation"; -export const DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS = 2; - -const ACTIONABLE_LIVENESS_STATES = new Set(["plan_only", "empty_response"]); -const CONTINUATION_ACTIVE_ISSUE_STATUSES = new Set(["todo", "in_progress"]); -// A prior adapter error should not permanently suppress bounded liveness -// continuations; the max-attempt/idempotency guards prevent unbounded retries. 
-const CONTINUATION_AGENT_STATUSES = new Set(["active", "idle", "running", "error"]); -const IDEMPOTENT_WAKE_STATUSES = ["queued", "deferred_issue_execution", "completed"]; - -type HeartbeatRunRow = typeof heartbeatRuns.$inferSelect; -type IssueRow = Pick< - typeof issues.$inferSelect, - "id" | "companyId" | "identifier" | "title" | "status" | "assigneeAgentId" | "executionState" | "projectId" ->; -type AgentRow = Pick; - -export type RunContinuationDecision = - | { - kind: "enqueue"; - nextAttempt: number; - idempotencyKey: string; - payload: Record; - contextSnapshot: Record; - } - | { - kind: "exhausted"; - attempt: number; - maxAttempts: number; - comment: string; - } - | { - kind: "skip"; - reason: string; - }; - -export function readContinuationAttempt(value: unknown): number { - const numeric = typeof value === "number" ? value : Number.parseInt(String(value ?? ""), 10); - return Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0; -} - -export function buildRunLivenessContinuationIdempotencyKey(input: { - issueId: string; - sourceRunId: string; - livenessState: RunLivenessState; - nextAttempt: number; -}) { - return [ - "run_liveness_continuation", - input.issueId, - input.sourceRunId, - input.livenessState, - String(input.nextAttempt), - ].join(":"); -} - -export async function findExistingRunLivenessContinuationWake( - db: Db, - input: { - companyId: string; - idempotencyKey: string; - }, -) { - return db - .select({ id: agentWakeupRequests.id, status: agentWakeupRequests.status }) - .from(agentWakeupRequests) - .where( - and( - eq(agentWakeupRequests.companyId, input.companyId), - eq(agentWakeupRequests.idempotencyKey, input.idempotencyKey), - inArray(agentWakeupRequests.status, IDEMPOTENT_WAKE_STATUSES), - ), - ) - .limit(1) - .then((rows) => rows[0] ?? 
null); -} - -export function decideRunLivenessContinuation(input: { - run: HeartbeatRunRow; - issue: IssueRow | null; - agent: AgentRow | null; - livenessState: RunLivenessState | null; - livenessReason: string | null; - nextAction: string | null; - budgetBlocked: boolean; - idempotentWakeExists: boolean; - maxAttempts?: number; -}): RunContinuationDecision { - const { - run, - issue, - agent, - livenessState, - livenessReason, - nextAction, - budgetBlocked, - idempotentWakeExists, - } = input; - const maxAttempts = input.maxAttempts ?? DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS; - - if (!livenessState || !ACTIONABLE_LIVENESS_STATES.has(livenessState)) { - return { kind: "skip", reason: "liveness state is not actionable for continuation" }; - } - if (!issue) return { kind: "skip", reason: "issue not found" }; - if (!agent) return { kind: "skip", reason: "agent not found" }; - if (issue.companyId !== run.companyId || agent.companyId !== run.companyId) { - return { kind: "skip", reason: "company scope mismatch" }; - } - if (issue.assigneeAgentId !== run.agentId) { - return { kind: "skip", reason: "issue is no longer assigned to the source run agent" }; - } - if (!CONTINUATION_ACTIVE_ISSUE_STATUSES.has(issue.status)) { - return { kind: "skip", reason: `issue status ${issue.status} is not continuable` }; - } - if (issue.executionState) { - return { kind: "skip", reason: "issue is blocked by execution policy state" }; - } - if (!CONTINUATION_AGENT_STATUSES.has(agent.status)) { - return { kind: "skip", reason: `agent status ${agent.status} is not invokable` }; - } - if (budgetBlocked) { - return { kind: "skip", reason: "budget hard stop blocks continuation" }; - } - - const currentAttempt = readContinuationAttempt(run.continuationAttempt); - if (currentAttempt >= maxAttempts) { - return { - kind: "exhausted", - attempt: currentAttempt, - maxAttempts, - comment: [ - "Bounded liveness continuation exhausted", - "", - `- Last liveness state: \`${livenessState}\``, - `- 
Attempts used: ${currentAttempt}/${maxAttempts}`, - `- Reason: ${livenessReason ?? "Run ended without concrete progress"}`, - "- Next action: a human or manager should inspect the run and either clarify the task, mark it blocked, or assign a concrete follow-up.", - ].join("\n"), - }; - } - - const nextAttempt = currentAttempt + 1; - const idempotencyKey = buildRunLivenessContinuationIdempotencyKey({ - issueId: issue.id, - sourceRunId: run.id, - livenessState, - nextAttempt, - }); - if (idempotentWakeExists) { - return { kind: "skip", reason: "continuation wake already exists for this source run and attempt" }; - } - - const payload = { - issueId: issue.id, - sourceRunId: run.id, - livenessState, - livenessReason, - continuationAttempt: nextAttempt, - maxContinuationAttempts: maxAttempts, - instruction: - nextAction ?? - "The previous run ended without concrete progress. Take the first concrete action now or mark the issue blocked with a specific unblock request.", - }; - - return { - kind: "enqueue", - nextAttempt, - idempotencyKey, - payload, - contextSnapshot: { - issueId: issue.id, - taskId: issue.id, - taskKey: issue.id, - wakeReason: RUN_LIVENESS_CONTINUATION_REASON, - livenessContinuationAttempt: nextAttempt, - livenessContinuationMaxAttempts: maxAttempts, - livenessContinuationSourceRunId: run.id, - livenessContinuationState: livenessState, - livenessContinuationReason: livenessReason, - livenessContinuationInstruction: payload.instruction, - }, - }; -} +export { + DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS, + RUN_LIVENESS_CONTINUATION_REASON, + buildRunLivenessContinuationIdempotencyKey, + decideRunLivenessContinuation, + findExistingRunLivenessContinuationWake, + readContinuationAttempt, +} from "./recovery/run-liveness-continuations.js"; +export type { + RunContinuationDecision, +} from "./recovery/run-liveness-continuations.js"; diff --git a/server/src/services/run-liveness.ts b/server/src/services/run-liveness.ts index 75f82909..72be57e6 100644 --- 
a/server/src/services/run-liveness.ts +++ b/server/src/services/run-liveness.ts @@ -1,5 +1,12 @@ import type { HeartbeatRunStatus, IssueStatus, RunLivenessState } from "@paperclipai/shared"; +export type RunLivenessActionability = + | "runnable" + | "manager_review" + | "blocked_external" + | "approval_required" + | "unknown"; + export interface RunLivenessIssueInput { status: IssueStatus | string; title: string; @@ -21,6 +28,8 @@ export interface RunLivenessClassificationInput { runStatus: HeartbeatRunStatus | string; issue: RunLivenessIssueInput | null; resultJson?: Record | null; + issueCommentBodies?: string[] | null; + continuationSummaryBody?: string | null; stdoutExcerpt?: string | null; stderrExcerpt?: string | null; error?: string | null; @@ -35,6 +44,7 @@ export interface RunLivenessClassification { continuationAttempt: number; lastUsefulActionAt: Date | null; nextAction: string | null; + actionability: RunLivenessActionability; } const DEFAULT_EVIDENCE: RunLivenessEvidenceInput = { @@ -54,6 +64,14 @@ const NEXT_STEPS_RE = /^\s*(?:next steps?|plan)\s*:/im; const BLOCKER_RE = /\b(?:blocked|can't proceed|cannot proceed|unable to proceed|waiting on|need(?:s|ed)? .{0,80}\b(?:approval|access|credential|credentials|secret|api key|token|input|clarification)|requires? .{0,80}\b(?:approval|access|credential|credentials|secret|api key|token|input|clarification))\b/i; const NEGATED_BLOCKER_RE = /\b(?:not blocked|no blocker|no blockers|unblocked)\b/i; +const APPROVAL_REQUIRED_RE = + /\b(?:approval required|requires? .{0,80}\bapproval|need(?:s|ed)? .{0,80}\bapproval|waiting on .{0,80}\bapproval|pending approval|board approval|human approval|user approval|operator approval)\b/i; +const EXTERNAL_BLOCKER_RE = + /\b(?:can't proceed|cannot proceed|unable to proceed|waiting on|blocked by|blocked on|need(?:s|ed)?|requires?) 
.{0,120}\b(?:access|credential|credentials|secret|secrets|api key|token|password|login|account|permission|permissions|input|clarification)\b/i; +const MANAGER_REVIEW_RE = + /\b(?:manager review|human review|manual review|security review|escalate|production deploy|deploy(?:ing)? to production|deploy(?:ing)? to prod|prod deploy|production access|rotate .{0,40}\b(?:secret|key|token)|delete .{0,40}\bproduction|security-sensitive|credentialed operation|budget-sensitive|cost approval|spend approval)\b/i; +const RUNNABLE_RE = + /\b(?:(?:run|rerun|execute)\s+(?:pnpm|npm|yarn|bun|vitest|jest|pytest|cargo|go test|curl|tests?|typecheck|build|lint|package|verification)|(?:inspect|check|review|look|investigate|analy[sz]e|open|read|start|begin|continue|implement|fix|test|update|create|add|write|verify|validate|report)\b)/i; const PLAN_TASK_TITLE_RE = /\b(?:plan|planning|analysis|investigation|research|report|proposal|design doc|write-?up)\b/i; const PLAN_TASK_DESCRIPTION_RE = /\b(?:create|write|produce|draft|update|revise|prepare)\s+(?:a\s+|the\s+)?(?:plan|analysis|investigation|research report|report|proposal|design doc|write-?up)\b/i; @@ -76,12 +94,22 @@ function readText(value: unknown): string | null { return trimmed.length > 0 ? 
trimmed : null; } -function resultText(resultJson: Record | null | undefined) { +function resultFinalText(resultJson: Record | null | undefined) { if (!resultJson) return ""; return [ + readText(resultJson.nextAction), readText(resultJson.summary), readText(resultJson.result), readText(resultJson.message), + readText(resultJson.error), + ] + .filter((value): value is string => Boolean(value)) + .join("\n"); +} + +function resultRawText(resultJson: Record | null | undefined) { + if (!resultJson) return ""; + return [ readText(resultJson.stdout), readText(resultJson.stderr), ] @@ -89,16 +117,34 @@ function resultText(resultJson: Record | null | undefined) { .join("\n"); } -function combinedOutput(input: RunLivenessClassificationInput) { +function highSignalSources(input: RunLivenessClassificationInput) { return [ - resultText(input.resultJson), + ...(input.issueCommentBodies ?? []).map(readText), + readText(resultFinalText(input.resultJson)), + readText(input.continuationSummaryBody), + ].filter((value): value is string => Boolean(value)); +} + +function rawSources(input: RunLivenessClassificationInput) { + return [ + readText(resultRawText(input.resultJson)), readText(input.stdoutExcerpt), readText(input.stderrExcerpt), readText(input.error), ] .filter((value): value is string => Boolean(value)) - .join("\n") - .trim(); + .map(stripNoisyTranscriptLines) + .filter((value) => value.length > 0); +} + +function combinedOutput(input: RunLivenessClassificationInput) { + return [...highSignalSources(input), ...rawSources(input)].join("\n").trim(); +} + +function actionabilityText(input: RunLivenessClassificationInput) { + const highSignal = highSignalSources(input).join("\n").trim(); + if (highSignal) return highSignal; + return rawSources(input).join("\n").trim(); } export function hasUsefulOutput(input: RunLivenessClassificationInput) { @@ -107,15 +153,14 @@ export function hasUsefulOutput(input: RunLivenessClassificationInput) { export function declaredBlocker(input: 
RunLivenessClassificationInput) { if (input.issue?.status === "blocked") return true; - const text = combinedOutput(input); - if (!text || NEGATED_BLOCKER_RE.test(text)) return false; - return BLOCKER_RE.test(text); + const actionability = classifyRunActionability(input); + return actionability === "blocked_external" || actionability === "approval_required"; } export function looksLikePlanningOnly(input: RunLivenessClassificationInput) { - const text = combinedOutput(input); + const text = actionabilityText(input); if (!text) return false; - return PLANNING_ONLY_RE.test(text) || NEXT_STEPS_RE.test(text); + return PLANNING_ONLY_RE.test(text) || NEXT_STEPS_RE.test(text) || /^\s*next(?: steps?| action)?\s*:/im.test(text); } export function isPlanningOrDocumentTask(issue: RunLivenessIssueInput | null | undefined) { @@ -163,20 +208,92 @@ function evidenceReason(evidence: RunLivenessEvidenceInput) { return parts.join(", "); } -function extractNextAction(input: RunLivenessClassificationInput) { - const text = combinedOutput(input); - if (!text) return null; - const line = text +function stripMarkdownListPrefix(line: string) { + return line.replace(/^\s*(?:[-*]|\d+\.)\s+/, "").trim(); +} + +function isNoisyTranscriptLine(line: string) { + const trimmed = line.trim(); + if (!trimmed) return true; + return ( + /^(?:command|status|exit_code|tool|tool_call|tool_result|stdout|stderr|event|payload|session|cwd|ref_id)\s*:/i.test(trimmed) || + /^(?:\{|\[).{0,80}(?:tool|event|stdout|stderr|cmd|command|payload)/i.test(trimmed) || + /^\$?\s*(?:rg|sed|cat|ls|git|pnpm|npm|yarn|curl|node|python)\b/i.test(trimmed) + ); +} + +function stripNoisyTranscriptLines(text: string) { + return text .split(/\r?\n/) - .map((entry) => entry.trim()) - .find((entry) => PLANNING_ONLY_RE.test(entry) || /^next(?: steps?| action)?\s*:/i.test(entry)); - if (!line) return null; - return line.length <= 500 ? 
line : `${line.slice(0, 497)}...`; + .map((line) => line.trim()) + .filter((line) => !isNoisyTranscriptLine(line)) + .join("\n") + .trim(); +} + +function nextNonNoiseLine(lines: string[], startIndex: number) { + for (let i = startIndex + 1; i < lines.length; i += 1) { + const line = stripMarkdownListPrefix(lines[i] ?? ""); + if (!line || isNoisyTranscriptLine(line)) continue; + return line; + } + return null; +} + +function extractNextActionFromText(text: string) { + const lines = text.split(/\r?\n/).map((entry) => entry.trim()); + for (let i = 0; i < lines.length; i += 1) { + const rawLine = lines[i] ?? ""; + if (!rawLine || isNoisyTranscriptLine(rawLine)) continue; + const line = stripMarkdownListPrefix(rawLine); + const labeled = line.match(/^next(?: steps?| action)?\s*:\s*(.*)$/i); + if (labeled) { + const sameLine = stripMarkdownListPrefix(labeled[1] ?? ""); + return sameLine || nextNonNoiseLine(lines, i); + } + if (PLANNING_ONLY_RE.test(line)) return line; + } + return null; +} + +function extractNextAction(input: RunLivenessClassificationInput) { + const structuredNextAction = readText(input.resultJson?.nextAction); + const candidates = [ + ...(input.issueCommentBodies ?? []), + structuredNextAction ? `Next action: ${structuredNextAction}` : null, + resultFinalText(input.resultJson), + input.continuationSummaryBody, + ...rawSources(input), + ].filter((value): value is string => Boolean(readText(value))); + + for (const candidate of candidates) { + const line = extractNextActionFromText(candidate); + if (!line) continue; + return line.length <= 500 ? line : `${line.slice(0, 497)}...`; + } + return null; +} + +export function classifyRunActionability(input: RunLivenessClassificationInput): RunLivenessActionability { + const text = actionabilityText(input); + if (!text) return "unknown"; + if (NEGATED_BLOCKER_RE.test(text)) { + return RUNNABLE_RE.test(text) ? 
"runnable" : "unknown"; + } + if (APPROVAL_REQUIRED_RE.test(text)) return "approval_required"; + if (EXTERNAL_BLOCKER_RE.test(text) || BLOCKER_RE.test(text) && /\b(?:credential|secret|api key|token|access|input|clarification)\b/i.test(text)) { + return "blocked_external"; + } + if (MANAGER_REVIEW_RE.test(text)) return "manager_review"; + if (RUNNABLE_RE.test(text)) return "runnable"; + return "unknown"; } export function classifyRunLiveness(input: RunLivenessClassificationInput): RunLivenessClassification { const evidence = normalizeEvidence(input.evidence); const continuationAttempt = normalizeContinuationAttempt(input.continuationAttempt); + const actionability = classifyRunActionability(input); + const nextAction = extractNextAction(input); const issueStatus = input.issue?.status ?? null; const usefulOutput = hasUsefulOutput(input); const concreteEvidence = hasConcreteActionEvidence(evidence); @@ -189,6 +306,7 @@ export function classifyRunLiveness(input: RunLivenessClassificationInput): RunL continuationAttempt, lastUsefulActionAt: state === "advanced" || state === "completed" || state === "blocked" ? lastUsefulActionAt : null, nextAction, + actionability, }); if (input.runStatus !== "succeeded") { @@ -200,7 +318,7 @@ export function classifyRunLiveness(input: RunLivenessClassificationInput): RunL } if (declaredBlocker(input)) { - return output("blocked", issueStatus === "blocked" ? "Issue status is blocked" : "Run output declared a concrete blocker", extractNextAction(input)); + return output("blocked", issueStatus === "blocked" ? 
"Issue status is blocked" : "Run output declared a concrete blocker", nextAction); } if (!usefulOutput && !concreteEvidence) { @@ -215,12 +333,15 @@ export function classifyRunLiveness(input: RunLivenessClassificationInput): RunL return output("advanced", "Planning/document task produced useful output and is exempt from plan-only classification"); } - if (looksLikePlanningOnly(input)) { - return output("plan_only", "Run described future work without concrete action evidence", extractNextAction(input)); + if (looksLikePlanningOnly(input) || nextAction) { + if (actionability === "runnable") { + return output("plan_only", "Run described runnable future work without concrete action evidence", nextAction); + } + return output("needs_followup", "Run described future work that is not safe to auto-continue", nextAction); } if (usefulOutput) { - return output("needs_followup", "Run produced useful output but no concrete action evidence", extractNextAction(input)); + return output("needs_followup", "Run produced useful output but no concrete action evidence", nextAction); } return output("empty_response", "Run succeeded without useful output"); diff --git a/server/src/services/run-log-store.ts b/server/src/services/run-log-store.ts index 6429c5f2..9c6a8142 100644 --- a/server/src/services/run-log-store.ts +++ b/server/src/services/run-log-store.ts @@ -32,7 +32,7 @@ export interface RunLogStore { append( handle: RunLogHandle, event: { stream: "stdout" | "stderr" | "system"; chunk: string; ts: string }, - ): Promise; + ): Promise; finalize(handle: RunLogHandle): Promise; read(handle: RunLogHandle, opts?: RunLogReadOptions): Promise; } @@ -107,14 +107,16 @@ function createLocalFileRunLogStore(basePath: string): RunLogStore { }, async append(handle, event) { - if (handle.store !== "local_file") return; + if (handle.store !== "local_file") return 0; const absPath = resolveWithin(basePath, handle.logRef); const line = JSON.stringify({ ts: event.ts, stream: event.stream, chunk: 
event.chunk, }); - await fs.appendFile(absPath, `${line}\n`, "utf8"); + const persisted = `${line}\n`; + await fs.appendFile(absPath, persisted, "utf8"); + return Buffer.byteLength(persisted, "utf8"); }, async finalize(handle) { @@ -153,4 +155,3 @@ export function getRunLogStore() { cachedStore = createLocalFileRunLogStore(basePath); return cachedStore; } - diff --git a/ui/src/App.tsx b/ui/src/App.tsx index f495dc0e..c34d8d96 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -4,6 +4,7 @@ import { Layout } from "./components/Layout"; import { OnboardingWizard } from "./components/OnboardingWizard"; import { CloudAccessGate } from "./components/CloudAccessGate"; import { Dashboard } from "./pages/Dashboard"; +import { DashboardLive } from "./pages/DashboardLive"; import { Companies } from "./pages/Companies"; import { Agents } from "./pages/Agents"; import { AgentDetail } from "./pages/AgentDetail"; @@ -58,6 +59,7 @@ function boardRoutes() { <> } /> } /> + } /> } /> } /> } /> diff --git a/ui/src/api/heartbeats.test.ts b/ui/src/api/heartbeats.test.ts new file mode 100644 index 00000000..9ba8d163 --- /dev/null +++ b/ui/src/api/heartbeats.test.ts @@ -0,0 +1,30 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const mockApi = vi.hoisted(() => ({ + get: vi.fn(), +})); + +vi.mock("./client", () => ({ + api: mockApi, +})); + +import { heartbeatsApi } from "./heartbeats"; + +describe("heartbeatsApi.liveRunsForCompany", () => { + beforeEach(() => { + mockApi.get.mockReset(); + mockApi.get.mockResolvedValue([]); + }); + + it("keeps the legacy numeric minCount signature", async () => { + await heartbeatsApi.liveRunsForCompany("company-1", 4); + + expect(mockApi.get).toHaveBeenCalledWith("/companies/company-1/live-runs?minCount=4"); + }); + + it("passes minCount and limit options to the company live-runs endpoint", async () => { + await heartbeatsApi.liveRunsForCompany("company-1", { minCount: 50, limit: 50 }); + + 
expect(mockApi.get).toHaveBeenCalledWith("/companies/company-1/live-runs?minCount=50&limit=50"); + }); +}); diff --git a/ui/src/api/heartbeats.ts b/ui/src/api/heartbeats.ts index 72c8e0a7..b4bf89c8 100644 --- a/ui/src/api/heartbeats.ts +++ b/ui/src/api/heartbeats.ts @@ -1,4 +1,9 @@ -import type { HeartbeatRun, HeartbeatRunEvent, InstanceSchedulerHeartbeatAgent, WorkspaceOperation } from "@paperclipai/shared"; +import type { + HeartbeatRun, + HeartbeatRunEvent, + InstanceSchedulerHeartbeatAgent, + WorkspaceOperation, +} from "@paperclipai/shared"; import { api } from "./client"; export interface RunLivenessFields { @@ -20,12 +25,15 @@ export interface ActiveRunForIssue { agentId: string; agentName: string; adapterType: string; + logBytes?: number | null; + lastOutputBytes?: number | null; issueId?: string | null; livenessState?: RunLivenessFields["livenessState"]; livenessReason?: string | null; continuationAttempt?: number; lastUsefulActionAt?: string | Date | null; nextAction?: string | null; + outputSilence?: HeartbeatRun["outputSilence"]; } export interface LiveRunForIssue { @@ -39,12 +47,23 @@ export interface LiveRunForIssue { agentId: string; agentName: string; adapterType: string; + logBytes?: number | null; + lastOutputBytes?: number | null; issueId?: string | null; livenessState?: RunLivenessFields["livenessState"]; livenessReason?: string | null; continuationAttempt?: number; lastUsefulActionAt?: string | null; nextAction?: string | null; + outputSilence?: HeartbeatRun["outputSilence"]; +} + +export interface WatchdogDecisionInput { + runId: string; + decision: "snooze" | "continue" | "dismissed_false_positive"; + evaluationIssueId?: string | null; + reason?: string | null; + snoozedUntil?: string | null; } export const heartbeatsApi = { @@ -71,12 +90,31 @@ export const heartbeatsApi = { `/workspace-operations/${operationId}/log?offset=${encodeURIComponent(String(offset))}&limitBytes=${encodeURIComponent(String(limitBytes))}`, ), cancel: (runId: string) 
=> api.post(`/heartbeat-runs/${runId}/cancel`, {}), + recordWatchdogDecision: (input: WatchdogDecisionInput) => + api.post(`/heartbeat-runs/${input.runId}/watchdog-decisions`, { + decision: input.decision, + evaluationIssueId: input.evaluationIssueId ?? null, + reason: input.reason ?? null, + snoozedUntil: input.snoozedUntil ?? null, + }), liveRunsForIssue: (issueId: string) => api.get(`/issues/${issueId}/live-runs`), activeRunForIssue: (issueId: string) => api.get(`/issues/${issueId}/active-run`), - liveRunsForCompany: (companyId: string, minCount?: number) => - api.get(`/companies/${companyId}/live-runs${minCount ? `?minCount=${minCount}` : ""}`), + liveRunsForCompany: ( + companyId: string, + options?: number | { minCount?: number; limit?: number }, + ) => { + const searchParams = new URLSearchParams(); + if (typeof options === "number") { + searchParams.set("minCount", String(options)); + } else if (options) { + if (options.minCount) searchParams.set("minCount", String(options.minCount)); + if (options.limit) searchParams.set("limit", String(options.limit)); + } + const qs = searchParams.toString(); + return api.get(`/companies/${companyId}/live-runs${qs ? 
`?${qs}` : ""}`); + }, listInstanceSchedulerAgents: () => api.get("/instance/scheduler-heartbeats"), }; diff --git a/ui/src/components/ActiveAgentsPanel.test.tsx b/ui/src/components/ActiveAgentsPanel.test.tsx new file mode 100644 index 00000000..5a528976 --- /dev/null +++ b/ui/src/components/ActiveAgentsPanel.test.tsx @@ -0,0 +1,152 @@ +// @vitest-environment jsdom + +import { act, type ReactNode } from "react"; +import { createRoot } from "react-dom/client"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { ActiveAgentsPanel } from "./ActiveAgentsPanel"; + +const mockHeartbeatsApi = vi.hoisted(() => ({ + liveRunsForCompany: vi.fn(), +})); + +const mockIssuesApi = vi.hoisted(() => ({ + list: vi.fn(), +})); + +vi.mock("@/lib/router", () => ({ + Link: ({ to, children, ...props }: { to: string; children: ReactNode }) => ( + + {children} + + ), +})); + +vi.mock("../api/heartbeats", () => ({ + heartbeatsApi: mockHeartbeatsApi, +})); + +vi.mock("../api/issues", () => ({ + issuesApi: mockIssuesApi, +})); + +vi.mock("./Identity", () => ({ + Identity: ({ name }: { name: string }) => {name}, +})); + +vi.mock("./RunChatSurface", () => ({ + RunChatSurface: () =>
Run output
, +})); + +vi.mock("./transcript/useLiveRunTranscripts", () => ({ + useLiveRunTranscripts: () => ({ + transcriptByRun: new Map(), + hasOutputForRun: () => false, + }), +})); + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +(globalThis as any).IS_REACT_ACT_ENVIRONMENT = true; + +async function flushReact() { + await act(async () => { + await Promise.resolve(); + await new Promise((resolve) => window.setTimeout(resolve, 0)); + }); +} + +function createRun(index: number) { + return { + id: `run-${index}`, + status: "running", + invocationSource: "assignment", + triggerDetail: null, + startedAt: "2026-04-24T12:00:00.000Z", + finishedAt: null, + createdAt: `2026-04-24T12:00:0${index}.000Z`, + agentId: `agent-${index}`, + agentName: `Agent ${index}`, + adapterType: "codex_local", + issueId: null, + }; +} + +describe("ActiveAgentsPanel", () => { + let container: HTMLDivElement; + + beforeEach(() => { + container = document.createElement("div"); + document.body.appendChild(container); + mockHeartbeatsApi.liveRunsForCompany.mockResolvedValue([1, 2, 3, 4, 5].map(createRun)); + mockIssuesApi.list.mockResolvedValue([]); + }); + + afterEach(() => { + container.remove(); + document.body.innerHTML = ""; + vi.clearAllMocks(); + }); + + it("links hidden active/recent runs to the full live dashboard", async () => { + const root = createRoot(container); + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }); + + await act(async () => { + root.render( + + + , + ); + }); + await flushReact(); + + expect(mockHeartbeatsApi.liveRunsForCompany).toHaveBeenCalledWith("company-1", { + minCount: 4, + limit: undefined, + }); + + const moreLink = [...container.querySelectorAll("a")].find((anchor) => + anchor.textContent?.includes("more active/recent"), + ); + expect(moreLink?.getAttribute("href")).toBe("/dashboard/live"); + + await act(async () => { + root.unmount(); + }); + }); + + it("can request the full live dashboard page limit 
without a hidden-runs link", async () => { + const root = createRoot(container); + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }); + + await act(async () => { + root.render( + + + , + ); + }); + await flushReact(); + + expect(mockHeartbeatsApi.liveRunsForCompany).toHaveBeenCalledWith("company-1", { + minCount: 50, + limit: 50, + }); + expect(container.textContent).not.toContain("more active/recent"); + + await act(async () => { + root.unmount(); + }); + }); +}); diff --git a/ui/src/components/ActiveAgentsPanel.tsx b/ui/src/components/ActiveAgentsPanel.tsx index 5a5534f9..1b49624a 100644 --- a/ui/src/components/ActiveAgentsPanel.tsx +++ b/ui/src/components/ActiveAgentsPanel.tsx @@ -25,16 +25,36 @@ function isRunActive(run: LiveRunForIssue): boolean { interface ActiveAgentsPanelProps { companyId: string; + title?: string; + minRunCount?: number; + fetchLimit?: number; + cardLimit?: number; + gridClassName?: string; + cardClassName?: string; + emptyMessage?: string; + queryScope?: string; + showMoreLink?: boolean; } -export function ActiveAgentsPanel({ companyId }: ActiveAgentsPanelProps) { +export function ActiveAgentsPanel({ + companyId, + title = "Agents", + minRunCount = MIN_DASHBOARD_RUNS, + fetchLimit, + cardLimit = DASHBOARD_RUN_CARD_LIMIT, + gridClassName, + cardClassName, + emptyMessage = "No recent agent runs.", + queryScope = "dashboard", + showMoreLink = true, +}: ActiveAgentsPanelProps) { const { data: liveRuns } = useQuery({ - queryKey: [...queryKeys.liveRuns(companyId), "dashboard"], - queryFn: () => heartbeatsApi.liveRunsForCompany(companyId, MIN_DASHBOARD_RUNS), + queryKey: [...queryKeys.liveRuns(companyId), queryScope, { minRunCount, fetchLimit }], + queryFn: () => heartbeatsApi.liveRunsForCompany(companyId, { minCount: minRunCount, limit: fetchLimit }), }); const runs = liveRuns ?? 
[]; - const visibleRuns = useMemo(() => runs.slice(0, DASHBOARD_RUN_CARD_LIMIT), [runs]); + const visibleRuns = useMemo(() => runs.slice(0, cardLimit), [cardLimit, runs]); const hiddenRunCount = Math.max(0, runs.length - visibleRuns.length); const { data: issues } = useQuery({ queryKey: [...queryKeys.issues.list(companyId), "with-routine-executions"], @@ -62,14 +82,14 @@ export function ActiveAgentsPanel({ companyId }: ActiveAgentsPanelProps) { return (

- Agents + {title}

{runs.length === 0 ? (
-

No recent agent runs.

+

{emptyMessage}

) : ( -
+
{visibleRuns.map((run) => ( ))}
)} - {hiddenRunCount > 0 && ( + {showMoreLink && hiddenRunCount > 0 && (
- + {hiddenRunCount} more active/recent run{hiddenRunCount === 1 ? "" : "s"}
@@ -101,6 +122,7 @@ const AgentRunCard = memo(function AgentRunCard({ transcript, hasOutput, isActive, + className, }: { companyId: string; run: LiveRunForIssue; @@ -108,6 +130,7 @@ const AgentRunCard = memo(function AgentRunCard({ transcript: TranscriptEntry[]; hasOutput: boolean; isActive: boolean; + className?: string; }) { return (
diff --git a/ui/src/components/ActivityCharts.test.tsx b/ui/src/components/ActivityCharts.test.tsx index bfb4b470..e9fe8371 100644 --- a/ui/src/components/ActivityCharts.test.tsx +++ b/ui/src/components/ActivityCharts.test.tsx @@ -56,6 +56,10 @@ function createRun(overrides: Partial = {}): HeartbeatRun { logBytes: null, logSha256: null, logCompressed: false, + lastOutputAt: null, + lastOutputSeq: 0, + lastOutputStream: null, + lastOutputBytes: null, stdoutExcerpt: null, stderrExcerpt: null, errorCode: null, diff --git a/ui/src/components/CommentThread.test.tsx b/ui/src/components/CommentThread.test.tsx index 860ea0ad..18e77a43 100644 --- a/ui/src/components/CommentThread.test.tsx +++ b/ui/src/components/CommentThread.test.tsx @@ -178,6 +178,46 @@ describe("CommentThread", () => { }); }); + it("shows follow-up badges on explicit follow-up comments and timeline rows", () => { + const root = createRoot(container); + + act(() => { + root.render( + + {}} + /> + , + ); + }); + + expect(container.textContent).toContain("Follow-up"); + expect(container.textContent).toContain("requested follow-up"); + + act(() => { + root.unmount(); + }); + }); + it("hides the reopen control and infers reopen for closed agent-assigned issues", async () => { const root = createRoot(container); const onAdd = vi.fn(async () => {}); diff --git a/ui/src/components/CommentThread.tsx b/ui/src/components/CommentThread.tsx index 2d052d38..080b3e97 100644 --- a/ui/src/components/CommentThread.tsx +++ b/ui/src/components/CommentThread.tsx @@ -9,6 +9,7 @@ import type { IssueComment, } from "@paperclipai/shared"; import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; import { ArrowRight, Check, Copy, Paperclip } from "lucide-react"; import { Avatar, AvatarFallback } from "@/components/ui/avatar"; import { Identity } from "./Identity"; @@ -32,6 +33,7 @@ interface CommentWithRunMeta extends IssueComment { clientStatus?: "pending" | "queued"; queueState?: "queued"; 
queueTargetRunId?: string | null; + followUpRequested?: boolean; } interface LinkedRunItem { @@ -341,6 +343,7 @@ function CommentCard({ const isHighlighted = highlightCommentId === comment.id; const isPending = comment.clientStatus === "pending"; const isQueued = queued || comment.queueState === "queued" || comment.clientStatus === "queued"; + const followUpRequested = comment.followUpRequested === true; return (
) : null} + {followUpRequested ? ( + + Follow-up + + ) : null} {companyId && !isPending ? ( @@ -488,7 +497,7 @@ function TimelineEventCard({
{actorName} - updated this task + {actionLabel} (() => { - const commentItems: TimelineItem[] = comments.map((comment) => ({ - kind: "comment", - id: comment.id, - createdAtMs: new Date(comment.createdAt).getTime(), - comment, - })); + const followUpCommentIds = new Set( + timelineEvents + .filter((event) => event.followUpRequested && event.commentId) + .map((event) => event.commentId as string), + ); + const commentItems: TimelineItem[] = comments.map((comment) => { + const followUpRequested = comment.followUpRequested === true || followUpCommentIds.has(comment.id); + return { + kind: "comment", + id: comment.id, + createdAtMs: new Date(comment.createdAt).getTime(), + comment: followUpRequested ? { ...comment, followUpRequested } : comment, + }; + }); const approvalItems: TimelineItem[] = linkedApprovals.map((approval) => ({ kind: "approval", id: approval.id, diff --git a/ui/src/components/IssueChatThread.test.tsx b/ui/src/components/IssueChatThread.test.tsx index ae114005..5ec92b11 100644 --- a/ui/src/components/IssueChatThread.test.tsx +++ b/ui/src/components/IssueChatThread.test.tsx @@ -318,6 +318,50 @@ describe("IssueChatThread", () => { }); }); + it("shows explicit follow-up badges and event copy", () => { + const root = createRoot(container); + + act(() => { + root.render( + + {}} + showComposer={false} + enableLiveTranscriptPolling={false} + /> + , + ); + }); + + expect(container.textContent).toContain("Follow-up"); + expect(container.textContent).toContain("requested follow-up"); + + act(() => { + root.unmount(); + }); + }); + it("shows unresolved blocker context above the composer", () => { const root = createRoot(container); @@ -359,6 +403,59 @@ describe("IssueChatThread", () => { }); }); + it("shows terminal blocker context when an immediate blocker is transitively blocked", () => { + const root = createRoot(container); + + act(() => { + root.render( + + {}} + enableLiveTranscriptPolling={false} + /> + , + ); + }); + + 
expect(container.textContent).toContain("PAP-2167"); + expect(container.textContent).toContain("Phase 7 review"); + expect(container.textContent).toContain("Ultimately waiting on"); + expect(container.textContent).toContain("PAP-2201"); + expect(container.textContent).toContain("Security sign-off"); + expect(container.querySelector('[data-issue-path-id="PAP-2201"]')).not.toBeNull(); + + act(() => { + root.unmount(); + }); + }); + it("shows paused assigned agent context above the composer", () => { const root = createRoot(container); const pausedAgent = { @@ -1363,6 +1460,66 @@ describe("IssueChatThread", () => { }); }); + it("keeps a running chain-of-thought in the Working state between commands", () => { + const root = createRoot(container); + + act(() => { + root.render( + + {}} + enableLiveTranscriptPolling={false} + /> + , + ); + }); + + expect(container.textContent).toContain("Working"); + expect(container.textContent).not.toContain("Worked"); + + act(() => { + root.unmount(); + }); + }); + it("folds chain-of-thought when the same message transitions from running to complete", () => { expect(resolveAssistantMessageFoldedState({ messageId: "message-1", diff --git a/ui/src/components/IssueChatThread.tsx b/ui/src/components/IssueChatThread.tsx index 01fe8f56..f4bdaa8a 100644 --- a/ui/src/components/IssueChatThread.tsx +++ b/ui/src/components/IssueChatThread.tsx @@ -58,6 +58,7 @@ import { buildIssueThreadInteractionSummary, isIssueThreadInteraction } from ".. 
import { resolveIssueChatTranscriptRuns } from "../lib/issueChatTranscriptRuns"; import type { IssueTimelineAssignee, IssueTimelineEvent } from "../lib/issue-timeline-events"; import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; import { Dialog, @@ -353,6 +354,26 @@ function IssueBlockedNotice({ if (blockers.length === 0 && issueStatus !== "blocked") return null; const blockerLabel = blockers.length === 1 ? "the linked issue" : "the linked issues"; + const terminalBlockers = blockers + .flatMap((blocker) => blocker.terminalBlockers ?? []) + .filter((blocker, index, all) => all.findIndex((candidate) => candidate.id === blocker.id) === index); + + const renderBlockerChip = (blocker: IssueRelationIssueSummary) => { + const issuePathId = blocker.identifier ?? blocker.id; + return ( + + {blocker.identifier ?? blocker.id.slice(0, 8)} + + {blocker.title} + + + ); + }; return (
@@ -366,22 +387,15 @@ function IssueBlockedNotice({

{blockers.length > 0 ? (
- {blockers.map((blocker) => { - const issuePathId = blocker.identifier ?? blocker.id; - return ( - - {blocker.identifier ?? blocker.id.slice(0, 8)} - - {blocker.title} - - - ); - })} + {blockers.map(renderBlockerChip)} +
+ ) : null} + {terminalBlockers.length > 0 ? ( +
+ + Ultimately waiting on + + {terminalBlockers.map(renderBlockerChip)}
) : null}
@@ -754,8 +768,7 @@ function IssueChatChainOfThought({ (p): p is ToolCallMessagePart => p.type === "tool-call", ); - const hasActiveTool = toolParts.some((t) => t.result === undefined); - const isActive = isMessageRunning && hasActiveTool; + const isActive = isMessageRunning; const [expanded, setExpanded] = useState(isActive); const rawSegments = Array.isArray(custom.chainOfThoughtSegments) @@ -1196,6 +1209,7 @@ function IssueChatUserMessage({ message }: { message: ThreadMessage }) { const authorName = typeof custom.authorName === "string" ? custom.authorName : null; const authorUserId = typeof custom.authorUserId === "string" ? custom.authorUserId : null; const queued = custom.queueState === "queued" || custom.clientStatus === "queued"; + const followUpRequested = custom.followUpRequested === true; const queueReason = typeof custom.queueReason === "string" ? custom.queueReason : null; const queueBadgeLabel = queueReason === "hold" ? "\u23f8 Deferred wake" : "Queued"; const pending = custom.clientStatus === "pending"; @@ -1221,6 +1235,11 @@ function IssueChatUserMessage({ message }: { message: ThreadMessage }) {
{resolvedAuthorName} + {followUpRequested ? ( + + Follow-up + + ) : null}
@@ -1429,6 +1449,11 @@ function IssueChatAssistantMessage({ message }: { message: ThreadMessage }) { ) : (
{authorName} + {followUpRequested ? ( + + Follow-up + + ) : null} {isRunning ? ( @@ -1944,7 +1969,9 @@ function IssueChatSystemMessage({ message }: { message: ThreadMessage }) {
{actorName} - updated this task + + {custom.followUpRequested === true ? "requested follow-up" : "updated this task"} + new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime()); diff --git a/ui/src/components/IssueColumns.tsx b/ui/src/components/IssueColumns.tsx index 492ba2fb..54813689 100644 --- a/ui/src/components/IssueColumns.tsx +++ b/ui/src/components/IssueColumns.tsx @@ -150,7 +150,7 @@ export function InboxIssueMetaLeading({ <> {showStatus ? ( - {statusSlot ?? } + {statusSlot ?? } ) : null} {showIdentifier ? ( diff --git a/ui/src/components/IssueFiltersPopover.test.tsx b/ui/src/components/IssueFiltersPopover.test.tsx index d09c48c9..74a40304 100644 --- a/ui/src/components/IssueFiltersPopover.test.tsx +++ b/ui/src/components/IssueFiltersPopover.test.tsx @@ -79,5 +79,6 @@ describe("IssueFiltersPopover", () => { element.className.includes("md:grid-cols-3"), ); expect(layoutGrid?.className).toContain("grid-cols-1"); + expect(popoverContent?.textContent).toContain("Live runs only"); }); }); diff --git a/ui/src/components/IssueFiltersPopover.tsx b/ui/src/components/IssueFiltersPopover.tsx index 2702ea0c..19a844ad 100644 --- a/ui/src/components/IssueFiltersPopover.tsx +++ b/ui/src/components/IssueFiltersPopover.tsx @@ -344,9 +344,16 @@ export function IssueFiltersPopover({
) : null} - {enableRoutineVisibilityFilter ? ( -
- Visibility +
+ Visibility + + {enableRoutineVisibilityFilter ? ( -
- ) : null} + ) : null} +
diff --git a/ui/src/components/IssueLinkQuicklook.tsx b/ui/src/components/IssueLinkQuicklook.tsx index 1d66b2d0..16b16822 100644 --- a/ui/src/components/IssueLinkQuicklook.tsx +++ b/ui/src/components/IssueLinkQuicklook.tsx @@ -44,7 +44,7 @@ export function IssueQuicklookCard({ return (
- + ({ })); vi.mock("./StatusIcon", () => ({ - StatusIcon: ({ status }: { status: string }) => {status}, + StatusIcon: ({ status, blockerAttention }: { status: string; blockerAttention?: Issue["blockerAttention"] }) => ( + {status} + ), })); vi.mock("./PriorityIcon", () => ({ @@ -392,6 +394,29 @@ describe("IssueProperties", () => { act(() => root.unmount()); }); + it("passes blocker attention to the sidebar status icon", async () => { + const root = renderProperties(container, { + issue: createIssue({ + status: "blocked", + blockerAttention: { + state: "covered", + reason: "active_child", + unresolvedBlockerCount: 1, + coveredBlockerCount: 1, + attentionBlockerCount: 0, + sampleBlockerIdentifier: "PAP-2", + }, + }), + childIssues: [], + onUpdate: vi.fn(), + }); + await flush(); + + expect(container.querySelector('[data-status-icon-state="covered"]')?.textContent).toBe("blocked"); + + act(() => root.unmount()); + }); + it("renders blocked-by issues as direct chips and edits them from an add action", async () => { const onUpdate = vi.fn(); mockIssuesApi.list.mockResolvedValue([ diff --git a/ui/src/components/IssueProperties.tsx b/ui/src/components/IssueProperties.tsx index 27a49bbf..c11631a8 100644 --- a/ui/src/components/IssueProperties.tsx +++ b/ui/src/components/IssueProperties.tsx @@ -1044,6 +1044,7 @@ export function IssueProperties({ onUpdate({ status })} showLabel /> diff --git a/ui/src/components/IssueRow.tsx b/ui/src/components/IssueRow.tsx index 3bd8a2ea..7e5514df 100644 --- a/ui/src/components/IssueRow.tsx +++ b/ui/src/components/IssueRow.tsx @@ -69,7 +69,7 @@ export function IssueRow({ )} > - {mobileLeading ?? } + {mobileLeading ?? } @@ -82,7 +82,7 @@ export function IssueRow({ {desktopMetaLeading ?? 
( <> - + {identifier} diff --git a/ui/src/components/IssueRunLedger.test.tsx b/ui/src/components/IssueRunLedger.test.tsx index 25527066..adefcf4f 100644 --- a/ui/src/components/IssueRunLedger.test.tsx +++ b/ui/src/components/IssueRunLedger.test.tsx @@ -6,6 +6,7 @@ import { createRoot, type Root } from "react-dom/client"; import type { Issue, RunLivenessState } from "@paperclipai/shared"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { RunForIssue } from "../api/activity"; +import type { ActiveRunForIssue } from "../api/heartbeats"; import { IssueRunLedgerContent } from "./IssueRunLedger"; vi.mock("@/lib/router", () => ({ @@ -99,6 +100,35 @@ function createIssue(overrides: Partial = {}): Issue { }; } +function createActiveRun(overrides: Partial = {}): ActiveRunForIssue { + return { + id: "run-live-1", + status: "running", + invocationSource: "assignment", + triggerDetail: null, + startedAt: "2026-04-18T19:58:00.000Z", + finishedAt: null, + createdAt: "2026-04-18T19:58:00.000Z", + agentId: "agent-1", + agentName: "CodexCoder", + adapterType: "codex_local", + outputSilence: { + lastOutputAt: "2026-04-18T19:00:00.000Z", + lastOutputSeq: 4, + lastOutputStream: "stdout", + silenceStartedAt: "2026-04-18T19:30:00.000Z", + silenceAgeMs: 45 * 60 * 1000, + level: "critical", + suspicionThresholdMs: 10 * 60 * 1000, + criticalThresholdMs: 30 * 60 * 1000, + snoozedUntil: null, + evaluationIssueId: "issue-eval-1", + evaluationIssueIdentifier: "PAP-404", + }, + ...overrides, + }; +} + function renderLedger(props: Partial> = {}) { render( , ); } @@ -223,7 +255,8 @@ describe("IssueRunLedger", () => { expect(container.textContent).toContain("Transient failure"); expect(container.textContent).toContain("Next retry"); expect(container.textContent).toContain("Retry exhausted"); - expect(container.textContent).toContain("No further automatic retry queued"); + expect(container.textContent).toContain("no further automatic retry will be queued"); + 
expect(container.textContent).toContain("Manual intervention required"); }); it("shows timeout, cancel, and budget stop reasons without raw logs", () => { @@ -302,4 +335,35 @@ describe("IssueRunLedger", () => { expect(container.textContent).toContain("2 older runs not shown"); }); + + it("renders stale-run banner, watchdog actions, and silence badge for live runs", () => { + const onWatchdogDecision = vi.fn(); + renderLedger({ + runs: [createRun({ runId: "run-live-1", status: "running", finishedAt: null })], + activeRun: createActiveRun(), + onWatchdogDecision, + }); + + expect(container.textContent).toContain("Stale-run watchdog alert"); + expect(container.textContent).toContain("PAP-404"); + expect(container.textContent).toContain("Stale run"); + const watchdogBanner = Array.from(container.querySelectorAll("p")) + .find((node) => node.textContent?.includes("Stale-run watchdog alert")) + ?.closest("div"); + expect(watchdogBanner?.className).toContain("border-red-500/30"); + expect(watchdogBanner?.className).toContain("bg-red-500/10"); + + const continueButton = Array.from(container.querySelectorAll("button")).find( + (button) => button.textContent?.includes("Continue monitoring"), + ); + expect(continueButton).not.toBeUndefined(); + act(() => { + continueButton?.dispatchEvent(new MouseEvent("click", { bubbles: true })); + }); + expect(onWatchdogDecision).toHaveBeenCalledWith({ + runId: "run-live-1", + decision: "continue", + evaluationIssueId: "issue-eval-1", + }); + }); }); diff --git a/ui/src/components/IssueRunLedger.tsx b/ui/src/components/IssueRunLedger.tsx index ec34bc58..54ceaf0a 100644 --- a/ui/src/components/IssueRunLedger.tsx +++ b/ui/src/components/IssueRunLedger.tsx @@ -1,9 +1,14 @@ import { useMemo } from "react"; import type { Issue, Agent } from "@paperclipai/shared"; -import { useQuery } from "@tanstack/react-query"; +import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; import { Link } from "@/lib/router"; import { 
activityApi, type RunForIssue, type RunLivenessState } from "../api/activity"; -import { heartbeatsApi, type ActiveRunForIssue, type LiveRunForIssue } from "../api/heartbeats"; +import { + heartbeatsApi, + type ActiveRunForIssue, + type LiveRunForIssue, + type WatchdogDecisionInput, +} from "../api/heartbeats"; import { cn, relativeTime } from "../lib/utils"; import { queryKeys } from "../lib/queryKeys"; import { keepPreviousDataForSameQueryTail } from "../lib/query-placeholder-data"; @@ -24,11 +29,14 @@ type IssueRunLedgerContentProps = { issueStatus: Issue["status"]; childIssues: Issue[]; agentMap: ReadonlyMap>; + pendingWatchdogDecision?: WatchdogDecisionInput["decision"] | null; + onWatchdogDecision?: (input: WatchdogDecisionInput) => void; }; type LedgerRun = RunForIssue & { isLive?: boolean; agentName?: string; + outputSilence?: ActiveRunForIssue["outputSilence"]; }; type LivenessCopy = { @@ -96,6 +104,28 @@ const MISSING_LIVENESS_COPY: LivenessCopy = { const TERMINAL_CHILD_STATUSES = new Set(["done", "cancelled"]); const ACTIVE_RUN_STATUSES = new Set(["queued", "running"]); +type RunOutputSilenceLevel = NonNullable["level"]; + +type RunOutputSilenceCopy = { + label: string; + tone: string; +}; + +const RUN_OUTPUT_SILENCE_COPY: Partial> = { + suspicious: { + label: "Silence watch", + tone: "border-amber-500/30 bg-amber-500/10 text-amber-700 dark:text-amber-300", + }, + critical: { + label: "Stale run", + tone: "border-red-500/30 bg-red-500/10 text-red-700 dark:text-red-300", + }, + snoozed: { + label: "Silence snoozed", + tone: "border-cyan-500/30 bg-cyan-500/10 text-cyan-700 dark:text-cyan-300", + }, +}; + function asRecord(value: unknown): Record | null { if (typeof value !== "object" || value === null || Array.isArray(value)) return null; return value as Record; @@ -143,6 +173,7 @@ function liveRunToLedgerRun(run: LiveRunForIssue | ActiveRunForIssue): LedgerRun usageJson: null, resultJson: null, isLive: run.status === "queued" || run.status === "running", 
+ outputSilence: run.outputSilence, }; } @@ -155,10 +186,25 @@ function mergeRuns( for (const run of runs) byId.set(run.runId, run); for (const run of liveRuns ?? []) { const existing = byId.get(run.id); - byId.set(run.id, existing ? { ...existing, isLive: true, agentName: run.agentName } : liveRunToLedgerRun(run)); + byId.set( + run.id, + existing + ? { ...existing, isLive: true, agentName: run.agentName, outputSilence: run.outputSilence } + : liveRunToLedgerRun(run), + ); } - if (activeRun && !byId.has(activeRun.id)) { - byId.set(activeRun.id, liveRunToLedgerRun(activeRun)); + if (activeRun) { + const existing = byId.get(activeRun.id); + if (existing) { + byId.set(activeRun.id, { + ...existing, + isLive: isActiveRun(existing) || isActiveRun(activeRun), + agentName: activeRun.agentName, + outputSilence: activeRun.outputSilence, + }); + } else { + byId.set(activeRun.id, liveRunToLedgerRun(activeRun)); + } } return [...byId.values()].sort((a, b) => { @@ -252,6 +298,17 @@ function compactAgentName(run: LedgerRun, agentMap: ReadonlyMap activityApi.runsForIssue(issueId), @@ -279,6 +337,13 @@ export function IssueRunLedger({ refetchInterval: hasLiveRuns ? false : 3000, placeholderData: keepPreviousDataForSameQueryTail(issueId), }); + const watchdogDecision = useMutation({ + mutationFn: (input: WatchdogDecisionInput) => heartbeatsApi.recordWatchdogDecision(input), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: queryKeys.issues.activeRun(issueId) }); + queryClient.invalidateQueries({ queryKey: queryKeys.issues.liveRuns(issueId) }); + }, + }); return ( watchdogDecision.mutate(input)} /> ); } @@ -299,9 +366,19 @@ export function IssueRunLedgerContent({ issueStatus, childIssues, agentMap, + pendingWatchdogDecision, + onWatchdogDecision, }: IssueRunLedgerContentProps) { const ledgerRuns = useMemo(() => mergeRuns(runs, liveRuns, activeRun), [activeRun, liveRuns, runs]); const latestRun = ledgerRuns[0] ?? 
null; + const latestSilentRun = useMemo( + () => + ledgerRuns.find((run) => + isActiveRun(run) + && (run.outputSilence?.level === "critical" || run.outputSilence?.level === "suspicious"), + ) ?? null, + [ledgerRuns], + ); const children = childIssueSummary(childIssues); return ( @@ -360,6 +437,86 @@ export function IssueRunLedgerContent({
) : null} + {latestSilentRun?.outputSilence ? ( +
+

+ {latestSilentRun.outputSilence.level === "critical" + ? "Stale-run watchdog alert" + : "Output silence watchdog warning"} +

+

+ Latest active run has been silent for{" "} + {formatSilenceAge(latestSilentRun.outputSilence.silenceAgeMs) ?? "an extended period"}. + {latestSilentRun.outputSilence.evaluationIssueIdentifier ? ( + <> + {" "} + Review{" "} + + {latestSilentRun.outputSilence.evaluationIssueIdentifier} + + {" "}for recovery context. + + ) : null} +

+ {onWatchdogDecision ? ( +
+ + + +
+ ) : null} +
+ ) : null} + {ledgerRuns.length === 0 ? (
Historical runs without liveness metadata will appear here once linked to this issue. @@ -418,6 +575,16 @@ export function IssueRunLedgerContent({ {retryState.badgeLabel} ) : null} + {run.outputSilence && RUN_OUTPUT_SILENCE_COPY[run.outputSilence.level] ? ( + + {RUN_OUTPUT_SILENCE_COPY[run.outputSilence.level]?.label} + + ) : null}
diff --git a/ui/src/components/IssuesList.test.tsx b/ui/src/components/IssuesList.test.tsx index a3718570..ec625f91 100644 --- a/ui/src/components/IssuesList.test.tsx +++ b/ui/src/components/IssuesList.test.tsx @@ -170,6 +170,24 @@ async function waitForAssertion(assertion: () => void, attempts = 20) { throw lastError; } +async function waitForMicrotaskAssertion(assertion: () => void, attempts = 20) { + let lastError: unknown; + + for (let attempt = 0; attempt < attempts; attempt += 1) { + try { + assertion(); + return; + } catch (error) { + lastError = error; + await act(async () => { + await Promise.resolve(); + }); + } + } + + throw lastError; +} + function renderWithQueryClient(node: ReactNode, container: HTMLDivElement) { const root = createRoot(container); const queryClient = new QueryClient({ @@ -393,6 +411,10 @@ describe("IssuesList", () => { }), ); + localStorage.setItem( + "paperclip:test-issues:company-1", + JSON.stringify({ statuses: ["done"] }), + ); mockIssuesApi.list.mockResolvedValue(serverIssues); const { root } = renderWithQueryClient( @@ -407,14 +429,14 @@ describe("IssuesList", () => { container, ); - await waitForAssertion(() => { + await waitForMicrotaskAssertion(() => { expect(container.textContent).toContain("Showing up to 200 matches. 
Refine the search to narrow further."); }); act(() => { root.unmount(); }); - }); + }, 10_000); it("loads board issues with a separate result limit for each status column", async () => { localStorage.setItem( @@ -544,8 +566,8 @@ describe("IssuesList", () => { ); await waitForAssertion(() => { - expect(container.querySelectorAll('[data-testid="issue-row"]')).toHaveLength(150); - expect(container.textContent).toContain("Rendering 150 of 220 issues"); + expect(container.querySelectorAll('[data-testid="issue-row"]')).toHaveLength(100); + expect(container.textContent).toContain("Rendering 100 of 220 issues"); }); act(() => { diff --git a/ui/src/components/IssuesList.tsx b/ui/src/components/IssuesList.tsx index 3a63a92a..01d2479e 100644 --- a/ui/src/components/IssuesList.tsx +++ b/ui/src/components/IssuesList.tsx @@ -23,6 +23,7 @@ import { issuePriorityOrder, normalizeIssueFilterState, resolveIssueFilterWorkspaceId, + shouldIncludeIssueFilterWorkspaceOption, issueStatusOrder, type IssueFilterState, } from "../lib/issue-filters"; @@ -61,7 +62,7 @@ import { ISSUE_STATUSES, type Issue, type Project } from "@paperclipai/shared"; const ISSUE_SEARCH_DEBOUNCE_MS = 250; const ISSUE_SEARCH_RESULT_LIMIT = 200; const ISSUE_BOARD_COLUMN_RESULT_LIMIT = 200; -const INITIAL_ISSUE_ROW_RENDER_LIMIT = 150; +const INITIAL_ISSUE_ROW_RENDER_LIMIT = 100; const ISSUE_ROW_RENDER_BATCH_SIZE = 150; const ISSUE_ROW_RENDER_BATCH_DELAY_MS = 0; const boardIssueStatuses = ISSUE_STATUSES; @@ -483,6 +484,10 @@ export function IssuesList({ } return map; }, [projects]); + const defaultProjectWorkspaceIds = useMemo( + () => new Set(defaultProjectWorkspaceIdByProjectId.values()), + [defaultProjectWorkspaceIdByProjectId], + ); const executionWorkspaceById = useMemo(() => { const map = new Map ({ + executionWorkspaceById, + defaultProjectWorkspaceIdByProjectId, + }), [defaultProjectWorkspaceIdByProjectId, executionWorkspaceById]); const workspaceNameMap = useMemo(() => { const map = new Map(); for (const 
[workspaceId, workspace] of projectWorkspaceById) { + if (!shouldIncludeIssueFilterWorkspaceOption({ id: workspaceId }, defaultProjectWorkspaceIds)) continue; map.set(workspaceId, workspace.name); } for (const [workspaceId, workspace] of executionWorkspaceById) { + if (!shouldIncludeIssueFilterWorkspaceOption({ + id: workspaceId, + mode: workspace.mode, + projectWorkspaceId: workspace.projectWorkspaceId, + }, defaultProjectWorkspaceIds)) continue; map.set(workspaceId, workspace.name); } return map; - }, [executionWorkspaceById, projectWorkspaceById]); + }, [defaultProjectWorkspaceIds, executionWorkspaceById, projectWorkspaceById]); const workspaceOptions = useMemo(() => { const options = new Map(); @@ -635,9 +650,27 @@ export function IssuesList({ const searchScopedIssues = normalizedIssueSearch.length > 0 && searchWithinLoadedIssues ? sourceIssues.filter((issue) => issueMatchesLocalSearch(issue, normalizedIssueSearch)) : sourceIssues; - const filteredByControls = applyIssueFilters(searchScopedIssues, viewState, currentUserId, enableRoutineVisibilityFilter); + const filteredByControls = applyIssueFilters( + searchScopedIssues, + viewState, + currentUserId, + enableRoutineVisibilityFilter, + liveIssueIds, + issueFilterWorkspaceContext, + ); return sortIssues(filteredByControls, viewState); - }, [boardIssues, issues, searchedIssues, searchWithinLoadedIssues, viewState, normalizedIssueSearch, currentUserId, enableRoutineVisibilityFilter]); + }, [ + boardIssues, + issues, + searchedIssues, + searchWithinLoadedIssues, + viewState, + normalizedIssueSearch, + currentUserId, + enableRoutineVisibilityFilter, + liveIssueIds, + issueFilterWorkspaceContext, + ]); const { data: labels } = useQuery({ queryKey: queryKeys.issues.labels(selectedCompanyId!), @@ -664,7 +697,10 @@ export function IssuesList({ .map((p) => ({ key: p, label: issueFilterLabel(p), items: groups[p]! 
})); } if (viewState.groupBy === "workspace") { - const groups = groupBy(filtered, (issue) => resolveIssueFilterWorkspaceId(issue) ?? "__no_workspace"); + const groups = groupBy( + filtered, + (issue) => resolveIssueFilterWorkspaceId(issue, issueFilterWorkspaceContext) ?? "__no_workspace", + ); return Object.keys(groups) .sort((a, b) => { // Groups with items first, "no workspace" last @@ -708,7 +744,17 @@ export function IssuesList({ : (agentName(key) ?? key.slice(0, 8)), items: groups[key]!, })); - }, [filtered, viewState.groupBy, agents, agentName, currentUserId, workspaceNameMap, issueTitleMap, companyUserLabelMap]); + }, [ + filtered, + issueFilterWorkspaceContext, + viewState.groupBy, + agents, + agentName, + currentUserId, + workspaceNameMap, + issueTitleMap, + companyUserLabelMap, + ]); useEffect(() => { if (viewState.viewMode !== "list") return; @@ -1087,7 +1133,7 @@ export function IssuesList({ ) : ( { e.preventDefault(); e.stopPropagation(); }}> - onUpdateIssue(issue.id, { status: s })} /> + onUpdateIssue(issue.id, { status: s })} /> ) } @@ -1111,7 +1157,7 @@ export function IssuesList({ showIdentifier={visibleIssueColumnSet.has("id") && availableIssueColumnSet.has("id")} statusSlot={( { e.preventDefault(); e.stopPropagation(); }}> - onUpdateIssue(issue.id, { status: s })} /> + onUpdateIssue(issue.id, { status: s })} /> )} /> @@ -1125,7 +1171,7 @@ export function IssuesList({ columns={visibleTrailingIssueColumns} projectName={issueProject?.name ?? null} projectColor={issueProject?.color ?? 
null} - workspaceId={resolveIssueFilterWorkspaceId(issue)} + workspaceId={resolveIssueFilterWorkspaceId(issue, issueFilterWorkspaceContext)} workspaceName={resolveIssueWorkspaceName(issue, { executionWorkspaceById, projectWorkspaceById, diff --git a/ui/src/components/LiveRunWidget.tsx b/ui/src/components/LiveRunWidget.tsx index e9c2fe7b..26a5a3ac 100644 --- a/ui/src/components/LiveRunWidget.tsx +++ b/ui/src/components/LiveRunWidget.tsx @@ -59,6 +59,8 @@ export function LiveRunWidget({ issueId, companyId }: LiveRunWidgetProps) { agentId: activeRun.agentId, agentName: activeRun.agentName, adapterType: activeRun.adapterType, + logBytes: activeRun.logBytes, + lastOutputBytes: activeRun.lastOutputBytes, issueId, }); } diff --git a/ui/src/components/StatusIcon.test.tsx b/ui/src/components/StatusIcon.test.tsx new file mode 100644 index 00000000..b2900632 --- /dev/null +++ b/ui/src/components/StatusIcon.test.tsx @@ -0,0 +1,72 @@ +// @vitest-environment node + +import { renderToStaticMarkup } from "react-dom/server"; +import { describe, expect, it } from "vitest"; +import { StatusIcon } from "./StatusIcon"; + +describe("StatusIcon", () => { + it("renders covered blocked issues with the cyan covered state visual", () => { + const html = renderToStaticMarkup( + , + ); + + expect(html).toContain('data-blocker-attention-state="covered"'); + expect(html).toContain('aria-label="Blocked · waiting on active sub-issue PAP-2"'); + expect(html).toContain('title="Blocked · waiting on active sub-issue PAP-2"'); + expect(html).toContain("border-cyan-600"); + expect(html).not.toContain("border-red-600"); + expect(html).not.toContain("border-dashed"); + expect(html).toContain("-bottom-0.5"); + }); + + it("uses covered blocked copy for the active dependency count matrix", () => { + const html = renderToStaticMarkup( + , + ); + + expect(html).toContain('aria-label="Blocked · covered by 2 active dependencies"'); + expect(html).toContain("border-cyan-600"); + 
expect(html).not.toContain("border-dashed"); + }); + + it("keeps normal blocked issues on the attention-required visual", () => { + const html = renderToStaticMarkup( + , + ); + + expect(html).not.toContain('data-blocker-attention-state="covered"'); + expect(html).toContain('aria-label="Blocked · 1 unresolved blocker needs attention"'); + expect(html).toContain("border-red-600"); + expect(html).not.toContain("border-dashed"); + }); +}); diff --git a/ui/src/components/StatusIcon.tsx b/ui/src/components/StatusIcon.tsx index bafbb35b..c1865595 100644 --- a/ui/src/components/StatusIcon.tsx +++ b/ui/src/components/StatusIcon.tsx @@ -1,4 +1,5 @@ import { useState } from "react"; +import type { IssueBlockerAttention } from "@paperclipai/shared"; import { cn } from "../lib/utils"; import { issueStatusIcon, issueStatusIconDefault } from "../lib/status-colors"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; @@ -12,15 +13,49 @@ function statusLabel(status: string): string { interface StatusIconProps { status: string; + blockerAttention?: IssueBlockerAttention | null; onChange?: (status: string) => void; className?: string; showLabel?: boolean; } -export function StatusIcon({ status, onChange, className, showLabel }: StatusIconProps) { +function blockedAttentionLabel(blockerAttention: IssueBlockerAttention | null | undefined) { + if (!blockerAttention || blockerAttention.state === "none") return "Blocked"; + + if (blockerAttention.reason === "active_child") { + const count = blockerAttention.coveredBlockerCount; + if (count === 1 && blockerAttention.sampleBlockerIdentifier) { + return `Blocked · waiting on active sub-issue ${blockerAttention.sampleBlockerIdentifier}`; + } + if (count === 1) return "Blocked · waiting on 1 active sub-issue"; + return `Blocked · waiting on ${count} active sub-issues`; + } + + if (blockerAttention.reason === "active_dependency") { + const count = blockerAttention.coveredBlockerCount; + if (count === 1 && 
blockerAttention.sampleBlockerIdentifier) { + return `Blocked · covered by active dependency ${blockerAttention.sampleBlockerIdentifier}`; + } + if (count === 1) return "Blocked · covered by 1 active dependency"; + return `Blocked · covered by ${count} active dependencies`; + } + + if (blockerAttention.reason === "attention_required") { + const count = blockerAttention.unresolvedBlockerCount; + return `Blocked · ${count} unresolved ${count === 1 ? "blocker needs" : "blockers need"} attention`; + } + + return "Blocked"; +} + +export function StatusIcon({ status, blockerAttention, onChange, className, showLabel }: StatusIconProps) { const [open, setOpen] = useState(false); - const colorClass = issueStatusIcon[status] ?? issueStatusIconDefault; + const isCoveredBlocked = status === "blocked" && blockerAttention?.state === "covered"; + const colorClass = isCoveredBlocked + ? "text-cyan-600 border-cyan-600 dark:text-cyan-400 dark:border-cyan-400" + : issueStatusIcon[status] ?? issueStatusIconDefault; const isDone = status === "done"; + const ariaLabel = status === "blocked" ? 
blockedAttentionLabel(blockerAttention) : statusLabel(status); const circle = ( {isDone && ( )} + {isCoveredBlocked && ( + + )} ); diff --git a/ui/src/components/transcript/RunTranscriptView.test.tsx b/ui/src/components/transcript/RunTranscriptView.test.tsx index 41f635d2..46f9f3d9 100644 --- a/ui/src/components/transcript/RunTranscriptView.test.tsx +++ b/ui/src/components/transcript/RunTranscriptView.test.tsx @@ -110,4 +110,23 @@ describe("RunTranscriptView", () => { expect(html).toMatch(/]*>posted issue update<\/li>/); expect(html).not.toContain("result"); }); + + it("windows large raw transcripts instead of rendering every entry at once", () => { + const entries: TranscriptEntry[] = Array.from({ length: 500 }, (_, index) => ({ + kind: "stdout", + ts: `2026-03-12T00:${String(index % 60).padStart(2, "0")}:00.000Z`, + text: `line-${index}`, + })); + + const html = renderToStaticMarkup( + + + , + ); + + expect(html).toContain("line-0"); + expect(html).toContain("line-179"); + expect(html).not.toContain("line-250"); + expect(html).not.toContain("line-499"); + }); }); diff --git a/ui/src/components/transcript/RunTranscriptView.tsx b/ui/src/components/transcript/RunTranscriptView.tsx index fee3a561..a9130f8c 100644 --- a/ui/src/components/transcript/RunTranscriptView.tsx +++ b/ui/src/components/transcript/RunTranscriptView.tsx @@ -1,4 +1,4 @@ -import { useMemo, useState } from "react"; +import { useEffect, useMemo, useRef, useState } from "react"; import type { TranscriptEntry } from "../../adapters"; import { MarkdownBody } from "../MarkdownBody"; import { cn, formatTokens } from "../../lib/utils"; @@ -16,6 +16,11 @@ import { export type TranscriptMode = "nice" | "raw"; export type TranscriptDensity = "comfortable" | "compact"; +const RAW_VIRTUALIZATION_THRESHOLD = 300; +const RAW_OVERSCAN_ROWS = 40; +const RAW_ESTIMATED_ROW_HEIGHT = 36; +const RAW_INITIAL_ROWS = 180; + interface RunTranscriptViewProps { entries: TranscriptEntry[]; mode?: TranscriptMode; @@ -1347,6 
+1352,34 @@ function TranscriptStdoutRow({ ); } +function findScrollParent(element: HTMLElement): HTMLElement | Window { + let current = element.parentElement; + while (current) { + const style = window.getComputedStyle(current); + if (/(auto|scroll)/.test(style.overflowY) && current.scrollHeight > current.clientHeight) { + return current; + } + current = current.parentElement; + } + return window; +} + +function rawEntryContent(entry: TranscriptEntry): string { + if (entry.kind === "tool_call") { + return `${entry.name}\n${formatToolPayload(entry.input)}`; + } + if (entry.kind === "tool_result") { + return formatToolPayload(entry.content); + } + if (entry.kind === "result") { + return `${entry.text}\n${formatTokens(entry.inputTokens)} / ${formatTokens(entry.outputTokens)} / $${entry.costUsd.toFixed(6)}`; + } + if (entry.kind === "init") { + return `model=${entry.model}${entry.sessionId ? ` session=${entry.sessionId}` : ""}`; + } + return entry.text; +} + function RawTranscriptView({ entries, density, @@ -1355,11 +1388,63 @@ function RawTranscriptView({ density: TranscriptDensity; }) { const compact = density === "compact"; + const listRef = useRef(null); + const shouldVirtualize = entries.length > RAW_VIRTUALIZATION_THRESHOLD; + const [range, setRange] = useState(() => ({ + start: 0, + end: Math.min(entries.length, shouldVirtualize ? RAW_INITIAL_ROWS : entries.length), + })); + + useEffect(() => { + if (!shouldVirtualize) { + setRange({ start: 0, end: entries.length }); + return; + } + + const list = listRef.current; + if (!list) return; + + const scrollParent = findScrollParent(list); + const updateRange = () => { + const scrollElement: HTMLElement | null = scrollParent === window ? null : (scrollParent as HTMLElement); + const scrollerTop = scrollElement ? scrollElement.getBoundingClientRect().top : 0; + const scrollerHeight = scrollElement ? 
scrollElement.clientHeight : window.innerHeight; + const listTop = list.getBoundingClientRect().top; + const visibleTop = Math.max(0, scrollerTop - listTop); + const visibleBottom = Math.max(visibleTop + scrollerHeight, 0); + const nextStart = Math.max(0, Math.floor(visibleTop / RAW_ESTIMATED_ROW_HEIGHT) - RAW_OVERSCAN_ROWS); + const nextEnd = Math.min( + entries.length, + Math.ceil(visibleBottom / RAW_ESTIMATED_ROW_HEIGHT) + RAW_OVERSCAN_ROWS, + ); + setRange((current) => ( + current.start === nextStart && current.end === nextEnd + ? current + : { start: nextStart, end: nextEnd } + )); + }; + + updateRange(); + const frame = window.requestAnimationFrame(updateRange); + scrollParent.addEventListener("scroll", updateRange, { passive: true }); + window.addEventListener("resize", updateRange); + return () => { + window.cancelAnimationFrame(frame); + scrollParent.removeEventListener("scroll", updateRange); + window.removeEventListener("resize", updateRange); + }; + }, [entries.length, shouldVirtualize]); + + const visibleEntries = shouldVirtualize ? entries.slice(range.start, range.end) : entries; + const topSpacer = shouldVirtualize ? range.start * RAW_ESTIMATED_ROW_HEIGHT : 0; + const bottomSpacer = shouldVirtualize ? Math.max(0, entries.length - range.end) * RAW_ESTIMATED_ROW_HEIGHT : 0; + return ( -
- {entries.map((entry, idx) => ( +
+ {topSpacer > 0 &&