d734bd43d1
## Thinking Path > - Paperclip is the control plane for autonomous AI companies, so agent work needs visible ownership, recovery, and operator controls. > - This local branch had accumulated several related control-plane reliability and operator-experience fixes across recovery actions, watchdog folding, model-profile defaults, mentions, markdown editing, plugin launchers, and small UI polish. > - The branch needed to be converted into a PR against the current `origin/master` without losing dirty work or including lockfile/workflow churn. > - The safest standalone shape is a single rollup PR because the recovery/server/UI files overlap heavily across the local commits and splitting would create avoidable conflicts. > - This pull request replays the local branch onto latest `origin/master`, preserves the uncommitted work as logical commits, and adds a Zod 4 validator compatibility fix found during verification. > - The benefit is that the May 17 local branch can be reviewed and merged as one coherent, conflict-free branch under the 100-file Greptile limit. ## What Changed - Rebased the local May 17 branch work onto current `origin/master` in a dedicated worktree. - Preserved and committed previously dirty changes for recovery retry handling, plugin/sidebar launcher polish, and `.herenow` ignores. - Added recovery-action behavior for returning source issues to `todo` when retrying source-scoped recovery. - Included the existing local recovery/liveness/watchdog fold, Codex cheap-profile, markdown/mention, duplicate-agent, and UI polish commits from the branch. - Normalized shared validator `z.record(...)` schemas to explicit string-key records for Zod 4 compatibility. - Confirmed the PR has no `pnpm-lock.yaml` or `.github/workflows/*` changes and stays below the 100-file Greptile limit. 
## Verification - `pnpm install --frozen-lockfile --ignore-scripts` - `npm run install` in `node_modules/.pnpm/sqlite3@5.1.7/node_modules/sqlite3` to build the local native sqlite3 binding after installing with scripts disabled - `pnpm exec vitest run packages/shared/src/validators/issue.test.ts packages/shared/src/project-mentions.test.ts packages/adapter-utils/src/server-utils.test.ts server/src/__tests__/heartbeat-model-profile.test.ts server/src/__tests__/issue-recovery-actions.test.ts server/src/__tests__/issue-agent-mutation-ownership-routes.test.ts server/src/__tests__/heartbeat-active-run-output-watchdog.test.ts server/src/__tests__/plugin-local-folders.test.ts ui/src/components/IssueRecoveryActionCard.test.tsx ui/src/components/Sidebar.test.tsx ui/src/components/SidebarAccountMenu.test.tsx ui/src/components/IssueProperties.test.tsx ui/src/components/MarkdownEditor.test.tsx ui/src/components/MarkdownBody.test.tsx ui/src/lib/duplicate-agent-payload.test.ts ui/src/pages/Routines.test.tsx` - First pass: 13 files passed with 201 passing tests; 3 server files failed before the sqlite3 native binding was built. - After rebuilding sqlite3: `server/src/__tests__/heartbeat-model-profile.test.ts`, `server/src/__tests__/issue-recovery-actions.test.ts`, and `server/src/__tests__/heartbeat-active-run-output-watchdog.test.ts` passed/loaded; embedded Postgres tests were skipped by the local host guard. - `pnpm --filter @paperclipai/shared typecheck` - `pnpm --filter @paperclipai/adapter-utils typecheck` - `pnpm --filter @paperclipai/server typecheck` - `pnpm --filter @paperclipai/ui typecheck` ## Risks - Medium risk: this is a broad rollup PR across recovery semantics, server tests, shared validators, and UI surfaces. - Some embedded Postgres tests skipped locally due to the host guard, so CI should provide the stronger database-backed signal. - UI changes were covered by component tests, but no browser screenshot was captured in this PR creation pass. 
- This branch may overlap with existing recovery/liveness PR work; merge this PR independently or restack/close overlapping branches rather than merging duplicate implementations together. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex, GPT-5-based coding agent, tool-enabled local repository and GitHub workflow, medium reasoning effort. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [ ] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --------- Co-authored-by: Paperclip <noreply@paperclip.ing>
802 lines
28 KiB
TypeScript
802 lines
28 KiB
TypeScript
import { randomUUID } from "node:crypto";
|
|
import { and, eq, sql } from "drizzle-orm";
|
|
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
|
import {
|
|
activityLog,
|
|
agents,
|
|
companies,
|
|
createDb,
|
|
heartbeatRunEvents,
|
|
heartbeatRunWatchdogDecisions,
|
|
heartbeatRuns,
|
|
issueComments,
|
|
issueRecoveryActions,
|
|
issueRelations,
|
|
issues,
|
|
} from "@paperclipai/db";
|
|
import {
|
|
getEmbeddedPostgresTestSupport,
|
|
startEmbeddedPostgresTestDatabase,
|
|
} from "./helpers/embedded-postgres.js";
|
|
import {
|
|
ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS,
|
|
ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS,
|
|
ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS,
|
|
heartbeatService,
|
|
} from "../services/heartbeat.ts";
|
|
import { recoveryService } from "../services/recovery/service.ts";
|
|
import { getRunLogStore } from "../services/run-log-store.ts";
|
|
|
|
// Hoisted so the `vi.mock` factory for the adapters module can reference it.
// Every call resolves to a fresh "successful run" result object.
const mockAdapterExecute = vi.hoisted(() => {
  const execute = async () => {
    const successfulRun = {
      exitCode: 0,
      signal: null,
      timedOut: false,
      errorMessage: null,
      summary: "Acknowledged stale-run evaluation.",
      provider: "test",
      model: "test-model",
    };
    return successfulRun;
  };
  return vi.fn(execute);
});
|
|
|
|
// Replace the server telemetry module with a client whose `track` is a spy.
vi.mock("../telemetry.ts", () => {
  const getTelemetryClient = () => {
    return { track: vi.fn() };
  };
  return { getTelemetryClient };
});
|
|
|
|
// Keep the real shared telemetry exports, overriding only `trackAgentFirstHeartbeat` with a spy.
vi.mock("@paperclipai/shared/telemetry", async () => {
  type SharedTelemetryModule = typeof import("@paperclipai/shared/telemetry");
  const realTelemetry = await vi.importActual<SharedTelemetryModule>("@paperclipai/shared/telemetry");
  const overrides = { trackAgentFirstHeartbeat: vi.fn() };
  return { ...realTelemetry, ...overrides };
});
|
|
|
|
// Keep the real adapter registry exports, but make `getServerAdapter` always hand back
// a stub adapter whose `execute` is the hoisted `mockAdapterExecute` spy.
vi.mock("../adapters/index.ts", async () => {
  type AdaptersModule = typeof import("../adapters/index.ts");
  const realAdapters = await vi.importActual<AdaptersModule>("../adapters/index.ts");
  const getServerAdapter = vi.fn(() => {
    return {
      supportsLocalAgentJwt: false,
      execute: mockAdapterExecute,
    };
  });
  return { ...realAdapters, getServerAdapter };
});
|
|
|
|
// Probe once at module load whether embedded Postgres is usable on this host.
const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport();

// When it is not, say so and why before the suite is registered as skipped.
if (!embeddedPostgresSupport.supported) {
  const skipReason = embeddedPostgresSupport.reason ?? "unsupported environment";
  console.warn(
    `Skipping embedded Postgres active-run output watchdog tests on this host: ${skipReason}`,
  );
}

// `describe` on supported hosts, `describe.skip` everywhere else.
const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip;
|
|
|
|
describeEmbeddedPostgres("active-run output watchdog", () => {
|
|
let tempDb: Awaited<ReturnType<typeof startEmbeddedPostgresTestDatabase>> | null = null;
|
|
let db: ReturnType<typeof createDb>;
|
|
|
|
// Start one embedded Postgres instance for the whole suite (30 s hook timeout)
// and open a Drizzle handle against it.
beforeAll(async () => {
  const startedDatabase = await startEmbeddedPostgresTestDatabase("paperclip-active-run-output-watchdog-");
  tempDb = startedDatabase;
  db = createDb(startedDatabase.connectionString);
}, 30_000);
|
|
|
|
// Between tests: poll (at most 100 times, 25 ms apart) until no heartbeat run is
// queued or running, then wipe all data by truncating `companies` with CASCADE.
afterEach(async () => {
  let pollsLeft = 100;
  while (pollsLeft > 0) {
    const stillActive = await db
      .select({ id: heartbeatRuns.id })
      .from(heartbeatRuns)
      .where(sql`${heartbeatRuns.status} in ('queued', 'running')`);
    if (stillActive.length === 0) {
      break;
    }
    await new Promise((resolve) => setTimeout(resolve, 25));
    pollsLeft -= 1;
  }

  const truncateEverything = sql.raw(`TRUNCATE TABLE "companies" CASCADE`);
  await db.execute(truncateEverything);
});
|
|
|
|
// Tear down the embedded database if `beforeAll` managed to start one.
afterAll(async () => {
  if (tempDb) {
    await tempDb.cleanup();
  }
});
|
|
|
|
/**
 * Seeds a self-contained fixture: one company, a "CTO" manager agent, a "Coder" agent
 * reporting to it, a source issue assigned to the coder, and a heartbeat run in
 * `running` status whose `contextSnapshot` points at that issue.
 *
 * @param opts.now Reference "current" time used to derive the run timestamps.
 * @param opts.ageMs How long before `now` the run (and the source issue) started.
 * @param opts.withOutput When truthy, the run records stdout output 5 minutes before `now`
 *   (sequence 3); otherwise it has no output at all.
 * @param opts.logChunk When non-empty, appended to the run log store as stdout and the
 *   resulting log reference/byte count is written back onto the run.
 * @param opts.sourceStatus Status of the source issue (defaults to `"in_progress"`);
 *   `done`/`cancelled` also stamp the matching completion/cancellation timestamp.
 * @param opts.sourceOriginKind Origin kind of the source issue (defaults to `"manual"`).
 * @param opts.sameRunTerminalEvidence Optionally inserts evidence attributed to the seeded
 *   run: an `issue.updated` activity-log row, or an agent comment on the source issue.
 * @returns The generated ids plus the company issue prefix.
 */
async function seedRunningRun(opts: {
  now: Date;
  ageMs: number;
  withOutput?: boolean;
  logChunk?: string;
  sourceStatus?: "in_progress" | "done" | "cancelled";
  sourceOriginKind?: string;
  sameRunTerminalEvidence?: "activity" | "comment";
}) {
  const companyId = randomUUID();
  const managerId = randomUUID();
  const coderId = randomUUID();
  const issueId = randomUUID();
  const runId = randomUUID();

  const issuePrefix = `W${companyId.split("-").join("").slice(0, 6).toUpperCase()}`;
  const sourceIdentifier = `${issuePrefix}-1`;

  const nowMs = opts.now.getTime();
  const hasOutput = Boolean(opts.withOutput);
  const startedAt = new Date(nowMs - opts.ageMs);
  const lastOutputAt = hasOutput ? new Date(nowMs - 5 * 60 * 1000) : null;
  const sourceStatus = opts.sourceStatus ?? "in_progress";
  // Evidence and terminal timestamps land ten minutes after the run started.
  const terminalEvidenceAt = new Date(startedAt.getTime() + 10 * 60 * 1000);

  await db.insert(companies).values({
    id: companyId,
    name: "Watchdog Co",
    issuePrefix,
    requireBoardApprovalForNewAgents: false,
  });

  await db.insert(agents).values([
    {
      id: managerId,
      companyId,
      name: "CTO",
      role: "cto",
      status: "idle",
      adapterType: "codex_local",
      adapterConfig: {},
      runtimeConfig: {},
      permissions: {},
    },
    {
      id: coderId,
      companyId,
      name: "Coder",
      role: "engineer",
      status: "running",
      reportsTo: managerId,
      adapterType: "codex_local",
      adapterConfig: {},
      runtimeConfig: {},
      permissions: {},
    },
  ]);

  await db.insert(issues).values({
    id: issueId,
    companyId,
    title: "Long running implementation",
    status: sourceStatus,
    priority: "medium",
    assigneeAgentId: coderId,
    issueNumber: 1,
    identifier: sourceIdentifier,
    originKind: opts.sourceOriginKind ?? "manual",
    completedAt: sourceStatus === "done" ? terminalEvidenceAt : null,
    cancelledAt: sourceStatus === "cancelled" ? terminalEvidenceAt : null,
    updatedAt: startedAt,
    createdAt: startedAt,
  });

  await db.insert(heartbeatRuns).values({
    id: runId,
    companyId,
    agentId: coderId,
    status: "running",
    invocationSource: "assignment",
    triggerDetail: "system",
    startedAt,
    processStartedAt: startedAt,
    lastOutputAt,
    lastOutputSeq: hasOutput ? 3 : 0,
    lastOutputStream: hasOutput ? "stdout" : null,
    contextSnapshot: { issueId },
    // Fake secret; tests assert it never shows up in a generated evaluation description.
    stdoutExcerpt: "OPENAI_API_KEY=sk-test-secret-value should not leak",
    logBytes: 0,
  });

  if (opts.logChunk) {
    const runLogStore = getRunLogStore();
    const logHandle = await runLogStore.begin({ companyId, agentId: coderId, runId });
    const logBytes = await runLogStore.append(logHandle, {
      stream: "stdout",
      chunk: opts.logChunk,
      ts: startedAt.toISOString(),
    });
    await db
      .update(heartbeatRuns)
      .set({ logStore: logHandle.store, logRef: logHandle.logRef, logBytes })
      .where(eq(heartbeatRuns.id, runId));
  }

  // Mark the seeded run as the one currently executing the source issue.
  await db.update(issues).set({ executionRunId: runId }).where(eq(issues.id, issueId));

  switch (opts.sameRunTerminalEvidence) {
    case "activity":
      await db.insert(activityLog).values({
        companyId,
        actorType: "agent",
        actorId: coderId,
        agentId: coderId,
        runId,
        action: "issue.updated",
        entityType: "issue",
        entityId: issueId,
        details: {
          identifier: sourceIdentifier,
          status: sourceStatus,
          _previous: { status: "in_progress" },
        },
        createdAt: terminalEvidenceAt,
      });
      break;
    case "comment":
      await db.insert(issueComments).values({
        companyId,
        issueId,
        authorAgentId: coderId,
        authorType: "agent",
        createdByRunId: runId,
        body: "Completed and verified.",
        createdAt: terminalEvidenceAt,
        updatedAt: terminalEvidenceAt,
      });
      break;
    default:
      break;
  }

  return { companyId, managerId, coderId, issueId, runId, issuePrefix };
}
|
|
|
|
it("creates one medium-priority evaluation issue for a suspicious silent run", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000,
  });
  const { companyId, managerId, runId } = seeded;
  const heartbeat = heartbeatService(db);

  // Scan twice with identical inputs: only the first pass may create an evaluation.
  const firstScan = await heartbeat.scanSilentActiveRuns({ now, companyId });
  const secondScan = await heartbeat.scanSilentActiveRuns({ now, companyId });

  expect(firstScan.created).toBe(1);
  expect(secondScan.created).toBe(0);
  expect(secondScan.existing).toBe(1);

  const staleRunEvaluationFilter = and(
    eq(issues.companyId, companyId),
    eq(issues.originKind, "stale_active_run_evaluation"),
  );
  const evaluations = await db.select().from(issues).where(staleRunEvaluationFilter);
  expect(evaluations).toHaveLength(1);

  const onlyEvaluation = evaluations[0];
  expect(["todo", "in_progress"]).toContain(onlyEvaluation?.status);
  expect(onlyEvaluation).toMatchObject({
    priority: "medium",
    assigneeAgentId: managerId,
    assigneeAdapterOverrides: { modelProfile: "cheap" },
    originId: runId,
    originFingerprint: `stale_active_run:${companyId}:${runId}`,
  });
  expect(onlyEvaluation?.description).toContain("Decision Checklist");
  // The seeded stdout excerpt carries a fake API key that must not be copied over.
  expect(onlyEvaluation?.description).not.toContain("sk-test-secret-value");
});
|
|
|
|
it("redacts sensitive values from actual run-log evidence", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const leakedJwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c";
  const leakedGithubToken = "ghp_1234567890abcdefghijklmnopqrstuvwxyz";

  // Three log lines, each carrying a different shape of secret.
  const secretBearingLogLines = [
    "Authorization: Bearer live-bearer-token-value",
    `POST payload {"apiKey":"json-secret-value","token":"${leakedJwt}"}`,
    `GITHUB_TOKEN=${leakedGithubToken}`,
  ];
  const { companyId } = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000,
    logChunk: secretBearingLogLines.join("\n"),
  });
  const heartbeat = heartbeatService(db);

  await heartbeat.scanSilentActiveRuns({ now, companyId });

  const evaluationRows = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation")));
  const evaluation = evaluationRows[0];

  expect(evaluation?.description).toContain("***REDACTED***");
  expect(evaluation?.description).not.toContain("live-bearer-token-value");
  expect(evaluation?.description).not.toContain("json-secret-value");
  expect(evaluation?.description).not.toContain(leakedJwt);
  expect(evaluation?.description).not.toContain(leakedGithubToken);
});
|
|
|
|
it("raises critical stale-run evaluations and blocks the source issue", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000,
  });
  const { companyId, issueId } = seeded;
  const heartbeat = heartbeatService(db);

  const scanResult = await heartbeat.scanSilentActiveRuns({ now, companyId });

  expect(scanResult.created).toBe(1);

  const evaluationRows = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation")));
  const evaluation = evaluationRows[0];
  expect(evaluation?.priority).toBe("high");

  // The relation row points from the evaluation issue to the source issue.
  const relationRows = await db
    .select()
    .from(issueRelations)
    .where(and(eq(issueRelations.companyId, companyId), eq(issueRelations.relatedIssueId, issueId)));
  const blocker = relationRows[0];
  expect(blocker?.issueId).toBe(evaluation?.id);

  const sourceRows = await db.select().from(issues).where(eq(issues.id, issueId));
  expect(sourceRows[0]?.status).toBe("blocked");
});
|
|
|
|
it("folds terminal source issues with same-run durable evidence instead of creating watchdog work", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000,
    sourceStatus: "done",
    sameRunTerminalEvidence: "activity",
  });
  const { companyId, coderId, issueId, runId } = seeded;
  const heartbeat = heartbeatService(db);

  const scanResult = await heartbeat.scanSilentActiveRuns({ now, companyId });

  expect(scanResult).toMatchObject({ created: 0, folded: 1, skipped: 0 });

  // No evaluation issue is opened for the folded run.
  const evaluations = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation")));
  expect(evaluations).toHaveLength(0);

  // The run itself is closed out as succeeded at scan time, with the fold recorded in its result.
  const runRows = await db.select().from(heartbeatRuns).where(eq(heartbeatRuns.id, runId));
  const run = runRows[0];
  expect(run?.status).toBe("succeeded");
  expect(run?.errorCode).toBeNull();
  expect(run?.finishedAt?.toISOString()).toBe(now.toISOString());
  expect(run?.resultJson).toMatchObject({
    sourceResolvedWatchdogFold: {
      sourceIssueId: issueId,
      sourceIssueStatus: "done",
      sameRunEvidenceKind: "activity",
      evaluationIssueId: null,
      evaluationIssueIdentifier: null,
      cleanup: { outcome: "no_process_metadata" },
    },
  });

  const sourceRows = await db.select().from(issues).where(eq(issues.id, issueId));
  expect(sourceRows[0]?.executionRunId).toBeNull();

  const agentRows = await db.select().from(agents).where(eq(agents.id, coderId));
  expect(agentRows[0]?.status).toBe("idle");

  const decisionRows = await db
    .select()
    .from(heartbeatRunWatchdogDecisions)
    .where(eq(heartbeatRunWatchdogDecisions.runId, runId));
  expect(decisionRows[0]?.decision).toBe("dismissed_false_positive");

  const eventRows = await db
    .select()
    .from(heartbeatRunEvents)
    .where(eq(heartbeatRunEvents.runId, runId));
  expect(eventRows[0]?.message).toContain("Source-resolved watchdog fold");
});
|
|
|
|
it("still escalates terminal source issues without same-run terminal evidence", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  // Source issue is already `done`, but nothing ties that outcome to the seeded run.
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000,
    sourceStatus: "done",
  });
  const { companyId, runId } = seeded;
  const heartbeat = heartbeatService(db);

  const scanResult = await heartbeat.scanSilentActiveRuns({ now, companyId });

  expect(scanResult).toMatchObject({ created: 1, folded: 0 });

  const runRows = await db.select().from(heartbeatRuns).where(eq(heartbeatRuns.id, runId));
  expect(runRows[0]?.status).toBe("running");

  const evaluationRows = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation")));
  const evaluation = evaluationRows[0];
  expect(evaluation?.originId).toBe(runId);
  expect(evaluation?.parentId).toBeNull();
});
|
|
|
|
it("still escalates when a same-run comment is followed by another actor marking the source done", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000,
    sourceStatus: "in_progress",
    sameRunTerminalEvidence: "comment",
  });
  const { companyId, issueId, runId, issuePrefix } = seeded;

  // Five minutes before `now`, a board user (not the seeded run) closes the source issue.
  const completedAt = new Date(now.getTime() - 5 * 60_000);
  await db
    .update(issues)
    .set({ status: "done", completedAt, updatedAt: completedAt })
    .where(eq(issues.id, issueId));
  await db.insert(activityLog).values({
    companyId,
    actorType: "user",
    actorId: "board-user",
    agentId: null,
    runId: null,
    action: "issue.updated",
    entityType: "issue",
    entityId: issueId,
    details: {
      identifier: `${issuePrefix}-1`,
      status: "done",
      _previous: { status: "in_progress" },
    },
    createdAt: completedAt,
  });
  const heartbeat = heartbeatService(db);

  const scanResult = await heartbeat.scanSilentActiveRuns({ now, companyId });

  expect(scanResult).toMatchObject({ created: 1, folded: 0 });

  const runRows = await db.select().from(heartbeatRuns).where(eq(heartbeatRuns.id, runId));
  expect(runRows[0]?.status).toBe("running");

  const evaluationRows = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation")));
  const evaluation = evaluationRows[0];
  expect(evaluation?.originId).toBe(runId);
  expect(evaluation?.parentId).toBeNull();
});
|
|
|
|
it("folds existing evaluation and active watchdog recovery action idempotently", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000,
    sourceStatus: "done",
    sameRunTerminalEvidence: "activity",
  });
  const { companyId, managerId, issueId, runId, issuePrefix } = seeded;

  // Pre-existing watchdog state for the run: an open evaluation issue, its "blocks"
  // relation to the source issue, and an active recovery action.
  const evaluationIssueId = randomUUID();
  const evaluationIdentifier = `${issuePrefix}-2`;
  await db.insert(issues).values({
    id: evaluationIssueId,
    companyId,
    title: "Existing stale evaluation",
    status: "todo",
    priority: "high",
    assigneeAgentId: managerId,
    issueNumber: 2,
    identifier: evaluationIdentifier,
    originKind: "stale_active_run_evaluation",
    originId: runId,
    originRunId: runId,
    originFingerprint: `stale_active_run:${companyId}:${runId}`,
  });
  await db.insert(issueRelations).values({
    companyId,
    issueId: evaluationIssueId,
    relatedIssueId: issueId,
    type: "blocks",
  });
  await db.insert(issueRecoveryActions).values({
    companyId,
    sourceIssueId: issueId,
    recoveryIssueId: evaluationIssueId,
    kind: "active_run_watchdog",
    status: "active",
    ownerType: "agent",
    ownerAgentId: managerId,
    cause: "active_run_watchdog",
    fingerprint: `active-run-watchdog:${companyId}:${runId}:${issueId}`,
    evidence: { runId },
    nextAction: "Review stale active run",
  });
  const heartbeat = heartbeatService(db);

  const firstScan = await heartbeat.scanSilentActiveRuns({ now, companyId });
  const secondScan = await heartbeat.scanSilentActiveRuns({ now, companyId });

  expect(firstScan).toMatchObject({ created: 0, folded: 1 });
  // The second pass finds nothing left to scan.
  expect(secondScan).toMatchObject({ scanned: 0, created: 0, folded: 0 });

  const evaluationRows = await db.select().from(issues).where(eq(issues.id, evaluationIssueId));
  expect(evaluationRows[0]?.status).toBe("done");

  const runRows = await db.select().from(heartbeatRuns).where(eq(heartbeatRuns.id, runId));
  expect(runRows[0]?.resultJson).toMatchObject({
    sourceResolvedWatchdogFold: {
      sourceIssueId: issueId,
      sourceIssueStatus: "done",
      evaluationIssueId,
      evaluationIssueIdentifier: evaluationIdentifier,
    },
  });

  const actionRows = await db
    .select()
    .from(issueRecoveryActions)
    .where(eq(issueRecoveryActions.sourceIssueId, issueId));
  const action = actionRows[0];
  expect(action?.status).toBe("resolved");
  expect(action?.outcome).toBe("false_positive");

  const decisions = await db
    .select()
    .from(heartbeatRunWatchdogDecisions)
    .where(eq(heartbeatRunWatchdogDecisions.runId, runId));
  expect(decisions).toHaveLength(1);
});
|
|
|
|
it("refuses recovery-on-recovery stale-run recursion", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  // The silent run's own source issue is itself a stale-run evaluation.
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000,
    sourceOriginKind: "stale_active_run_evaluation",
  });
  const { companyId } = seeded;
  const heartbeat = heartbeatService(db);

  const scanResult = await heartbeat.scanSilentActiveRuns({ now, companyId });

  expect(scanResult).toMatchObject({ created: 0, skipped: 1 });

  // Only the seeded source issue carries the evaluation origin kind; nothing new was added.
  const evaluations = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation")));
  expect(evaluations).toHaveLength(1);
});
|
|
|
|
it("skips snoozed runs and healthy noisy runs", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const criticalAgeMs = ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS + 60_000;

  // Two separate companies: one silent run, one run with recent output.
  const stale = await seedRunningRun({ now, ageMs: criticalAgeMs });
  const noisy = await seedRunningRun({ now, ageMs: criticalAgeMs, withOutput: true });

  // Snooze the silent run for an hour past `now`.
  const oneHourMs = 60 * 60 * 1000;
  await db.insert(heartbeatRunWatchdogDecisions).values({
    companyId: stale.companyId,
    runId: stale.runId,
    decision: "snooze",
    snoozedUntil: new Date(now.getTime() + oneHourMs),
    reason: "Intentional quiet run",
  });
  const heartbeat = heartbeatService(db);

  const staleResult = await heartbeat.scanSilentActiveRuns({ now, companyId: stale.companyId });
  const noisyResult = await heartbeat.scanSilentActiveRuns({ now, companyId: noisy.companyId });

  expect(staleResult).toMatchObject({ created: 0, snoozed: 1 });
  expect(noisyResult).toMatchObject({ scanned: 0, created: 0 });
});
|
|
|
|
it("records watchdog decisions through recovery owner authorization", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const suspiciousAgeMs = ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000;
  const { companyId, managerId, runId } = await seedRunningRun({ now, ageMs: suspiciousAgeMs });
  const heartbeat = heartbeatService(db);
  const recovery = recoveryService(db, { enqueueWakeup: vi.fn() });

  const scan = await heartbeat.scanSilentActiveRuns({ now, companyId });
  const [evaluationIssueId] = scan.evaluationIssueIds;
  expect(evaluationIssueId).toBeTruthy();

  // An agent id that was never seeded is rejected with a 403.
  const strangerDecision = recovery.recordWatchdogDecision({
    runId,
    actor: { type: "agent", agentId: randomUUID() },
    decision: "continue",
    evaluationIssueId,
    reason: "not my recovery issue",
  });
  await expect(strangerDecision).rejects.toMatchObject({ status: 403 });

  // The manager assigned to the evaluation may snooze the run.
  const snoozedUntil = new Date(now.getTime() + 60 * 60 * 1000);
  const decision = await recovery.recordWatchdogDecision({
    runId,
    actor: { type: "agent", agentId: managerId },
    decision: "snooze",
    evaluationIssueId,
    reason: "Long compile with no output",
    snoozedUntil,
  });

  expect(decision).toMatchObject({
    runId,
    evaluationIssueId,
    decision: "snooze",
    createdByAgentId: managerId,
  });

  // Each timestamp field gets its own Date instance, all at the same instant.
  const silentSince = () => new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS - 60_000);
  const silence = recovery.buildRunOutputSilence(
    {
      id: runId,
      companyId,
      status: "running",
      lastOutputAt: null,
      lastOutputSeq: 0,
      lastOutputStream: null,
      processStartedAt: silentSince(),
      startedAt: silentSince(),
      createdAt: silentSince(),
    },
    now,
  );
  await expect(silence).resolves.toMatchObject({
    level: "snoozed",
    snoozedUntil,
    evaluationIssueId,
  });
});
|
|
|
|
it("re-arms continue decisions after the default quiet window", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000,
  });
  const { companyId, managerId, runId } = seeded;
  const heartbeat = heartbeatService(db);
  const recovery = recoveryService(db, { enqueueWakeup: vi.fn() });

  const initialScan = await heartbeat.scanSilentActiveRuns({ now, companyId });
  const [evaluationIssueId] = initialScan.evaluationIssueIds;
  expect(evaluationIssueId).toBeTruthy();

  const decision = await recovery.recordWatchdogDecision({
    runId,
    actor: { type: "agent", agentId: managerId },
    decision: "continue",
    evaluationIssueId,
    reason: "Current evidence is acceptable; keep watching.",
    now,
  });

  // A "continue" with no explicit snooze is stored with a snooze ending one re-arm window after `now`.
  const rearmAt = new Date(now.getTime() + ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS);
  expect(decision).toMatchObject({
    runId,
    evaluationIssueId,
    decision: "continue",
    createdByAgentId: managerId,
  });
  expect(decision.snoozedUntil?.toISOString()).toBe(rearmAt.toISOString());

  // Close the first evaluation so a later scan has to open a new one.
  await db.update(issues).set({ status: "done" }).where(eq(issues.id, evaluationIssueId));

  const oneMinuteBeforeRearm = new Date(rearmAt.getTime() - 60_000);
  const beforeRearm = await heartbeat.scanSilentActiveRuns({ now: oneMinuteBeforeRearm, companyId });
  expect(beforeRearm).toMatchObject({ created: 0, snoozed: 1 });

  const oneMinuteAfterRearm = new Date(rearmAt.getTime() + 60_000);
  const afterRearm = await heartbeat.scanSilentActiveRuns({ now: oneMinuteAfterRearm, companyId });
  expect(afterRearm.created).toBe(1);
  expect(afterRearm.evaluationIssueIds[0]).not.toBe(evaluationIssueId);

  const evaluations = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, companyId), eq(issues.originKind, "stale_active_run_evaluation")));
  const openEvaluations = evaluations.filter(
    (issue) => issue.status !== "done" && issue.status !== "cancelled",
  );
  expect(openEvaluations).toHaveLength(1);
});
|
|
|
|
it("rejects agent watchdog decisions using issues not bound to the target run", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000,
  });
  const { companyId, managerId, coderId, runId, issuePrefix } = seeded;
  const heartbeat = heartbeatService(db);
  const recovery = recoveryService(db, { enqueueWakeup: vi.fn() });

  const scan = await heartbeat.scanSilentActiveRuns({ now, companyId });
  const [evaluationIssueId] = scan.evaluationIssueIds;
  expect(evaluationIssueId).toBeTruthy();

  // An issue assigned to the manager that has no evaluation origin at all.
  const unrelatedIssueId = randomUUID();
  await db.insert(issues).values({
    id: unrelatedIssueId,
    companyId,
    title: "Assigned but unrelated",
    status: "todo",
    priority: "medium",
    assigneeAgentId: managerId,
    issueNumber: 20,
    identifier: `${issuePrefix}-20`,
  });

  // A second silent run plus an evaluation issue whose origin is that other run.
  const otherRunId = randomUUID();
  const otherEvaluationIssueId = randomUUID();
  const otherRunStartedAt = new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS - 120_000);
  await db.insert(heartbeatRuns).values({
    id: otherRunId,
    companyId,
    agentId: coderId,
    status: "running",
    invocationSource: "assignment",
    triggerDetail: "system",
    startedAt: otherRunStartedAt,
    processStartedAt: otherRunStartedAt,
    lastOutputAt: null,
    lastOutputSeq: 0,
    lastOutputStream: null,
    contextSnapshot: {},
    logBytes: 0,
  });
  await db.insert(issues).values({
    id: otherEvaluationIssueId,
    companyId,
    title: "Other run evaluation",
    status: "todo",
    priority: "medium",
    assigneeAgentId: managerId,
    issueNumber: 21,
    identifier: `${issuePrefix}-21`,
    originKind: "stale_active_run_evaluation",
    originId: otherRunId,
    originFingerprint: `stale_active_run:${companyId}:${otherRunId}`,
  });

  // Every decision kind against the unrelated issue, then one against the other run's evaluation.
  const forgedBindings = [
    { decision: "continue" as const, evaluationIssueId: unrelatedIssueId },
    { decision: "dismissed_false_positive" as const, evaluationIssueId: unrelatedIssueId },
    {
      decision: "snooze" as const,
      evaluationIssueId: unrelatedIssueId,
      snoozedUntil: new Date(now.getTime() + 60 * 60 * 1000),
    },
    { decision: "continue" as const, evaluationIssueId: otherEvaluationIssueId },
  ];

  for (const forgedBinding of forgedBindings) {
    const rejected = recovery.recordWatchdogDecision({
      runId,
      actor: { type: "agent", agentId: managerId },
      reason: "malicious or stale binding",
      ...forgedBinding,
    });
    await expect(rejected).rejects.toMatchObject({ status: 403 });
  }

  // Once the genuine evaluation is marked done, it is rejected as well.
  await db.update(issues).set({ status: "done" }).where(eq(issues.id, evaluationIssueId));
  const closedEvaluationDecision = recovery.recordWatchdogDecision({
    runId,
    actor: { type: "agent", agentId: managerId },
    decision: "continue",
    evaluationIssueId,
    reason: "closed evaluation should not authorize",
  });
  await expect(closedEvaluationDecision).rejects.toMatchObject({ status: 403 });
});
|
|
|
|
it("validates createdByRunId before storing watchdog decisions", async () => {
  const now = new Date("2026-04-22T20:00:00.000Z");
  const seeded = await seedRunningRun({
    now,
    ageMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS + 60_000,
  });
  const { companyId, managerId, runId } = seeded;
  const heartbeat = heartbeatService(db);
  const recovery = recoveryService(db, { enqueueWakeup: vi.fn() });

  const scan = await heartbeat.scanSilentActiveRuns({ now, companyId });
  const [evaluationIssueId] = scan.evaluationIssueIds;
  expect(evaluationIssueId).toBeTruthy();

  // `createdByRunId` names the coder's run while the actor is the manager: rejected with 403.
  const spoofedRunDecision = recovery.recordWatchdogDecision({
    runId,
    actor: { type: "agent", agentId: managerId },
    decision: "continue",
    evaluationIssueId,
    reason: "client supplied another agent run",
    createdByRunId: runId,
  });
  await expect(spoofedRunDecision).rejects.toMatchObject({ status: 403 });

  // Give the manager a run of its own.
  const managerRunId = randomUUID();
  await db.insert(heartbeatRuns).values({
    id: managerRunId,
    companyId,
    agentId: managerId,
    status: "running",
    invocationSource: "assignment",
    triggerDetail: "system",
    startedAt: now,
    processStartedAt: now,
    lastOutputAt: now,
    lastOutputSeq: 1,
    lastOutputStream: "stdout",
    contextSnapshot: {},
    logBytes: 0,
  });

  // With the actor's run supplied, the stored value is the actor's run id, not the random `createdByRunId`.
  const decision = await recovery.recordWatchdogDecision({
    runId,
    actor: { type: "agent", agentId: managerId, runId: managerRunId },
    decision: "continue",
    evaluationIssueId,
    reason: "valid current actor run",
    createdByRunId: randomUUID(),
  });
  expect(decision.createdByRunId).toBe(managerRunId);
});
|
|
});
|