import { randomUUID } from "node:crypto"; import { eq, sql } from "drizzle-orm"; import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest"; import { agents, agentWakeupRequests, companies, createDb, heartbeatRunEvents, heartbeatRuns, } from "@paperclipai/db"; import { getEmbeddedPostgresTestSupport, startEmbeddedPostgresTestDatabase, } from "./helpers/embedded-postgres.js"; import { BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS, heartbeatService, } from "../services/heartbeat.ts"; const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport(); const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip; if (!embeddedPostgresSupport.supported) { console.warn( `Skipping embedded Postgres heartbeat retry scheduling tests on this host: ${embeddedPostgresSupport.reason ?? "unsupported environment"}`, ); } describeEmbeddedPostgres("heartbeat bounded retry scheduling", () => { let db!: ReturnType; let heartbeat!: ReturnType; let tempDb: Awaited> | null = null; beforeAll(async () => { tempDb = await startEmbeddedPostgresTestDatabase("paperclip-heartbeat-retry-scheduling-"); db = createDb(tempDb.connectionString); heartbeat = heartbeatService(db); }, 20_000); afterEach(async () => { await db.delete(heartbeatRunEvents); await db.delete(heartbeatRuns); await db.delete(agentWakeupRequests); await db.delete(agents); await db.delete(companies); }); afterAll(async () => { await tempDb?.cleanup(); }); async function seedRetryFixture(input: { runId: string; companyId: string; agentId: string; now: Date; errorCode: string; scheduledRetryAttempt?: number; }) { await db.insert(companies).values({ id: input.companyId, name: "Paperclip", issuePrefix: `T${input.companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`, requireBoardApprovalForNewAgents: false, }); await db.insert(agents).values({ id: input.agentId, companyId: input.companyId, name: "CodexCoder", role: "engineer", status: "active", adapterType: "codex_local", adapterConfig: {}, runtimeConfig: { heartbeat: { wakeOnDemand: true, maxConcurrentRuns: 1, }, }, permissions: {}, }); await db.insert(heartbeatRuns).values({ id: input.runId, companyId: input.companyId, agentId: input.agentId, invocationSource: "assignment", status: "failed", error: "upstream overload", errorCode: input.errorCode, finishedAt: input.now, scheduledRetryAttempt: input.scheduledRetryAttempt ?? 0, scheduledRetryReason: input.scheduledRetryAttempt ? "transient_failure" : null, contextSnapshot: { issueId: randomUUID(), wakeReason: "issue_assigned", }, updatedAt: input.now, createdAt: input.now, }); } it("schedules a retry with durable metadata and only promotes it when due", async () => { const companyId = randomUUID(); const agentId = randomUUID(); const sourceRunId = randomUUID(); const now = new Date("2026-04-20T12:00:00.000Z"); await db.insert(companies).values({ id: companyId, name: "Paperclip", issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`, requireBoardApprovalForNewAgents: false, }); await db.insert(agents).values({ id: agentId, companyId, name: "CodexCoder", role: "engineer", status: "active", adapterType: "codex_local", adapterConfig: {}, runtimeConfig: { heartbeat: { wakeOnDemand: true, maxConcurrentRuns: 1, }, }, permissions: {}, }); await db.insert(heartbeatRuns).values({ id: sourceRunId, companyId, agentId, invocationSource: "assignment", status: "failed", error: "upstream overload", errorCode: "adapter_failed", finishedAt: now, contextSnapshot: { issueId: randomUUID(), wakeReason: "issue_assigned", }, updatedAt: now, createdAt: now, }); const scheduled = await heartbeat.scheduleBoundedRetry(sourceRunId, { now, random: () => 0.5, }); expect(scheduled.outcome).toBe("scheduled"); if (scheduled.outcome !== "scheduled") return; const expectedDueAt = new Date(now.getTime() + BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS[0]); expect(scheduled.attempt).toBe(1); expect(scheduled.dueAt.toISOString()).toBe(expectedDueAt.toISOString()); const retryRun = await db .select() .from(heartbeatRuns) .where(eq(heartbeatRuns.id, scheduled.run.id)) .then((rows) => rows[0] ?? null); expect(retryRun).toMatchObject({ status: "scheduled_retry", retryOfRunId: sourceRunId, scheduledRetryAttempt: 1, scheduledRetryReason: "transient_failure", }); expect(retryRun?.scheduledRetryAt?.toISOString()).toBe(expectedDueAt.toISOString()); const earlyPromotion = await heartbeat.promoteDueScheduledRetries(new Date("2026-04-20T12:01:59.000Z")); expect(earlyPromotion).toEqual({ promoted: 0, runIds: [] }); const stillScheduled = await db .select({ status: heartbeatRuns.status }) .from(heartbeatRuns) .where(eq(heartbeatRuns.id, scheduled.run.id)) .then((rows) => rows[0] ?? null); expect(stillScheduled?.status).toBe("scheduled_retry"); const duePromotion = await heartbeat.promoteDueScheduledRetries(expectedDueAt); expect(duePromotion).toEqual({ promoted: 1, runIds: [scheduled.run.id] }); const promotedRun = await db .select({ status: heartbeatRuns.status }) .from(heartbeatRuns) .where(eq(heartbeatRuns.id, scheduled.run.id)) .then((rows) => rows[0] ?? null); expect(promotedRun?.status).toBe("queued"); }); it("exhausts bounded retries after the hard cap", async () => { const companyId = randomUUID(); const agentId = randomUUID(); const cappedRunId = randomUUID(); const now = new Date("2026-04-20T18:00:00.000Z"); await db.insert(companies).values({ id: companyId, name: "Paperclip", issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`, requireBoardApprovalForNewAgents: false, }); await db.insert(agents).values({ id: agentId, companyId, name: "CodexCoder", role: "engineer", status: "active", adapterType: "codex_local", adapterConfig: {}, runtimeConfig: { heartbeat: { wakeOnDemand: true, maxConcurrentRuns: 1, }, }, permissions: {}, }); await db.insert(heartbeatRuns).values({ id: cappedRunId, companyId, agentId, invocationSource: "automation", status: "failed", error: "still transient", errorCode: "adapter_failed", finishedAt: now, scheduledRetryAttempt: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS.length, scheduledRetryReason: "transient_failure", contextSnapshot: { wakeReason: "transient_failure_retry", }, updatedAt: now, createdAt: now, }); const exhausted = await heartbeat.scheduleBoundedRetry(cappedRunId, { now, random: () => 0.5, }); expect(exhausted).toEqual({ outcome: "retry_exhausted", attempt: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS.length + 1, maxAttempts: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS.length, }); const runCount = await db .select({ count: sql`count(*)::int` }) .from(heartbeatRuns) .where(eq(heartbeatRuns.companyId, companyId)) .then((rows) => rows[0]?.count ?? 0); expect(runCount).toBe(1); const exhaustionEvent = await db .select({ message: heartbeatRunEvents.message, payload: heartbeatRunEvents.payload, }) .from(heartbeatRunEvents) .where(eq(heartbeatRunEvents.runId, cappedRunId)) .orderBy(sql`${heartbeatRunEvents.id} desc`) .then((rows) => rows[0] ?? null); expect(exhaustionEvent?.message).toContain("Bounded retry exhausted"); expect(exhaustionEvent?.payload).toMatchObject({ retryReason: "transient_failure", scheduledRetryAttempt: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS.length, maxAttempts: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_DELAYS_MS.length, }); }); it("advances codex transient fallback stages across bounded retry attempts", async () => { const fallbackModes = [ "same_session", "safer_invocation", "fresh_session", "fresh_session_safer_invocation", ] as const; for (const [index, expectedMode] of fallbackModes.entries()) { const companyId = randomUUID(); const agentId = randomUUID(); const runId = randomUUID(); const now = new Date(`2026-04-20T1${index}:00:00.000Z`); await seedRetryFixture({ runId, companyId, agentId, now, errorCode: "codex_transient_upstream", scheduledRetryAttempt: index, }); const scheduled = await heartbeat.scheduleBoundedRetry(runId, { now, random: () => 0.5, }); expect(scheduled.outcome).toBe("scheduled"); if (scheduled.outcome !== "scheduled") continue; const retryRun = await db .select({ contextSnapshot: heartbeatRuns.contextSnapshot, wakeupRequestId: heartbeatRuns.wakeupRequestId, }) .from(heartbeatRuns) .where(eq(heartbeatRuns.id, scheduled.run.id)) .then((rows) => rows[0] ?? null); expect((retryRun?.contextSnapshot as Record | null)?.codexTransientFallbackMode).toBe(expectedMode); const wakeupRequest = await db .select({ payload: agentWakeupRequests.payload }) .from(agentWakeupRequests) .where(eq(agentWakeupRequests.id, retryRun?.wakeupRequestId ?? "")) .then((rows) => rows[0] ?? null); expect((wakeupRequest?.payload as Record | null)?.codexTransientFallbackMode).toBe(expectedMode); await db.delete(heartbeatRunEvents); await db.delete(heartbeatRuns); await db.delete(agentWakeupRequests); await db.delete(agents); await db.delete(companies); } }); });