[codex] Add runtime lifecycle recovery and live issue visibility (#4419)

This commit is contained in:
Dotta
2026-04-24 15:50:32 -05:00
committed by GitHub
parent 9a8d219949
commit 5a0c1979cf
121 changed files with 9625 additions and 2044 deletions
@@ -4,13 +4,16 @@ import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest
import {
activityLog,
agents,
agentWakeupRequests,
companies,
createDb,
executionWorkspaces,
heartbeatRuns,
issueComments,
issueRelations,
issueTreeHolds,
issues,
projects,
projectWorkspaces,
} from "@paperclipai/db";
import {
getEmbeddedPostgresTestSupport,
@@ -55,6 +58,7 @@ vi.mock("../adapters/index.ts", async () => {
});
import { heartbeatService } from "../services/heartbeat.ts";
import { instanceSettingsService } from "../services/instance-settings.ts";
import { runningProcesses } from "../adapters/index.ts";
const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport();
@@ -94,13 +98,23 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
}
await new Promise((resolve) => setTimeout(resolve, 50));
await db.execute(sql.raw(`TRUNCATE TABLE "companies" CASCADE`));
await instanceSettingsService(db).updateExperimental({
enableIssueGraphLivenessAutoRecovery: false,
enableIsolatedWorkspaces: false,
});
});
// Suite teardown: release the temporary database, if one was set up.
afterAll(async () => {
  if (tempDb == null) {
    return;
  }
  await tempDb.cleanup();
});
async function seedBlockedChain() {
/** Switches the experimental `enableIssueGraphLivenessAutoRecovery` instance setting on. */
async function enableAutoRecovery() {
  const settings = instanceSettingsService(db);
  await settings.updateExperimental({ enableIssueGraphLivenessAutoRecovery: true });
}
async function seedBlockedChain(opts: { stale?: boolean } = {}) {
const companyId = randomUUID();
const managerId = randomUUID();
const coderId = randomUUID();
@@ -124,7 +138,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
status: "idle",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {},
runtimeConfig: { heartbeat: { wakeOnDemand: false } },
permissions: {},
},
{
@@ -136,11 +150,14 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
reportsTo: managerId,
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {},
runtimeConfig: { heartbeat: { wakeOnDemand: false } },
permissions: {},
},
]);
const issueTimestamp = opts.stale === false
? new Date()
: new Date(Date.now() - 25 * 60 * 60 * 1000);
await db.insert(issues).values([
{
id: blockedIssueId,
@@ -151,6 +168,8 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
assigneeAgentId: coderId,
issueNumber: 1,
identifier: `${issuePrefix}-1`,
createdAt: issueTimestamp,
updatedAt: issueTimestamp,
},
{
id: blockerIssueId,
@@ -160,6 +179,8 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
priority: "medium",
issueNumber: 2,
identifier: `${issuePrefix}-2`,
createdAt: issueTimestamp,
updatedAt: issueTimestamp,
},
]);
@@ -173,7 +194,91 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
return { companyId, managerId, blockedIssueId, blockerIssueId };
}
it("creates one manager escalation, preserves blockers, and wakes the assignee", async () => {
it("keeps liveness findings advisory when auto recovery is disabled", async () => {
  // enableAutoRecovery() is intentionally not called in this test.
  const seeded = await seedBlockedChain();

  const summary = await heartbeatService(db).reconcileIssueGraphLiveness();

  // The finding is reported, but counted as skipped rather than escalated.
  expect(summary.findings).toBe(1);
  expect(summary.autoRecoveryEnabled).toBe(false);
  expect(summary.escalationsCreated).toBe(0);
  expect(summary.skippedAutoRecoveryDisabled).toBe(1);

  // No escalation issue may have been written for this company.
  const escalationFilter = and(
    eq(issues.companyId, seeded.companyId),
    eq(issues.originKind, "harness_liveness_escalation"),
  );
  const escalationRows = await db.select().from(issues).where(escalationFilter);
  expect(escalationRows).toHaveLength(0);
});
it("does not create recovery issues until the dependency path is stale for 24 hours", async () => {
  await enableAutoRecovery();
  // `stale: false` seeds the chain with current timestamps instead of backdated ones.
  const seeded = await seedBlockedChain({ stale: false });

  const summary = await heartbeatService(db).reconcileIssueGraphLiveness();

  // The finding is still detected, but recovery is deferred as "too young".
  expect(summary.findings).toBe(1);
  expect(summary.escalationsCreated).toBe(0);
  expect(summary.skippedAutoRecoveryTooYoung).toBe(1);

  const escalationFilter = and(
    eq(issues.companyId, seeded.companyId),
    eq(issues.originKind, "harness_liveness_escalation"),
  );
  const escalationRows = await db.select().from(issues).where(escalationFilter);
  expect(escalationRows).toHaveLength(0);
});
it("suppresses liveness escalation when the source issue is under an active pause hold", async () => {
  await enableAutoRecovery();
  const seeded = await seedBlockedChain();

  // Put an active, manually released pause hold on the blocked issue's subtree.
  await db.insert(issueTreeHolds).values({
    companyId: seeded.companyId,
    rootIssueId: seeded.blockedIssueId,
    mode: "pause",
    status: "active",
    reason: "pause liveness recovery subtree",
    releasePolicy: { strategy: "manual" },
  });

  const summary = await heartbeatService(db).reconcileIssueGraphLiveness();

  // The finding is counted once and skipped; nothing is created or reused.
  expect(summary.findings).toBe(1);
  expect(summary.escalationsCreated).toBe(0);
  expect(summary.existingEscalations).toBe(0);
  expect(summary.skipped).toBe(1);

  const escalationFilter = and(
    eq(issues.companyId, seeded.companyId),
    eq(issues.originKind, "harness_liveness_escalation"),
  );
  const escalationRows = await db.select().from(issues).where(escalationFilter);
  expect(escalationRows).toHaveLength(0);
});
it("treats an active executionRunId on the leaf blocker as a live execution path", async () => {
  await enableAutoRecovery();
  const chain = await seedBlockedChain();
  const activeRunId = randomUUID();

  // Record a heartbeat run in "running" state for the manager agent...
  await db.insert(heartbeatRuns).values({
    id: activeRunId,
    companyId: chain.companyId,
    agentId: chain.managerId,
    status: "running",
    contextSnapshot: { issueId: chain.blockedIssueId },
  });

  // ...and point the leaf blocker's executionRunId at that run.
  await db
    .update(issues)
    .set({ executionRunId: activeRunId })
    .where(eq(issues.id, chain.blockerIssueId));

  const summary = await heartbeatService(db).reconcileIssueGraphLiveness();

  // With the run attached, the chain yields no finding and no escalation.
  expect(summary.findings).toBe(0);
  expect(summary.escalationsCreated).toBe(0);
});
it("creates one manager escalation, preserves blockers, and records owner selection", async () => {
await enableAutoRecovery();
const { companyId, managerId, blockedIssueId, blockerIssueId } = await seedBlockedChain();
const heartbeat = heartbeatService(db);
@@ -182,7 +287,6 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
expect(first.escalationsCreated).toBe(1);
expect(second.escalationsCreated).toBe(0);
expect(second.existingEscalations).toBe(1);
const escalations = await db
.select()
@@ -195,9 +299,15 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
);
expect(escalations).toHaveLength(1);
expect(escalations[0]).toMatchObject({
parentId: blockedIssueId,
parentId: blockerIssueId,
assigneeAgentId: managerId,
status: expect.stringMatching(/^(todo|in_progress|done)$/),
originFingerprint: [
"harness_liveness_leaf",
companyId,
"blocked_by_unassigned_issue",
blockerIssueId,
].join(":"),
});
const blockers = await db
@@ -213,15 +323,217 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
expect(comments[0]?.body).toContain("harness-level liveness incident");
expect(comments[0]?.body).toContain(escalations[0]?.identifier ?? escalations[0]!.id);
const wakes = await db.select().from(agentWakeupRequests).where(eq(agentWakeupRequests.agentId, managerId));
expect(wakes.some((wake) => wake.reason === "issue_assigned")).toBe(true);
const events = await db.select().from(activityLog).where(eq(activityLog.companyId, companyId));
expect(events.some((event) => event.action === "issue.harness_liveness_escalation_created")).toBe(true);
const createdEvent = events.find((event) => event.action === "issue.harness_liveness_escalation_created");
expect(createdEvent).toBeTruthy();
expect(createdEvent?.details).toMatchObject({
recoveryIssueId: blockerIssueId,
ownerSelection: {
selectedAgentId: managerId,
selectedReason: "root_agent",
selectedSourceIssueId: blockerIssueId,
},
workspaceSelection: {
reuseRecoveryExecutionWorkspace: false,
inheritedExecutionWorkspaceFromIssueId: null,
projectWorkspaceSourceIssueId: blockerIssueId,
},
});
expect(events.some((event) => event.action === "issue.blockers.updated")).toBe(true);
});
// Scenario: a blocked dependent and its leaf blocker each live in their own project,
// project workspace and execution workspace. After reconciliation the single recovery
// issue must be parented under the blocker, carry the blocker's project and project
// workspace, have no execution workspace or preference, and be assigned to the manager.
it("parents recovery under the leaf blocker without inheriting dependent or blocker execution state for manager-owned recovery", async () => {
await enableAutoRecovery();
// This test additionally switches on the isolated-workspaces experimental flag.
await instanceSettingsService(db).updateExperimental({ enableIsolatedWorkspaces: true });
const companyId = randomUUID();
const managerId = randomUUID();
const blockedIssueId = randomUUID();
const blockerIssueId = randomUUID();
const dependentProjectId = randomUUID();
const blockerProjectId = randomUUID();
const dependentProjectWorkspaceId = randomUUID();
const blockerProjectWorkspaceId = randomUUID();
const dependentExecutionWorkspaceId = randomUUID();
const blockerExecutionWorkspaceId = randomUUID();
// Issue prefix: "T" followed by the first six hex characters of the company id, upper-cased.
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
// Both issues are backdated by 25 hours.
// NOTE(review): presumably this puts them past the 24-hour staleness window named in a sibling test — confirm.
const issueTimestamp = new Date(Date.now() - 25 * 60 * 60 * 1000);
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix,
requireBoardApprovalForNewAgents: false,
});
// The only agent seeded for this company; it has no reportsTo and the escalation is expected to land on it.
await db.insert(agents).values({
id: managerId,
companyId,
name: "Root Operator",
role: "operator",
status: "idle",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: { heartbeat: { wakeOnDemand: false } },
permissions: {},
});
// Separate projects for the dependent and the blocker so the assertions can tell which one was used.
await db.insert(projects).values([
{
id: dependentProjectId,
companyId,
name: "Dependent workspace project",
status: "in_progress",
},
{
id: blockerProjectId,
companyId,
name: "Blocker workspace project",
status: "in_progress",
},
]);
// One project workspace per project.
await db.insert(projectWorkspaces).values([
{
id: dependentProjectWorkspaceId,
companyId,
projectId: dependentProjectId,
name: "Dependent primary",
},
{
id: blockerProjectWorkspaceId,
companyId,
projectId: blockerProjectId,
name: "Blocker primary",
},
]);
// One active execution workspace per project workspace.
await db.insert(executionWorkspaces).values([
{
id: dependentExecutionWorkspaceId,
companyId,
projectId: dependentProjectId,
projectWorkspaceId: dependentProjectWorkspaceId,
mode: "operator_branch",
strategyType: "git_worktree",
name: "Dependent branch",
status: "active",
providerType: "git_worktree",
},
{
id: blockerExecutionWorkspaceId,
companyId,
projectId: blockerProjectId,
projectWorkspaceId: blockerProjectWorkspaceId,
mode: "operator_branch",
strategyType: "git_worktree",
name: "Blocker branch",
status: "active",
providerType: "git_worktree",
},
]);
// Both issues carry an execution workspace and a "reuse_existing" preference;
// neither has an assignee set.
await db.insert(issues).values([
{
id: blockedIssueId,
companyId,
projectId: dependentProjectId,
projectWorkspaceId: dependentProjectWorkspaceId,
executionWorkspaceId: dependentExecutionWorkspaceId,
executionWorkspacePreference: "reuse_existing",
executionWorkspaceSettings: { mode: "operator_branch" },
title: "Blocked dependent",
status: "blocked",
priority: "medium",
issueNumber: 1,
identifier: `${issuePrefix}-1`,
createdAt: issueTimestamp,
updatedAt: issueTimestamp,
},
{
id: blockerIssueId,
companyId,
projectId: blockerProjectId,
projectWorkspaceId: blockerProjectWorkspaceId,
executionWorkspaceId: blockerExecutionWorkspaceId,
executionWorkspacePreference: "reuse_existing",
executionWorkspaceSettings: { mode: "operator_branch" },
title: "Unassigned leaf blocker",
status: "todo",
priority: "medium",
issueNumber: 2,
identifier: `${issuePrefix}-2`,
createdAt: issueTimestamp,
updatedAt: issueTimestamp,
},
]);
// Relation row: the blocker issue "blocks" the dependent issue.
await db.insert(issueRelations).values({
companyId,
issueId: blockerIssueId,
relatedIssueId: blockedIssueId,
type: "blocks",
});
const result = await heartbeatService(db).reconcileIssueGraphLiveness();
expect(result.escalationsCreated).toBe(1);
const escalations = await db
.select()
.from(issues)
.where(and(eq(issues.companyId, companyId), eq(issues.originKind, "harness_liveness_escalation")));
expect(escalations).toHaveLength(1);
// Project context comes from the blocker; execution workspace fields must be null.
expect(escalations[0]).toMatchObject({
parentId: blockerIssueId,
projectId: blockerProjectId,
projectWorkspaceId: blockerProjectWorkspaceId,
executionWorkspaceId: null,
executionWorkspacePreference: null,
assigneeAgentId: managerId,
});
});
it("reuses one open recovery issue for multiple dependents with the same leaf blocker", async () => {
  await enableAutoRecovery();
  const chain = await seedBlockedChain();
  const { companyId, blockedIssueId, blockerIssueId } = chain;

  // Add a second blocked issue, backdated by 25 hours, that waits on the same leaf blocker.
  const secondBlockedIssueId = randomUUID();
  const compactCompanyId = companyId.replace(/-/g, "");
  const issuePrefix = `T${compactCompanyId.slice(0, 6).toUpperCase()}`;
  const staleAgeMs = 25 * 60 * 60 * 1000;
  const issueTimestamp = new Date(Date.now() - staleAgeMs);

  await db.insert(issues).values({
    id: secondBlockedIssueId,
    companyId,
    title: "Second blocked parent",
    status: "blocked",
    priority: "medium",
    issueNumber: 3,
    identifier: `${issuePrefix}-3`,
    createdAt: issueTimestamp,
    updatedAt: issueTimestamp,
  });
  await db.insert(issueRelations).values({
    companyId,
    issueId: blockerIssueId,
    relatedIssueId: secondBlockedIssueId,
    type: "blocks",
  });

  const summary = await heartbeatService(db).reconcileIssueGraphLiveness();

  // Two findings, but only one escalation is created; the other finding reuses it.
  expect(summary.findings).toBe(2);
  expect(summary.escalationsCreated).toBe(1);
  expect(summary.existingEscalations).toBe(1);

  const escalationFilter = and(
    eq(issues.companyId, companyId),
    eq(issues.originKind, "harness_liveness_escalation"),
  );
  const escalations = await db.select().from(issues).where(escalationFilter);
  expect(escalations).toHaveLength(1);

  // The single escalation must have relation rows pointing at both dependents.
  const escalationLinks = await db
    .select({ blockedIssueId: issueRelations.relatedIssueId })
    .from(issueRelations)
    .where(and(eq(issueRelations.companyId, companyId), eq(issueRelations.issueId, escalations[0]!.id)));
  const linkedDependentIds = escalationLinks.map((link) => link.blockedIssueId).sort();
  const expectedDependentIds = [blockedIssueId, secondBlockedIssueId].sort();
  expect(linkedDependentIds).toEqual(expectedDependentIds);
});
it("creates a fresh escalation when the previous matching escalation is terminal", async () => {
await enableAutoRecovery();
const { companyId, managerId, blockedIssueId, blockerIssueId } = await seedBlockedChain();
const heartbeat = heartbeatService(db);
const incidentKey = [
@@ -265,7 +577,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
expect(openEscalations).toHaveLength(2);
const freshEscalation = openEscalations.find((issue) => issue.status !== "done");
expect(freshEscalation).toMatchObject({
parentId: blockedIssueId,
parentId: blockerIssueId,
assigneeAgentId: managerId,
status: expect.stringMatching(/^(todo|in_progress|done)$/),
});