[codex] harden heartbeat run summaries and recovery context (#3742)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Heartbeat runs are the control-plane record of what agents did, why
they woke up, and what operators should see next
> - Run lists, stranded issue comments, and live log polling all depend
on compact but accurate heartbeat summaries
> - The current branch had a focused backend slice that improves how run
result JSON is summarized, how stale process recovery comments are
written, and how live log polling resolves the active run
> - This pull request isolates that heartbeat/runtime reliability work
from the unrelated UI and dev-tooling changes
> - The benefit is more reliable issue context and cheaper run lookups
without dragging unrelated board UI changes into the same review

## What Changed

- Include the latest run failure in stranded issue comments during
orphaned process recovery.
- Bound heartbeat `result_json` payloads for list responses while
preserving the raw stored payloads.
- Narrow heartbeat log endpoint lookups so issue polling resolves the
relevant active run with less unnecessary scanning.
- Add focused tests for heartbeat list summaries, live run polling,
orphaned process recovery, and the run context/result summary helpers.

## Verification

- `pnpm vitest run
server/src/__tests__/heartbeat-context-summary.test.ts
server/src/__tests__/heartbeat-list.test.ts
server/src/__tests__/agent-live-run-routes.test.ts
server/src/__tests__/heartbeat-process-recovery.test.ts`

## Risks

- The main risk is accidentally hiding a field that some client still
expects from summarized `result_json`, or over-constraining the live log
lookup path for edge-case run routing.
- Recovery comments now surface the latest failure more aggressively, so
wording changes may affect downstream expectations if anyone parses
those comments too strictly.

## Model Used

- OpenAI Codex, GPT-5-based coding agent in the Codex CLI environment.
Exact backend model deployment ID was not exposed in-session.
Tool-assisted editing and shell execution were used.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
Dotta
2026-04-15 09:48:39 -05:00
committed by GitHub
parent c1a02497b0
commit 3fa5d25de1
7 changed files with 498 additions and 24 deletions
@@ -9,6 +9,8 @@ const mockAgentService = vi.hoisted(() => ({
const mockHeartbeatService = vi.hoisted(() => ({
getRunIssueSummary: vi.fn(),
getActiveRunIssueSummaryForAgent: vi.fn(),
getRunLogAccess: vi.fn(),
readLog: vi.fn(),
}));
const mockIssueService = vi.hoisted(() => ({
@@ -100,6 +102,19 @@ describe("agent live run routes", () => {
issueId: "issue-1",
});
mockHeartbeatService.getActiveRunIssueSummaryForAgent.mockResolvedValue(null);
mockHeartbeatService.getRunLogAccess.mockResolvedValue({
id: "run-1",
companyId: "company-1",
logStore: "local_file",
logRef: "logs/run-1.ndjson",
});
mockHeartbeatService.readLog.mockResolvedValue({
runId: "run-1",
store: "local_file",
logRef: "logs/run-1.ndjson",
content: "chunk",
nextOffset: 5,
});
});
it("returns a compact active run payload for issue polling", async () => {
@@ -163,4 +178,27 @@ describe("agent live run routes", () => {
adapterType: "codex_local",
});
});
it("uses narrow run log metadata lookups for log polling", async () => {
const res = await request(await createApp()).get("/api/heartbeat-runs/run-1/log?offset=12&limitBytes=64");
expect(res.status, JSON.stringify(res.body)).toBe(200);
expect(mockHeartbeatService.getRunLogAccess).toHaveBeenCalledWith("run-1");
expect(mockHeartbeatService.readLog).toHaveBeenCalledWith({
id: "run-1",
companyId: "company-1",
logStore: "local_file",
logRef: "logs/run-1.ndjson",
}, {
offset: 12,
limitBytes: 64,
});
expect(res.body).toEqual({
runId: "run-1",
store: "local_file",
logRef: "logs/run-1.ndjson",
content: "chunk",
nextOffset: 5,
});
});
});
@@ -0,0 +1,83 @@
import { describe, expect, it } from "vitest";
import {
summarizeHeartbeatRunContextSnapshot,
summarizeHeartbeatRunListResultJson,
} from "../services/heartbeat.js";
describe("summarizeHeartbeatRunContextSnapshot", () => {
it("keeps only the small retry/linking fields needed by the client", () => {
const summarized = summarizeHeartbeatRunContextSnapshot({
issueId: "issue-1",
taskId: "task-1",
taskKey: "PAP-1",
commentId: "comment-1",
wakeCommentId: "comment-2",
wakeReason: "retry_failed_run",
wakeSource: "on_demand",
wakeTriggerDetail: "manual",
paperclipWake: {
comments: [
{
body: "x".repeat(50_000),
},
],
},
executionStage: {
summary: "large nested object that should not be sent back in run lists",
},
});
expect(summarized).toEqual({
issueId: "issue-1",
taskId: "task-1",
taskKey: "PAP-1",
commentId: "comment-1",
wakeCommentId: "comment-2",
wakeReason: "retry_failed_run",
wakeSource: "on_demand",
wakeTriggerDetail: "manual",
});
});
it("returns null when no allowed fields are present", () => {
expect(
summarizeHeartbeatRunContextSnapshot({
paperclipWake: { comments: [{ body: "hello" }] },
}),
).toBeNull();
});
});
describe("summarizeHeartbeatRunListResultJson", () => {
it("keeps only summary fields and parses numeric cost aliases", () => {
expect(
summarizeHeartbeatRunListResultJson({
summary: "Completed the task",
result: "Updated three files",
message: "",
error: null,
totalCostUsd: "1.25",
costUsd: "0.75",
costUsdCamel: "0.5",
}),
).toEqual({
summary: "Completed the task",
result: "Updated three files",
total_cost_usd: 1.25,
cost_usd: 0.75,
costUsd: 0.5,
});
});
it("returns null when projected fields are empty", () => {
expect(
summarizeHeartbeatRunListResultJson({
summary: "",
result: null,
message: undefined,
error: " ",
totalCostUsd: "abc",
}),
).toBeNull();
});
});
+101
View File
@@ -88,4 +88,105 @@ describeEmbeddedPostgres("heartbeat list", () => {
}
}
});
it("returns small result json payloads unchanged from getRun", async () => {
const companyId = randomUUID();
const agentId = randomUUID();
const runId = randomUUID();
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`,
requireBoardApprovalForNewAgents: false,
});
await db.insert(agents).values({
id: agentId,
companyId,
name: "CodexCoder",
role: "engineer",
status: "running",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {},
permissions: {},
});
await db.insert(heartbeatRuns).values({
id: runId,
companyId,
agentId,
invocationSource: "assignment",
status: "succeeded",
resultJson: {
summary: "done",
structured: { ok: true },
},
});
const run = await heartbeatService(db).getRun(runId);
expect(run?.resultJson).toEqual({
summary: "done",
structured: { ok: true },
});
});
it("bounds oversized legacy result json payloads on getRun", async () => {
const companyId = randomUUID();
const agentId = randomUUID();
const runId = randomUUID();
const oversizedStdout = Array.from({ length: 8_000 }, (_, index) =>
`${index.toString(16).padStart(4, "0")}-${randomUUID()}`,
).join("|");
const oversizedNestedPayload = Array.from({ length: 6_000 }, (_, index) =>
`${index.toString(16).padStart(4, "0")}:${randomUUID()}`,
).join("|");
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`,
requireBoardApprovalForNewAgents: false,
});
await db.insert(agents).values({
id: agentId,
companyId,
name: "CodexCoder",
role: "engineer",
status: "running",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {},
permissions: {},
});
await db.insert(heartbeatRuns).values({
id: runId,
companyId,
agentId,
invocationSource: "assignment",
status: "succeeded",
resultJson: {
summary: "completed",
stdout: oversizedStdout,
nestedHuge: { payload: oversizedNestedPayload },
},
});
const run = await heartbeatService(db).getRun(runId);
const result = run?.resultJson as Record<string, unknown> | null;
expect(result).toMatchObject({
summary: "completed",
truncated: true,
truncationReason: "oversized_result_json",
stdoutTruncated: true,
});
expect(typeof result?.stdout).toBe("string");
expect((result?.stdout as string).length).toBeLessThan(oversizedStdout.length);
expect(result).not.toHaveProperty("nestedHuge");
});
});
@@ -598,6 +598,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("retried dispatch");
expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced.");
});
it("re-enqueues continuation for stranded in-progress work with no active run", async () => {
@@ -646,6 +647,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("retried continuation");
expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced.");
});
it("does not reconcile user-assigned work through the agent stranded-work recovery path", async () => {
+2 -2
View File
@@ -2479,7 +2479,7 @@ export function agentRoutes(db: Db) {
router.get("/heartbeat-runs/:runId/log", async (req, res) => {
const runId = req.params.runId as string;
const run = await heartbeat.getRun(runId);
const run = await heartbeat.getRunLogAccess(runId);
if (!run) {
res.status(404).json({ error: "Heartbeat run not found" });
return;
@@ -2488,7 +2488,7 @@ export function agentRoutes(db: Db) {
const offset = Number(req.query.offset ?? 0);
const limitBytes = Number(req.query.limitBytes ?? 256000);
const result = await heartbeat.readLog(runId, {
const result = await heartbeat.readLog(run, {
offset: Number.isFinite(offset) ? offset : 0,
limitBytes: Number.isFinite(limitBytes) ? limitBytes : 256000,
});
+5 -1
View File
@@ -1,4 +1,8 @@
function truncateSummaryText(value: unknown, maxLength = 500) {
export const HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS = 500;
export const HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS = 4_096;
export const HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES = 64 * 1024;
function truncateSummaryText(value: unknown, maxLength = HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS) {
if (typeof value !== "string") return null;
return value.length > maxLength ? value.slice(0, maxLength) : value;
}
+267 -21
View File
@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
import path from "node:path";
import { execFile as execFileCallback } from "node:child_process";
import { promisify } from "node:util";
import { and, asc, desc, eq, gt, inArray, isNull, or, sql } from "drizzle-orm";
import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, or, sql } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import type { BillingType, ExecutionWorkspace, ExecutionWorkspaceConfig } from "@paperclipai/shared";
import {
@@ -35,8 +35,10 @@ import { secretService } from "./secrets.js";
import { resolveDefaultAgentWorkspaceDir, resolveManagedProjectWorkspaceDir } from "../home-paths.js";
import {
buildHeartbeatRunIssueComment,
HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS,
HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS,
HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES,
mergeHeartbeatRunResultJson,
summarizeHeartbeatRunResultJson,
} from "./heartbeat-run-summary.js";
import { logActivity, type LogActivityInput } from "./activity-log.js";
import {
@@ -378,7 +380,6 @@ const heartbeatRunListColumns = {
exitCode: heartbeatRuns.exitCode,
signal: heartbeatRuns.signal,
usageJson: heartbeatRuns.usageJson,
resultJson: heartbeatRuns.resultJson,
sessionIdBefore: heartbeatRuns.sessionIdBefore,
sessionIdAfter: heartbeatRuns.sessionIdAfter,
logStore: heartbeatRuns.logStore,
@@ -395,11 +396,90 @@ const heartbeatRunListColumns = {
processStartedAt: heartbeatRuns.processStartedAt,
retryOfRunId: heartbeatRuns.retryOfRunId,
processLossRetryCount: heartbeatRuns.processLossRetryCount,
contextSnapshot: heartbeatRuns.contextSnapshot,
createdAt: heartbeatRuns.createdAt,
updatedAt: heartbeatRuns.updatedAt,
} as const;
const heartbeatRunListContextColumns = {
contextIssueId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("contextIssueId"),
contextTaskId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'taskId'`.as("contextTaskId"),
contextTaskKey: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'taskKey'`.as("contextTaskKey"),
contextCommentId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'commentId'`.as("contextCommentId"),
contextWakeCommentId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeCommentId'`.as("contextWakeCommentId"),
contextWakeReason: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeReason'`.as("contextWakeReason"),
contextWakeSource: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeSource'`.as("contextWakeSource"),
contextWakeTriggerDetail: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeTriggerDetail'`.as("contextWakeTriggerDetail"),
} as const;
const heartbeatRunListResultColumns = {
resultSummary: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'summary', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultSummary"),
resultResult: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'result', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultResult"),
resultMessage: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'message', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultMessage"),
resultError: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'error', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultError"),
resultTotalCostUsd: sql<string | null>`${heartbeatRuns.resultJson} ->> 'total_cost_usd'`.as("resultTotalCostUsd"),
resultCostUsd: sql<string | null>`${heartbeatRuns.resultJson} ->> 'cost_usd'`.as("resultCostUsd"),
resultCostUsdCamel: sql<string | null>`${heartbeatRuns.resultJson} ->> 'costUsd'`.as("resultCostUsdCamel"),
} as const;
const heartbeatRunSafeResultJsonColumn = sql<Record<string, unknown> | null>`
case
when ${heartbeatRuns.resultJson} is null then null
when pg_column_size(${heartbeatRuns.resultJson}) <= ${HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES}
then ${heartbeatRuns.resultJson}
else jsonb_strip_nulls(
jsonb_build_object(
'summary', left(${heartbeatRuns.resultJson} ->> 'summary', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
'result', left(${heartbeatRuns.resultJson} ->> 'result', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
'message', left(${heartbeatRuns.resultJson} ->> 'message', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
'error', left(${heartbeatRuns.resultJson} ->> 'error', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
'stdout', left(${heartbeatRuns.resultJson} ->> 'stdout', ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}),
'stderr', left(${heartbeatRuns.resultJson} ->> 'stderr', ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}),
'stdoutTruncated', case
when length(${heartbeatRuns.resultJson} ->> 'stdout') > ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}
then to_jsonb(true)
else null
end,
'stderrTruncated', case
when length(${heartbeatRuns.resultJson} ->> 'stderr') > ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}
then to_jsonb(true)
else null
end,
'costUsd', coalesce(
${heartbeatRuns.resultJson} -> 'costUsd',
${heartbeatRuns.resultJson} -> 'cost_usd',
${heartbeatRuns.resultJson} -> 'total_cost_usd'
),
'cost_usd', coalesce(
${heartbeatRuns.resultJson} -> 'cost_usd',
${heartbeatRuns.resultJson} -> 'costUsd',
${heartbeatRuns.resultJson} -> 'total_cost_usd'
),
'total_cost_usd', coalesce(
${heartbeatRuns.resultJson} -> 'total_cost_usd',
${heartbeatRuns.resultJson} -> 'cost_usd',
${heartbeatRuns.resultJson} -> 'costUsd'
),
'truncated', true,
'truncationReason', 'oversized_result_json',
'originalSizeBytes', pg_column_size(${heartbeatRuns.resultJson})
)
)
end
`.as("resultJson");
const heartbeatRunSafeColumns = {
...getTableColumns(heartbeatRuns),
processGroupId: heartbeatRunProcessGroupIdColumn,
resultJson: heartbeatRunSafeResultJsonColumn,
} as const;
const heartbeatRunLogAccessColumns = {
id: heartbeatRuns.id,
companyId: heartbeatRuns.companyId,
logStore: heartbeatRuns.logStore,
logRef: heartbeatRuns.logRef,
} as const;
const heartbeatRunIssueSummaryColumns = {
id: heartbeatRuns.id,
status: heartbeatRuns.status,
@@ -519,6 +599,87 @@ function readNonEmptyString(value: unknown): string | null {
return typeof value === "string" && value.trim().length > 0 ? value : null;
}
export function summarizeHeartbeatRunContextSnapshot(
contextSnapshot: Record<string, unknown> | null | undefined,
): Record<string, unknown> | null {
const summary: Record<string, unknown> = {};
const allowedKeys = [
"issueId",
"taskId",
"taskKey",
"commentId",
"wakeCommentId",
"wakeReason",
"wakeSource",
"wakeTriggerDetail",
] as const;
for (const key of allowedKeys) {
const value = readNonEmptyString(contextSnapshot?.[key]);
if (value) summary[key] = value;
}
return Object.keys(summary).length > 0 ? summary : null;
}
export function summarizeHeartbeatRunListResultJson(input: {
summary?: string | null;
result?: string | null;
message?: string | null;
error?: string | null;
totalCostUsd?: string | null;
costUsd?: string | null;
costUsdCamel?: string | null;
}): Record<string, unknown> | null {
const summary: Record<string, unknown> = {};
for (const [key, value] of [
["summary", input.summary],
["result", input.result],
["message", input.message],
["error", input.error],
] as const) {
const normalized = readNonEmptyString(value);
if (normalized) summary[key] = normalized;
}
for (const [key, value] of [
["total_cost_usd", input.totalCostUsd],
["cost_usd", input.costUsd],
["costUsd", input.costUsdCamel],
] as const) {
const normalized = readNonEmptyString(value);
if (!normalized) continue;
const parsed = Number(normalized);
if (Number.isFinite(parsed)) summary[key] = parsed;
}
return Object.keys(summary).length > 0 ? summary : null;
}
function summarizeRunFailureForIssueComment(
run: Pick<typeof heartbeatRuns.$inferSelect, "error" | "errorCode"> | null | undefined,
) {
if (!run) return null;
const errorCode = readNonEmptyString(run.errorCode)?.trim() ?? null;
const rawError = readNonEmptyString(run.error)?.trim() ?? null;
const apiMessageMatch = rawError?.match(/"message"\s*:\s*"([^"]+)"/);
const firstLine = rawError
?.split(/\r?\n/)
.map((line) => line.trim())
.find(Boolean) ?? null;
const summarySource = apiMessageMatch?.[1] ?? firstLine;
const summary =
summarySource && summarySource.length > 240
? `${summarySource.slice(0, 237)}...`
: summarySource;
if (errorCode && summary) return ` Latest retry failure: \`${errorCode}\` - ${summary}.`;
if (errorCode) return ` Latest retry failure: \`${errorCode}\`.`;
if (summary) return ` Latest retry failure: ${summary}.`;
return null;
}
function normalizeLedgerBillingType(value: unknown): BillingType {
const raw = readNonEmptyString(value);
switch (raw) {
@@ -1351,9 +1512,17 @@ export function heartbeatService(db: Db) {
.then((rows) => rows[0] ?? null);
}
async function getRun(runId: string) {
async function getRun(runId: string, opts?: { unsafeFullResultJson?: boolean }) {
return db
.select()
.select(opts?.unsafeFullResultJson ? getTableColumns(heartbeatRuns) : heartbeatRunSafeColumns)
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, runId))
.then((rows) => rows[0] ?? null);
}
async function getRunLogAccess(runId: string) {
return db
.select(heartbeatRunLogAccessColumns)
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, runId))
.then((rows) => rows[0] ?? null);
@@ -1421,7 +1590,10 @@ export function heartbeatService(db: Db) {
conditions.push(sql`${heartbeatRuns.id} <> ${opts.excludeRunId}`);
}
return db
.select()
.select({
id: heartbeatRuns.id,
usageJson: heartbeatRuns.usageJson,
})
.from(heartbeatRuns)
.where(and(...conditions))
.orderBy(desc(heartbeatRuns.createdAt))
@@ -1497,8 +1669,8 @@ export function heartbeatService(db: Db) {
id: heartbeatRuns.id,
createdAt: heartbeatRuns.createdAt,
usageJson: heartbeatRuns.usageJson,
resultJson: heartbeatRuns.resultJson,
error: heartbeatRuns.error,
...heartbeatRunListResultColumns,
})
.from(heartbeatRuns)
.where(and(eq(heartbeatRuns.agentId, agent.id), eq(heartbeatRuns.sessionIdAfter, sessionId)))
@@ -1552,7 +1724,15 @@ export function heartbeatService(db: Db) {
};
}
const latestSummary = summarizeHeartbeatRunResultJson(latestRun.resultJson);
const latestSummary = summarizeHeartbeatRunListResultJson({
summary: latestRun?.resultSummary,
result: latestRun?.resultResult,
message: latestRun?.resultMessage,
error: latestRun?.resultError,
totalCostUsd: latestRun?.resultTotalCostUsd,
costUsd: latestRun?.resultCostUsd,
costUsdCamel: latestRun?.resultCostUsdCamel,
});
const latestTextSummary =
readNonEmptyString(latestSummary?.summary) ??
readNonEmptyString(latestSummary?.result) ??
@@ -2681,7 +2861,13 @@ export function heartbeatService(db: Db) {
async function getLatestIssueRun(companyId: string, issueId: string) {
return db
.select()
.select({
id: heartbeatRuns.id,
status: heartbeatRuns.status,
error: heartbeatRuns.error,
errorCode: heartbeatRuns.errorCode,
contextSnapshot: heartbeatRuns.contextSnapshot,
})
.from(heartbeatRuns)
.where(
and(
@@ -2771,7 +2957,10 @@ export function heartbeatService(db: Db) {
async function escalateStrandedAssignedIssue(input: {
issue: typeof issues.$inferSelect;
previousStatus: "todo" | "in_progress";
latestRun: typeof heartbeatRuns.$inferSelect | null;
latestRun: Pick<
typeof heartbeatRuns.$inferSelect,
"id" | "status" | "error" | "errorCode" | "contextSnapshot"
> | null;
comment: string;
}) {
const updated = await issuesSvc.update(input.issue.id, {
@@ -2857,13 +3046,15 @@ export function heartbeatService(db: Db) {
}
if (latestRetryReason === "assignment_recovery") {
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
const updated = await escalateStrandedAssignedIssue({
issue,
previousStatus: "todo",
latestRun,
comment:
"Paperclip automatically retried dispatch for this assigned `todo` issue after a lost wake/run, " +
"but it still has no live execution path. Moving it to `blocked` so it is visible for intervention.",
`but it still has no live execution path.${failureSummary ?? ""} ` +
"Moving it to `blocked` so it is visible for intervention.",
});
if (updated) {
result.escalated += 1;
@@ -2892,14 +3083,15 @@ export function heartbeatService(db: Db) {
}
if (latestRetryReason === "issue_continuation_needed") {
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
const updated = await escalateStrandedAssignedIssue({
issue,
previousStatus: "in_progress",
latestRun,
comment:
"Paperclip automatically retried continuation for this assigned `in_progress` issue after its live " +
"execution disappeared, but it still has no live execution path. Moving it to `blocked` so it is " +
"visible for intervention.",
`execution disappeared, but it still has no live execution path.${failureSummary ?? ""} ` +
"Moving it to `blocked` so it is visible for intervention.",
});
if (updated) {
result.escalated += 1;
@@ -4940,7 +5132,11 @@ export function heartbeatService(db: Db) {
return {
list: async (companyId: string, agentId?: string, limit?: number) => {
const query = db
.select(heartbeatRunListColumns)
.select({
...heartbeatRunListColumns,
...heartbeatRunListContextColumns,
...heartbeatRunListResultColumns,
})
.from(heartbeatRuns)
.where(
agentId
@@ -4950,14 +5146,55 @@ export function heartbeatService(db: Db) {
.orderBy(desc(heartbeatRuns.createdAt));
const rows = limit ? await query.limit(limit) : await query;
return rows.map((row) => ({
...row,
resultJson: summarizeHeartbeatRunResultJson(row.resultJson),
}));
return rows.map((row) => {
const {
contextIssueId,
contextTaskId,
contextTaskKey,
contextCommentId,
contextWakeCommentId,
contextWakeReason,
contextWakeSource,
contextWakeTriggerDetail,
resultSummary,
resultResult,
resultMessage,
resultError,
resultTotalCostUsd,
resultCostUsd,
resultCostUsdCamel,
...rest
} = row;
return {
...rest,
contextSnapshot: summarizeHeartbeatRunContextSnapshot({
issueId: contextIssueId,
taskId: contextTaskId,
taskKey: contextTaskKey,
commentId: contextCommentId,
wakeCommentId: contextWakeCommentId,
wakeReason: contextWakeReason,
wakeSource: contextWakeSource,
wakeTriggerDetail: contextWakeTriggerDetail,
}),
resultJson: summarizeHeartbeatRunListResultJson({
summary: resultSummary,
result: resultResult,
message: resultMessage,
error: resultError,
totalCostUsd: resultTotalCostUsd,
costUsd: resultCostUsd,
costUsdCamel: resultCostUsdCamel,
}),
};
});
},
getRun,
getRunLogAccess,
getRuntimeState: async (agentId: string) => {
const state = await getRuntimeState(agentId);
const agent = await getAgent(agentId);
@@ -5031,8 +5268,17 @@ export function heartbeatService(db: Db) {
.orderBy(asc(heartbeatRunEvents.seq))
.limit(Math.max(1, Math.min(limit, 1000))),
readLog: async (runId: string, opts?: { offset?: number; limitBytes?: number }) => {
const run = await getRun(runId);
readLog: async (
runOrLookup: string | {
id: string;
companyId: string;
logStore: string | null;
logRef: string | null;
},
opts?: { offset?: number; limitBytes?: number },
) => {
const run = typeof runOrLookup === "string" ? await getRunLogAccess(runOrLookup) : runOrLookup;
const runId = typeof runOrLookup === "string" ? runOrLookup : runOrLookup.id;
if (!run) throw notFound("Heartbeat run not found");
if (!run.logStore || !run.logRef) throw notFound("Run log not found");