forked from farhoodlabs/paperclip
[codex] harden heartbeat run summaries and recovery context (#3742)
## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Heartbeat runs are the control-plane record of what agents did, why they woke up, and what operators should see next > - Run lists, stranded issue comments, and live log polling all depend on compact but accurate heartbeat summaries > - The current branch had a focused backend slice that improves how run result JSON is summarized, how stale process recovery comments are written, and how live log polling resolves the active run > - This pull request isolates that heartbeat/runtime reliability work from the unrelated UI and dev-tooling changes > - The benefit is more reliable issue context and cheaper run lookups without dragging unrelated board UI changes into the same review ## What Changed - Include the latest run failure in stranded issue comments during orphaned process recovery. - Bound heartbeat `result_json` payloads for list responses while preserving the raw stored payloads. - Narrow heartbeat log endpoint lookups so issue polling resolves the relevant active run with less unnecessary scanning. - Add focused tests for heartbeat list summaries, live run polling, orphaned process recovery, and the run context/result summary helpers. ## Verification - `pnpm vitest run server/src/__tests__/heartbeat-context-summary.test.ts server/src/__tests__/heartbeat-list.test.ts server/src/__tests__/agent-live-run-routes.test.ts server/src/__tests__/heartbeat-process-recovery.test.ts` ## Risks - The main risk is accidentally hiding a field that some client still expects from summarized `result_json`, or over-constraining the live log lookup path for edge-case run routing. - Recovery comments now surface the latest failure more aggressively, so wording changes may affect downstream expectations if anyone parses those comments too strictly. ## Model Used - OpenAI Codex, GPT-5-based coding agent in the Codex CLI environment. Exact backend model deployment ID was not exposed in-session. Tool-assisted editing and shell execution were used. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
This commit is contained in:
@@ -9,6 +9,8 @@ const mockAgentService = vi.hoisted(() => ({
|
||||
const mockHeartbeatService = vi.hoisted(() => ({
|
||||
getRunIssueSummary: vi.fn(),
|
||||
getActiveRunIssueSummaryForAgent: vi.fn(),
|
||||
getRunLogAccess: vi.fn(),
|
||||
readLog: vi.fn(),
|
||||
}));
|
||||
|
||||
const mockIssueService = vi.hoisted(() => ({
|
||||
@@ -100,6 +102,19 @@ describe("agent live run routes", () => {
|
||||
issueId: "issue-1",
|
||||
});
|
||||
mockHeartbeatService.getActiveRunIssueSummaryForAgent.mockResolvedValue(null);
|
||||
mockHeartbeatService.getRunLogAccess.mockResolvedValue({
|
||||
id: "run-1",
|
||||
companyId: "company-1",
|
||||
logStore: "local_file",
|
||||
logRef: "logs/run-1.ndjson",
|
||||
});
|
||||
mockHeartbeatService.readLog.mockResolvedValue({
|
||||
runId: "run-1",
|
||||
store: "local_file",
|
||||
logRef: "logs/run-1.ndjson",
|
||||
content: "chunk",
|
||||
nextOffset: 5,
|
||||
});
|
||||
});
|
||||
|
||||
it("returns a compact active run payload for issue polling", async () => {
|
||||
@@ -163,4 +178,27 @@ describe("agent live run routes", () => {
|
||||
adapterType: "codex_local",
|
||||
});
|
||||
});
|
||||
|
||||
it("uses narrow run log metadata lookups for log polling", async () => {
|
||||
const res = await request(await createApp()).get("/api/heartbeat-runs/run-1/log?offset=12&limitBytes=64");
|
||||
|
||||
expect(res.status, JSON.stringify(res.body)).toBe(200);
|
||||
expect(mockHeartbeatService.getRunLogAccess).toHaveBeenCalledWith("run-1");
|
||||
expect(mockHeartbeatService.readLog).toHaveBeenCalledWith({
|
||||
id: "run-1",
|
||||
companyId: "company-1",
|
||||
logStore: "local_file",
|
||||
logRef: "logs/run-1.ndjson",
|
||||
}, {
|
||||
offset: 12,
|
||||
limitBytes: 64,
|
||||
});
|
||||
expect(res.body).toEqual({
|
||||
runId: "run-1",
|
||||
store: "local_file",
|
||||
logRef: "logs/run-1.ndjson",
|
||||
content: "chunk",
|
||||
nextOffset: 5,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
summarizeHeartbeatRunContextSnapshot,
|
||||
summarizeHeartbeatRunListResultJson,
|
||||
} from "../services/heartbeat.js";
|
||||
|
||||
describe("summarizeHeartbeatRunContextSnapshot", () => {
|
||||
it("keeps only the small retry/linking fields needed by the client", () => {
|
||||
const summarized = summarizeHeartbeatRunContextSnapshot({
|
||||
issueId: "issue-1",
|
||||
taskId: "task-1",
|
||||
taskKey: "PAP-1",
|
||||
commentId: "comment-1",
|
||||
wakeCommentId: "comment-2",
|
||||
wakeReason: "retry_failed_run",
|
||||
wakeSource: "on_demand",
|
||||
wakeTriggerDetail: "manual",
|
||||
paperclipWake: {
|
||||
comments: [
|
||||
{
|
||||
body: "x".repeat(50_000),
|
||||
},
|
||||
],
|
||||
},
|
||||
executionStage: {
|
||||
summary: "large nested object that should not be sent back in run lists",
|
||||
},
|
||||
});
|
||||
|
||||
expect(summarized).toEqual({
|
||||
issueId: "issue-1",
|
||||
taskId: "task-1",
|
||||
taskKey: "PAP-1",
|
||||
commentId: "comment-1",
|
||||
wakeCommentId: "comment-2",
|
||||
wakeReason: "retry_failed_run",
|
||||
wakeSource: "on_demand",
|
||||
wakeTriggerDetail: "manual",
|
||||
});
|
||||
});
|
||||
|
||||
it("returns null when no allowed fields are present", () => {
|
||||
expect(
|
||||
summarizeHeartbeatRunContextSnapshot({
|
||||
paperclipWake: { comments: [{ body: "hello" }] },
|
||||
}),
|
||||
).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("summarizeHeartbeatRunListResultJson", () => {
|
||||
it("keeps only summary fields and parses numeric cost aliases", () => {
|
||||
expect(
|
||||
summarizeHeartbeatRunListResultJson({
|
||||
summary: "Completed the task",
|
||||
result: "Updated three files",
|
||||
message: "",
|
||||
error: null,
|
||||
totalCostUsd: "1.25",
|
||||
costUsd: "0.75",
|
||||
costUsdCamel: "0.5",
|
||||
}),
|
||||
).toEqual({
|
||||
summary: "Completed the task",
|
||||
result: "Updated three files",
|
||||
total_cost_usd: 1.25,
|
||||
cost_usd: 0.75,
|
||||
costUsd: 0.5,
|
||||
});
|
||||
});
|
||||
|
||||
it("returns null when projected fields are empty", () => {
|
||||
expect(
|
||||
summarizeHeartbeatRunListResultJson({
|
||||
summary: "",
|
||||
result: null,
|
||||
message: undefined,
|
||||
error: " ",
|
||||
totalCostUsd: "abc",
|
||||
}),
|
||||
).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -88,4 +88,105 @@ describeEmbeddedPostgres("heartbeat list", () => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("returns small result json payloads unchanged from getRun", async () => {
|
||||
const companyId = randomUUID();
|
||||
const agentId = randomUUID();
|
||||
const runId = randomUUID();
|
||||
|
||||
await db.insert(companies).values({
|
||||
id: companyId,
|
||||
name: "Paperclip",
|
||||
issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`,
|
||||
requireBoardApprovalForNewAgents: false,
|
||||
});
|
||||
|
||||
await db.insert(agents).values({
|
||||
id: agentId,
|
||||
companyId,
|
||||
name: "CodexCoder",
|
||||
role: "engineer",
|
||||
status: "running",
|
||||
adapterType: "codex_local",
|
||||
adapterConfig: {},
|
||||
runtimeConfig: {},
|
||||
permissions: {},
|
||||
});
|
||||
|
||||
await db.insert(heartbeatRuns).values({
|
||||
id: runId,
|
||||
companyId,
|
||||
agentId,
|
||||
invocationSource: "assignment",
|
||||
status: "succeeded",
|
||||
resultJson: {
|
||||
summary: "done",
|
||||
structured: { ok: true },
|
||||
},
|
||||
});
|
||||
|
||||
const run = await heartbeatService(db).getRun(runId);
|
||||
|
||||
expect(run?.resultJson).toEqual({
|
||||
summary: "done",
|
||||
structured: { ok: true },
|
||||
});
|
||||
});
|
||||
|
||||
it("bounds oversized legacy result json payloads on getRun", async () => {
|
||||
const companyId = randomUUID();
|
||||
const agentId = randomUUID();
|
||||
const runId = randomUUID();
|
||||
const oversizedStdout = Array.from({ length: 8_000 }, (_, index) =>
|
||||
`${index.toString(16).padStart(4, "0")}-${randomUUID()}`,
|
||||
).join("|");
|
||||
const oversizedNestedPayload = Array.from({ length: 6_000 }, (_, index) =>
|
||||
`${index.toString(16).padStart(4, "0")}:${randomUUID()}`,
|
||||
).join("|");
|
||||
|
||||
await db.insert(companies).values({
|
||||
id: companyId,
|
||||
name: "Paperclip",
|
||||
issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`,
|
||||
requireBoardApprovalForNewAgents: false,
|
||||
});
|
||||
|
||||
await db.insert(agents).values({
|
||||
id: agentId,
|
||||
companyId,
|
||||
name: "CodexCoder",
|
||||
role: "engineer",
|
||||
status: "running",
|
||||
adapterType: "codex_local",
|
||||
adapterConfig: {},
|
||||
runtimeConfig: {},
|
||||
permissions: {},
|
||||
});
|
||||
|
||||
await db.insert(heartbeatRuns).values({
|
||||
id: runId,
|
||||
companyId,
|
||||
agentId,
|
||||
invocationSource: "assignment",
|
||||
status: "succeeded",
|
||||
resultJson: {
|
||||
summary: "completed",
|
||||
stdout: oversizedStdout,
|
||||
nestedHuge: { payload: oversizedNestedPayload },
|
||||
},
|
||||
});
|
||||
|
||||
const run = await heartbeatService(db).getRun(runId);
|
||||
const result = run?.resultJson as Record<string, unknown> | null;
|
||||
|
||||
expect(result).toMatchObject({
|
||||
summary: "completed",
|
||||
truncated: true,
|
||||
truncationReason: "oversized_result_json",
|
||||
stdoutTruncated: true,
|
||||
});
|
||||
expect(typeof result?.stdout).toBe("string");
|
||||
expect((result?.stdout as string).length).toBeLessThan(oversizedStdout.length);
|
||||
expect(result).not.toHaveProperty("nestedHuge");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -598,6 +598,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
|
||||
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
|
||||
expect(comments).toHaveLength(1);
|
||||
expect(comments[0]?.body).toContain("retried dispatch");
|
||||
expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced.");
|
||||
});
|
||||
|
||||
it("re-enqueues continuation for stranded in-progress work with no active run", async () => {
|
||||
@@ -646,6 +647,7 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => {
|
||||
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, issueId));
|
||||
expect(comments).toHaveLength(1);
|
||||
expect(comments[0]?.body).toContain("retried continuation");
|
||||
expect(comments[0]?.body).toContain("Latest retry failure: `process_lost` - run failed before issue advanced.");
|
||||
});
|
||||
|
||||
it("does not reconcile user-assigned work through the agent stranded-work recovery path", async () => {
|
||||
|
||||
@@ -2479,7 +2479,7 @@ export function agentRoutes(db: Db) {
|
||||
|
||||
router.get("/heartbeat-runs/:runId/log", async (req, res) => {
|
||||
const runId = req.params.runId as string;
|
||||
const run = await heartbeat.getRun(runId);
|
||||
const run = await heartbeat.getRunLogAccess(runId);
|
||||
if (!run) {
|
||||
res.status(404).json({ error: "Heartbeat run not found" });
|
||||
return;
|
||||
@@ -2488,7 +2488,7 @@ export function agentRoutes(db: Db) {
|
||||
|
||||
const offset = Number(req.query.offset ?? 0);
|
||||
const limitBytes = Number(req.query.limitBytes ?? 256000);
|
||||
const result = await heartbeat.readLog(runId, {
|
||||
const result = await heartbeat.readLog(run, {
|
||||
offset: Number.isFinite(offset) ? offset : 0,
|
||||
limitBytes: Number.isFinite(limitBytes) ? limitBytes : 256000,
|
||||
});
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
function truncateSummaryText(value: unknown, maxLength = 500) {
|
||||
export const HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS = 500;
|
||||
export const HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS = 4_096;
|
||||
export const HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES = 64 * 1024;
|
||||
|
||||
function truncateSummaryText(value: unknown, maxLength = HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS) {
|
||||
if (typeof value !== "string") return null;
|
||||
return value.length > maxLength ? value.slice(0, maxLength) : value;
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { execFile as execFileCallback } from "node:child_process";
|
||||
import { promisify } from "node:util";
|
||||
import { and, asc, desc, eq, gt, inArray, isNull, or, sql } from "drizzle-orm";
|
||||
import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, or, sql } from "drizzle-orm";
|
||||
import type { Db } from "@paperclipai/db";
|
||||
import type { BillingType, ExecutionWorkspace, ExecutionWorkspaceConfig } from "@paperclipai/shared";
|
||||
import {
|
||||
@@ -35,8 +35,10 @@ import { secretService } from "./secrets.js";
|
||||
import { resolveDefaultAgentWorkspaceDir, resolveManagedProjectWorkspaceDir } from "../home-paths.js";
|
||||
import {
|
||||
buildHeartbeatRunIssueComment,
|
||||
HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS,
|
||||
HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS,
|
||||
HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES,
|
||||
mergeHeartbeatRunResultJson,
|
||||
summarizeHeartbeatRunResultJson,
|
||||
} from "./heartbeat-run-summary.js";
|
||||
import { logActivity, type LogActivityInput } from "./activity-log.js";
|
||||
import {
|
||||
@@ -378,7 +380,6 @@ const heartbeatRunListColumns = {
|
||||
exitCode: heartbeatRuns.exitCode,
|
||||
signal: heartbeatRuns.signal,
|
||||
usageJson: heartbeatRuns.usageJson,
|
||||
resultJson: heartbeatRuns.resultJson,
|
||||
sessionIdBefore: heartbeatRuns.sessionIdBefore,
|
||||
sessionIdAfter: heartbeatRuns.sessionIdAfter,
|
||||
logStore: heartbeatRuns.logStore,
|
||||
@@ -395,11 +396,90 @@ const heartbeatRunListColumns = {
|
||||
processStartedAt: heartbeatRuns.processStartedAt,
|
||||
retryOfRunId: heartbeatRuns.retryOfRunId,
|
||||
processLossRetryCount: heartbeatRuns.processLossRetryCount,
|
||||
contextSnapshot: heartbeatRuns.contextSnapshot,
|
||||
createdAt: heartbeatRuns.createdAt,
|
||||
updatedAt: heartbeatRuns.updatedAt,
|
||||
} as const;
|
||||
|
||||
const heartbeatRunListContextColumns = {
|
||||
contextIssueId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("contextIssueId"),
|
||||
contextTaskId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'taskId'`.as("contextTaskId"),
|
||||
contextTaskKey: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'taskKey'`.as("contextTaskKey"),
|
||||
contextCommentId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'commentId'`.as("contextCommentId"),
|
||||
contextWakeCommentId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeCommentId'`.as("contextWakeCommentId"),
|
||||
contextWakeReason: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeReason'`.as("contextWakeReason"),
|
||||
contextWakeSource: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeSource'`.as("contextWakeSource"),
|
||||
contextWakeTriggerDetail: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'wakeTriggerDetail'`.as("contextWakeTriggerDetail"),
|
||||
} as const;
|
||||
|
||||
const heartbeatRunListResultColumns = {
|
||||
resultSummary: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'summary', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultSummary"),
|
||||
resultResult: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'result', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultResult"),
|
||||
resultMessage: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'message', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultMessage"),
|
||||
resultError: sql<string | null>`left(${heartbeatRuns.resultJson} ->> 'error', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS})`.as("resultError"),
|
||||
resultTotalCostUsd: sql<string | null>`${heartbeatRuns.resultJson} ->> 'total_cost_usd'`.as("resultTotalCostUsd"),
|
||||
resultCostUsd: sql<string | null>`${heartbeatRuns.resultJson} ->> 'cost_usd'`.as("resultCostUsd"),
|
||||
resultCostUsdCamel: sql<string | null>`${heartbeatRuns.resultJson} ->> 'costUsd'`.as("resultCostUsdCamel"),
|
||||
} as const;
|
||||
|
||||
const heartbeatRunSafeResultJsonColumn = sql<Record<string, unknown> | null>`
|
||||
case
|
||||
when ${heartbeatRuns.resultJson} is null then null
|
||||
when pg_column_size(${heartbeatRuns.resultJson}) <= ${HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES}
|
||||
then ${heartbeatRuns.resultJson}
|
||||
else jsonb_strip_nulls(
|
||||
jsonb_build_object(
|
||||
'summary', left(${heartbeatRuns.resultJson} ->> 'summary', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
|
||||
'result', left(${heartbeatRuns.resultJson} ->> 'result', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
|
||||
'message', left(${heartbeatRuns.resultJson} ->> 'message', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
|
||||
'error', left(${heartbeatRuns.resultJson} ->> 'error', ${HEARTBEAT_RUN_RESULT_SUMMARY_MAX_CHARS}),
|
||||
'stdout', left(${heartbeatRuns.resultJson} ->> 'stdout', ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}),
|
||||
'stderr', left(${heartbeatRuns.resultJson} ->> 'stderr', ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}),
|
||||
'stdoutTruncated', case
|
||||
when length(${heartbeatRuns.resultJson} ->> 'stdout') > ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}
|
||||
then to_jsonb(true)
|
||||
else null
|
||||
end,
|
||||
'stderrTruncated', case
|
||||
when length(${heartbeatRuns.resultJson} ->> 'stderr') > ${HEARTBEAT_RUN_RESULT_OUTPUT_MAX_CHARS}
|
||||
then to_jsonb(true)
|
||||
else null
|
||||
end,
|
||||
'costUsd', coalesce(
|
||||
${heartbeatRuns.resultJson} -> 'costUsd',
|
||||
${heartbeatRuns.resultJson} -> 'cost_usd',
|
||||
${heartbeatRuns.resultJson} -> 'total_cost_usd'
|
||||
),
|
||||
'cost_usd', coalesce(
|
||||
${heartbeatRuns.resultJson} -> 'cost_usd',
|
||||
${heartbeatRuns.resultJson} -> 'costUsd',
|
||||
${heartbeatRuns.resultJson} -> 'total_cost_usd'
|
||||
),
|
||||
'total_cost_usd', coalesce(
|
||||
${heartbeatRuns.resultJson} -> 'total_cost_usd',
|
||||
${heartbeatRuns.resultJson} -> 'cost_usd',
|
||||
${heartbeatRuns.resultJson} -> 'costUsd'
|
||||
),
|
||||
'truncated', true,
|
||||
'truncationReason', 'oversized_result_json',
|
||||
'originalSizeBytes', pg_column_size(${heartbeatRuns.resultJson})
|
||||
)
|
||||
)
|
||||
end
|
||||
`.as("resultJson");
|
||||
|
||||
const heartbeatRunSafeColumns = {
|
||||
...getTableColumns(heartbeatRuns),
|
||||
processGroupId: heartbeatRunProcessGroupIdColumn,
|
||||
resultJson: heartbeatRunSafeResultJsonColumn,
|
||||
} as const;
|
||||
|
||||
const heartbeatRunLogAccessColumns = {
|
||||
id: heartbeatRuns.id,
|
||||
companyId: heartbeatRuns.companyId,
|
||||
logStore: heartbeatRuns.logStore,
|
||||
logRef: heartbeatRuns.logRef,
|
||||
} as const;
|
||||
|
||||
const heartbeatRunIssueSummaryColumns = {
|
||||
id: heartbeatRuns.id,
|
||||
status: heartbeatRuns.status,
|
||||
@@ -519,6 +599,87 @@ function readNonEmptyString(value: unknown): string | null {
|
||||
return typeof value === "string" && value.trim().length > 0 ? value : null;
|
||||
}
|
||||
|
||||
export function summarizeHeartbeatRunContextSnapshot(
|
||||
contextSnapshot: Record<string, unknown> | null | undefined,
|
||||
): Record<string, unknown> | null {
|
||||
const summary: Record<string, unknown> = {};
|
||||
const allowedKeys = [
|
||||
"issueId",
|
||||
"taskId",
|
||||
"taskKey",
|
||||
"commentId",
|
||||
"wakeCommentId",
|
||||
"wakeReason",
|
||||
"wakeSource",
|
||||
"wakeTriggerDetail",
|
||||
] as const;
|
||||
|
||||
for (const key of allowedKeys) {
|
||||
const value = readNonEmptyString(contextSnapshot?.[key]);
|
||||
if (value) summary[key] = value;
|
||||
}
|
||||
|
||||
return Object.keys(summary).length > 0 ? summary : null;
|
||||
}
|
||||
|
||||
export function summarizeHeartbeatRunListResultJson(input: {
|
||||
summary?: string | null;
|
||||
result?: string | null;
|
||||
message?: string | null;
|
||||
error?: string | null;
|
||||
totalCostUsd?: string | null;
|
||||
costUsd?: string | null;
|
||||
costUsdCamel?: string | null;
|
||||
}): Record<string, unknown> | null {
|
||||
const summary: Record<string, unknown> = {};
|
||||
for (const [key, value] of [
|
||||
["summary", input.summary],
|
||||
["result", input.result],
|
||||
["message", input.message],
|
||||
["error", input.error],
|
||||
] as const) {
|
||||
const normalized = readNonEmptyString(value);
|
||||
if (normalized) summary[key] = normalized;
|
||||
}
|
||||
|
||||
for (const [key, value] of [
|
||||
["total_cost_usd", input.totalCostUsd],
|
||||
["cost_usd", input.costUsd],
|
||||
["costUsd", input.costUsdCamel],
|
||||
] as const) {
|
||||
const normalized = readNonEmptyString(value);
|
||||
if (!normalized) continue;
|
||||
const parsed = Number(normalized);
|
||||
if (Number.isFinite(parsed)) summary[key] = parsed;
|
||||
}
|
||||
|
||||
return Object.keys(summary).length > 0 ? summary : null;
|
||||
}
|
||||
|
||||
function summarizeRunFailureForIssueComment(
|
||||
run: Pick<typeof heartbeatRuns.$inferSelect, "error" | "errorCode"> | null | undefined,
|
||||
) {
|
||||
if (!run) return null;
|
||||
|
||||
const errorCode = readNonEmptyString(run.errorCode)?.trim() ?? null;
|
||||
const rawError = readNonEmptyString(run.error)?.trim() ?? null;
|
||||
const apiMessageMatch = rawError?.match(/"message"\s*:\s*"([^"]+)"/);
|
||||
const firstLine = rawError
|
||||
?.split(/\r?\n/)
|
||||
.map((line) => line.trim())
|
||||
.find(Boolean) ?? null;
|
||||
const summarySource = apiMessageMatch?.[1] ?? firstLine;
|
||||
const summary =
|
||||
summarySource && summarySource.length > 240
|
||||
? `${summarySource.slice(0, 237)}...`
|
||||
: summarySource;
|
||||
|
||||
if (errorCode && summary) return ` Latest retry failure: \`${errorCode}\` - ${summary}.`;
|
||||
if (errorCode) return ` Latest retry failure: \`${errorCode}\`.`;
|
||||
if (summary) return ` Latest retry failure: ${summary}.`;
|
||||
return null;
|
||||
}
|
||||
|
||||
function normalizeLedgerBillingType(value: unknown): BillingType {
|
||||
const raw = readNonEmptyString(value);
|
||||
switch (raw) {
|
||||
@@ -1351,9 +1512,17 @@ export function heartbeatService(db: Db) {
|
||||
.then((rows) => rows[0] ?? null);
|
||||
}
|
||||
|
||||
async function getRun(runId: string) {
|
||||
async function getRun(runId: string, opts?: { unsafeFullResultJson?: boolean }) {
|
||||
return db
|
||||
.select()
|
||||
.select(opts?.unsafeFullResultJson ? getTableColumns(heartbeatRuns) : heartbeatRunSafeColumns)
|
||||
.from(heartbeatRuns)
|
||||
.where(eq(heartbeatRuns.id, runId))
|
||||
.then((rows) => rows[0] ?? null);
|
||||
}
|
||||
|
||||
async function getRunLogAccess(runId: string) {
|
||||
return db
|
||||
.select(heartbeatRunLogAccessColumns)
|
||||
.from(heartbeatRuns)
|
||||
.where(eq(heartbeatRuns.id, runId))
|
||||
.then((rows) => rows[0] ?? null);
|
||||
@@ -1421,7 +1590,10 @@ export function heartbeatService(db: Db) {
|
||||
conditions.push(sql`${heartbeatRuns.id} <> ${opts.excludeRunId}`);
|
||||
}
|
||||
return db
|
||||
.select()
|
||||
.select({
|
||||
id: heartbeatRuns.id,
|
||||
usageJson: heartbeatRuns.usageJson,
|
||||
})
|
||||
.from(heartbeatRuns)
|
||||
.where(and(...conditions))
|
||||
.orderBy(desc(heartbeatRuns.createdAt))
|
||||
@@ -1497,8 +1669,8 @@ export function heartbeatService(db: Db) {
|
||||
id: heartbeatRuns.id,
|
||||
createdAt: heartbeatRuns.createdAt,
|
||||
usageJson: heartbeatRuns.usageJson,
|
||||
resultJson: heartbeatRuns.resultJson,
|
||||
error: heartbeatRuns.error,
|
||||
...heartbeatRunListResultColumns,
|
||||
})
|
||||
.from(heartbeatRuns)
|
||||
.where(and(eq(heartbeatRuns.agentId, agent.id), eq(heartbeatRuns.sessionIdAfter, sessionId)))
|
||||
@@ -1552,7 +1724,15 @@ export function heartbeatService(db: Db) {
|
||||
};
|
||||
}
|
||||
|
||||
const latestSummary = summarizeHeartbeatRunResultJson(latestRun.resultJson);
|
||||
const latestSummary = summarizeHeartbeatRunListResultJson({
|
||||
summary: latestRun?.resultSummary,
|
||||
result: latestRun?.resultResult,
|
||||
message: latestRun?.resultMessage,
|
||||
error: latestRun?.resultError,
|
||||
totalCostUsd: latestRun?.resultTotalCostUsd,
|
||||
costUsd: latestRun?.resultCostUsd,
|
||||
costUsdCamel: latestRun?.resultCostUsdCamel,
|
||||
});
|
||||
const latestTextSummary =
|
||||
readNonEmptyString(latestSummary?.summary) ??
|
||||
readNonEmptyString(latestSummary?.result) ??
|
||||
@@ -2681,7 +2861,13 @@ export function heartbeatService(db: Db) {
|
||||
|
||||
async function getLatestIssueRun(companyId: string, issueId: string) {
|
||||
return db
|
||||
.select()
|
||||
.select({
|
||||
id: heartbeatRuns.id,
|
||||
status: heartbeatRuns.status,
|
||||
error: heartbeatRuns.error,
|
||||
errorCode: heartbeatRuns.errorCode,
|
||||
contextSnapshot: heartbeatRuns.contextSnapshot,
|
||||
})
|
||||
.from(heartbeatRuns)
|
||||
.where(
|
||||
and(
|
||||
@@ -2771,7 +2957,10 @@ export function heartbeatService(db: Db) {
|
||||
async function escalateStrandedAssignedIssue(input: {
|
||||
issue: typeof issues.$inferSelect;
|
||||
previousStatus: "todo" | "in_progress";
|
||||
latestRun: typeof heartbeatRuns.$inferSelect | null;
|
||||
latestRun: Pick<
|
||||
typeof heartbeatRuns.$inferSelect,
|
||||
"id" | "status" | "error" | "errorCode" | "contextSnapshot"
|
||||
> | null;
|
||||
comment: string;
|
||||
}) {
|
||||
const updated = await issuesSvc.update(input.issue.id, {
|
||||
@@ -2857,13 +3046,15 @@ export function heartbeatService(db: Db) {
|
||||
}
|
||||
|
||||
if (latestRetryReason === "assignment_recovery") {
|
||||
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
|
||||
const updated = await escalateStrandedAssignedIssue({
|
||||
issue,
|
||||
previousStatus: "todo",
|
||||
latestRun,
|
||||
comment:
|
||||
"Paperclip automatically retried dispatch for this assigned `todo` issue after a lost wake/run, " +
|
||||
"but it still has no live execution path. Moving it to `blocked` so it is visible for intervention.",
|
||||
`but it still has no live execution path.${failureSummary ?? ""} ` +
|
||||
"Moving it to `blocked` so it is visible for intervention.",
|
||||
});
|
||||
if (updated) {
|
||||
result.escalated += 1;
|
||||
@@ -2892,14 +3083,15 @@ export function heartbeatService(db: Db) {
|
||||
}
|
||||
|
||||
if (latestRetryReason === "issue_continuation_needed") {
|
||||
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
|
||||
const updated = await escalateStrandedAssignedIssue({
|
||||
issue,
|
||||
previousStatus: "in_progress",
|
||||
latestRun,
|
||||
comment:
|
||||
"Paperclip automatically retried continuation for this assigned `in_progress` issue after its live " +
|
||||
"execution disappeared, but it still has no live execution path. Moving it to `blocked` so it is " +
|
||||
"visible for intervention.",
|
||||
`execution disappeared, but it still has no live execution path.${failureSummary ?? ""} ` +
|
||||
"Moving it to `blocked` so it is visible for intervention.",
|
||||
});
|
||||
if (updated) {
|
||||
result.escalated += 1;
|
||||
@@ -4940,7 +5132,11 @@ export function heartbeatService(db: Db) {
|
||||
return {
|
||||
list: async (companyId: string, agentId?: string, limit?: number) => {
|
||||
const query = db
|
||||
.select(heartbeatRunListColumns)
|
||||
.select({
|
||||
...heartbeatRunListColumns,
|
||||
...heartbeatRunListContextColumns,
|
||||
...heartbeatRunListResultColumns,
|
||||
})
|
||||
.from(heartbeatRuns)
|
||||
.where(
|
||||
agentId
|
||||
@@ -4950,14 +5146,55 @@ export function heartbeatService(db: Db) {
|
||||
.orderBy(desc(heartbeatRuns.createdAt));
|
||||
|
||||
const rows = limit ? await query.limit(limit) : await query;
|
||||
return rows.map((row) => ({
|
||||
...row,
|
||||
resultJson: summarizeHeartbeatRunResultJson(row.resultJson),
|
||||
}));
|
||||
return rows.map((row) => {
|
||||
const {
|
||||
contextIssueId,
|
||||
contextTaskId,
|
||||
contextTaskKey,
|
||||
contextCommentId,
|
||||
contextWakeCommentId,
|
||||
contextWakeReason,
|
||||
contextWakeSource,
|
||||
contextWakeTriggerDetail,
|
||||
resultSummary,
|
||||
resultResult,
|
||||
resultMessage,
|
||||
resultError,
|
||||
resultTotalCostUsd,
|
||||
resultCostUsd,
|
||||
resultCostUsdCamel,
|
||||
...rest
|
||||
} = row;
|
||||
|
||||
return {
|
||||
...rest,
|
||||
contextSnapshot: summarizeHeartbeatRunContextSnapshot({
|
||||
issueId: contextIssueId,
|
||||
taskId: contextTaskId,
|
||||
taskKey: contextTaskKey,
|
||||
commentId: contextCommentId,
|
||||
wakeCommentId: contextWakeCommentId,
|
||||
wakeReason: contextWakeReason,
|
||||
wakeSource: contextWakeSource,
|
||||
wakeTriggerDetail: contextWakeTriggerDetail,
|
||||
}),
|
||||
resultJson: summarizeHeartbeatRunListResultJson({
|
||||
summary: resultSummary,
|
||||
result: resultResult,
|
||||
message: resultMessage,
|
||||
error: resultError,
|
||||
totalCostUsd: resultTotalCostUsd,
|
||||
costUsd: resultCostUsd,
|
||||
costUsdCamel: resultCostUsdCamel,
|
||||
}),
|
||||
};
|
||||
});
|
||||
},
|
||||
|
||||
getRun,
|
||||
|
||||
getRunLogAccess,
|
||||
|
||||
getRuntimeState: async (agentId: string) => {
|
||||
const state = await getRuntimeState(agentId);
|
||||
const agent = await getAgent(agentId);
|
||||
@@ -5031,8 +5268,17 @@ export function heartbeatService(db: Db) {
|
||||
.orderBy(asc(heartbeatRunEvents.seq))
|
||||
.limit(Math.max(1, Math.min(limit, 1000))),
|
||||
|
||||
readLog: async (runId: string, opts?: { offset?: number; limitBytes?: number }) => {
|
||||
const run = await getRun(runId);
|
||||
readLog: async (
|
||||
runOrLookup: string | {
|
||||
id: string;
|
||||
companyId: string;
|
||||
logStore: string | null;
|
||||
logRef: string | null;
|
||||
},
|
||||
opts?: { offset?: number; limitBytes?: number },
|
||||
) => {
|
||||
const run = typeof runOrLookup === "string" ? await getRunLogAccess(runOrLookup) : runOrLookup;
|
||||
const runId = typeof runOrLookup === "string" ? runOrLookup : runOrLookup.id;
|
||||
if (!run) throw notFound("Heartbeat run not found");
|
||||
if (!run.logStore || !run.logRef) throw notFound("Run log not found");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user