[codex] Add runtime lifecycle recovery and live issue visibility (#4419)

This commit is contained in:
Dotta
2026-04-24 15:50:32 -05:00
committed by GitHub
parent 9a8d219949
commit 5a0c1979cf
121 changed files with 9625 additions and 2044 deletions
+172 -5
View File
@@ -1,9 +1,14 @@
import { useMemo } from "react";
import type { Issue, Agent } from "@paperclipai/shared";
import { useQuery } from "@tanstack/react-query";
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
import { Link } from "@/lib/router";
import { activityApi, type RunForIssue, type RunLivenessState } from "../api/activity";
import { heartbeatsApi, type ActiveRunForIssue, type LiveRunForIssue } from "../api/heartbeats";
import {
heartbeatsApi,
type ActiveRunForIssue,
type LiveRunForIssue,
type WatchdogDecisionInput,
} from "../api/heartbeats";
import { cn, relativeTime } from "../lib/utils";
import { queryKeys } from "../lib/queryKeys";
import { keepPreviousDataForSameQueryTail } from "../lib/query-placeholder-data";
@@ -24,11 +29,14 @@ type IssueRunLedgerContentProps = {
issueStatus: Issue["status"];
childIssues: Issue[];
agentMap: ReadonlyMap<string, Pick<Agent, "name">>;
pendingWatchdogDecision?: WatchdogDecisionInput["decision"] | null;
onWatchdogDecision?: (input: WatchdogDecisionInput) => void;
};
type LedgerRun = RunForIssue & {
isLive?: boolean;
agentName?: string;
outputSilence?: ActiveRunForIssue["outputSilence"];
};
type LivenessCopy = {
@@ -96,6 +104,28 @@ const MISSING_LIVENESS_COPY: LivenessCopy = {
const TERMINAL_CHILD_STATUSES = new Set<Issue["status"]>(["done", "cancelled"]);
const ACTIVE_RUN_STATUSES = new Set(["queued", "running"]);
type RunOutputSilenceLevel = NonNullable<ActiveRunForIssue["outputSilence"]>["level"];
type RunOutputSilenceCopy = {
label: string;
tone: string;
};
const RUN_OUTPUT_SILENCE_COPY: Partial<Record<RunOutputSilenceLevel, RunOutputSilenceCopy>> = {
suspicious: {
label: "Silence watch",
tone: "border-amber-500/30 bg-amber-500/10 text-amber-700 dark:text-amber-300",
},
critical: {
label: "Stale run",
tone: "border-red-500/30 bg-red-500/10 text-red-700 dark:text-red-300",
},
snoozed: {
label: "Silence snoozed",
tone: "border-cyan-500/30 bg-cyan-500/10 text-cyan-700 dark:text-cyan-300",
},
};
function asRecord(value: unknown): Record<string, unknown> | null {
if (typeof value !== "object" || value === null || Array.isArray(value)) return null;
return value as Record<string, unknown>;
@@ -143,6 +173,7 @@ function liveRunToLedgerRun(run: LiveRunForIssue | ActiveRunForIssue): LedgerRun
usageJson: null,
resultJson: null,
isLive: run.status === "queued" || run.status === "running",
outputSilence: run.outputSilence,
};
}
@@ -155,10 +186,25 @@ function mergeRuns(
for (const run of runs) byId.set(run.runId, run);
for (const run of liveRuns ?? []) {
const existing = byId.get(run.id);
byId.set(run.id, existing ? { ...existing, isLive: true, agentName: run.agentName } : liveRunToLedgerRun(run));
byId.set(
run.id,
existing
? { ...existing, isLive: true, agentName: run.agentName, outputSilence: run.outputSilence }
: liveRunToLedgerRun(run),
);
}
if (activeRun && !byId.has(activeRun.id)) {
byId.set(activeRun.id, liveRunToLedgerRun(activeRun));
if (activeRun) {
const existing = byId.get(activeRun.id);
if (existing) {
byId.set(activeRun.id, {
...existing,
isLive: isActiveRun(existing) || isActiveRun(activeRun),
agentName: activeRun.agentName,
outputSilence: activeRun.outputSilence,
});
} else {
byId.set(activeRun.id, liveRunToLedgerRun(activeRun));
}
}
return [...byId.values()].sort((a, b) => {
@@ -252,6 +298,17 @@ function compactAgentName(run: LedgerRun, agentMap: ReadonlyMap<string, Pick<Age
return run.agentName ?? agentMap.get(run.agentId)?.name ?? run.agentId.slice(0, 8);
}
function formatSilenceAge(ms: number | null | undefined) {
if (!ms || ms <= 0) return null;
const totalMinutes = Math.floor(ms / 60_000);
if (totalMinutes < 1) return "under 1 minute";
if (totalMinutes < 60) return `${totalMinutes} minute${totalMinutes === 1 ? "" : "s"}`;
const hours = Math.floor(totalMinutes / 60);
const minutes = totalMinutes % 60;
if (minutes === 0) return `${hours} hour${hours === 1 ? "" : "s"}`;
return `${hours}h ${minutes}m`;
}
export function IssueRunLedger({
issueId,
issueStatus,
@@ -259,6 +316,7 @@ export function IssueRunLedger({
agentMap,
hasLiveRuns,
}: IssueRunLedgerProps) {
const queryClient = useQueryClient();
const { data: runs } = useQuery({
queryKey: queryKeys.issues.runs(issueId),
queryFn: () => activityApi.runsForIssue(issueId),
@@ -279,6 +337,13 @@ export function IssueRunLedger({
refetchInterval: hasLiveRuns ? false : 3000,
placeholderData: keepPreviousDataForSameQueryTail<ActiveRunForIssue | null>(issueId),
});
const watchdogDecision = useMutation({
mutationFn: (input: WatchdogDecisionInput) => heartbeatsApi.recordWatchdogDecision(input),
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: queryKeys.issues.activeRun(issueId) });
queryClient.invalidateQueries({ queryKey: queryKeys.issues.liveRuns(issueId) });
},
});
return (
<IssueRunLedgerContent
@@ -288,6 +353,8 @@ export function IssueRunLedger({
issueStatus={issueStatus}
childIssues={childIssues}
agentMap={agentMap}
pendingWatchdogDecision={watchdogDecision.variables?.decision ?? null}
onWatchdogDecision={(input) => watchdogDecision.mutate(input)}
/>
);
}
@@ -299,9 +366,19 @@ export function IssueRunLedgerContent({
issueStatus,
childIssues,
agentMap,
pendingWatchdogDecision,
onWatchdogDecision,
}: IssueRunLedgerContentProps) {
const ledgerRuns = useMemo(() => mergeRuns(runs, liveRuns, activeRun), [activeRun, liveRuns, runs]);
const latestRun = ledgerRuns[0] ?? null;
const latestSilentRun = useMemo(
() =>
ledgerRuns.find((run) =>
isActiveRun(run)
&& (run.outputSilence?.level === "critical" || run.outputSilence?.level === "suspicious"),
) ?? null,
[ledgerRuns],
);
const children = childIssueSummary(childIssues);
return (
@@ -360,6 +437,86 @@ export function IssueRunLedgerContent({
</div>
) : null}
{latestSilentRun?.outputSilence ? (
<div
className={cn(
"rounded-md border px-3 py-2 text-xs",
latestSilentRun.outputSilence.level === "critical"
? "border-red-500/30 bg-red-500/10 text-red-900 dark:text-red-200"
: "border-amber-500/30 bg-amber-500/10 text-amber-900 dark:text-amber-200",
)}
>
<p className="font-medium">
{latestSilentRun.outputSilence.level === "critical"
? "Stale-run watchdog alert"
: "Output silence watchdog warning"}
</p>
<p className="mt-1">
Latest active run has been silent for{" "}
{formatSilenceAge(latestSilentRun.outputSilence.silenceAgeMs) ?? "an extended period"}.
{latestSilentRun.outputSilence.evaluationIssueIdentifier ? (
<>
{" "}
Review{" "}
<Link
to={`/issues/${latestSilentRun.outputSilence.evaluationIssueIdentifier}`}
className="font-medium underline underline-offset-2"
>
{latestSilentRun.outputSilence.evaluationIssueIdentifier}
</Link>
{" "}for recovery context.
</>
) : null}
</p>
{onWatchdogDecision ? (
<div className="mt-2 flex flex-wrap gap-1.5">
<button
type="button"
className="rounded-md border border-border bg-background/80 px-2 py-1 text-[11px] text-foreground hover:bg-background"
onClick={() =>
onWatchdogDecision({
runId: latestSilentRun.runId,
decision: "continue",
evaluationIssueId: latestSilentRun.outputSilence?.evaluationIssueId ?? null,
})}
disabled={pendingWatchdogDecision != null}
>
Continue monitoring
</button>
<button
type="button"
className="rounded-md border border-border bg-background/80 px-2 py-1 text-[11px] text-foreground hover:bg-background"
onClick={() =>
onWatchdogDecision({
runId: latestSilentRun.runId,
decision: "snooze",
evaluationIssueId: latestSilentRun.outputSilence?.evaluationIssueId ?? null,
snoozedUntil: new Date(Date.now() + 60 * 60 * 1000).toISOString(),
reason: "Snoozed from issue run ledger",
})}
disabled={pendingWatchdogDecision != null}
>
Snooze 1h
</button>
<button
type="button"
className="rounded-md border border-border bg-background/80 px-2 py-1 text-[11px] text-foreground hover:bg-background"
onClick={() =>
onWatchdogDecision({
runId: latestSilentRun.runId,
decision: "dismissed_false_positive",
evaluationIssueId: latestSilentRun.outputSilence?.evaluationIssueId ?? null,
reason: "Dismissed from issue run ledger",
})}
disabled={pendingWatchdogDecision != null}
>
Mark false positive
</button>
</div>
) : null}
</div>
) : null}
{ledgerRuns.length === 0 ? (
<div className="rounded-md border border-dashed border-border px-3 py-3 text-sm text-muted-foreground">
Historical runs without liveness metadata will appear here once linked to this issue.
@@ -418,6 +575,16 @@ export function IssueRunLedgerContent({
{retryState.badgeLabel}
</span>
) : null}
{run.outputSilence && RUN_OUTPUT_SILENCE_COPY[run.outputSilence.level] ? (
<span
className={cn(
"rounded-md border px-1.5 py-0.5 text-[11px] font-medium",
RUN_OUTPUT_SILENCE_COPY[run.outputSilence.level]?.tone,
)}
>
{RUN_OUTPUT_SILENCE_COPY[run.outputSilence.level]?.label}
</span>
) : null}
</div>
<div className="grid gap-2 text-xs text-muted-foreground sm:grid-cols-3">