Files
paperclip/server/src/services/recovery/service.ts
T
Dotta bfe6369ef5 Guard cheap recovery model usage (#6371)
## Thinking Path

> - Paperclip is the control plane that coordinates AI-agent work
through issues, heartbeats, comments, approvals, and auditable recovery
paths.
> - The affected subsystem is heartbeat/recovery orchestration,
especially the optional cheap model profile used for operational
recovery overhead.
> - Cheap recovery should repair status and liveness, but it must not
become the worker lane that writes deliverables, continues source work,
or propagates cheap execution hints into downstream retries.
> - The gap was that cheap-profile hints could follow recovery wake
contexts and assignment overrides farther than intended, making real
work eligible to run on the cheap model.
> - This pull request separates status-only cheap recovery from normal
source-work continuations, adds route guards for deliverable mutations
during cheap status-only runs, and documents the invariant.
> - The benefit is safer retry/recovery behavior: cheap runs can clean
up control-plane state, while any remaining source work resumes through
a normal/original model path.

## What Changed

- Added recovery model-profile work classes so status-only recovery
carries explicit guard context and normal-model continuations scrub
cheap hints.
- Updated heartbeat, productivity review, liveness continuation, and
recovery service wakeups to request cheap only for bounded status-only
recovery work.
- Blocked cheap status-only recovery runs from writing issue documents,
plans, attachments, work products, or assigning downstream work back to
`modelProfile: "cheap"`.
- Added/updated server tests for cheap profile propagation,
artifact/document guards, route authorization, retry scheduling, and
successful-run handoff behavior.
- Documented the recovery model-profile lane in
`doc/SPEC-implementation.md` and `doc/execution-semantics.md`.
- After rebasing onto current `public-gh/master`, stabilized the new
`InstanceSidebar` plugin-filter tests so the PR check lane stays green.

## Verification

- Local: `pnpm exec vitest run --config vitest.config.ts
src/services/recovery/model-profile-hint.test.ts
src/__tests__/issue-agent-mutation-ownership-routes.test.ts
src/__tests__/issue-document-restore-routes.test.ts` from `server/` - 3
files, 37 tests passed after final edits.
- Local: `pnpm exec vitest run --config vitest.config.ts
src/__tests__/heartbeat-process-recovery.test.ts` from `server/` - 44
tests passed after rerunning the cleanup-sensitive file alone.
- Local: `pnpm --filter @paperclipai/ui exec vitest run
src/components/InstanceSidebar.test.tsx` - 4 tests passed.
- Local: `pnpm --filter @paperclipai/server typecheck` - passed.
- Local: `pnpm --filter @paperclipai/ui typecheck` - passed.
- PR checks on latest head `6f8c3b1380f5bd872c6f49f6f7188ecf3bb6d263` -
all green, including `verify`, build, typecheck,
server/general/serialized tests, e2e, Snyk, and policy.
- Greptile: pass 3 returned Confidence Score 5/5 with zero unresolved
Greptile review threads.

## Risks

- Medium risk: recovery behavior is intentionally stricter, so any path
that incorrectly relies on cheap recovery to keep doing source work will
now need to hand back to a normal-model run.
- Low migration risk: no schema changes.
- No product UI changes; the UI file touched is a test-only
stabilization after rebasing onto current `master`.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex coding agent, GPT-5 model family (`gpt-5`), tool use and
local code execution enabled; context window not exposed in this
environment.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots (N/A: no product UI changes)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-05-19 13:46:02 -05:00

3458 lines
126 KiB
TypeScript

import { and, asc, desc, eq, gt, gte, inArray, isNull, notInArray, sql } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import {
DEFAULT_ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_LOOKBACK_HOURS,
MAX_ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_LOOKBACK_HOURS,
MIN_ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_LOOKBACK_HOURS,
type IssueGraphLivenessAutoRecoveryPreview,
type IssueGraphLivenessAutoRecoveryPreviewItem,
} from "@paperclipai/shared";
import {
agents,
agentWakeupRequests,
approvals,
activityLog,
companies,
heartbeatRunEvents,
heartbeatRunWatchdogDecisions,
heartbeatRuns,
issueComments,
issueApprovals,
issueRecoveryActions,
issueRelations,
issueThreadInteractions,
issues,
} from "@paperclipai/db";
import { parseObject, asBoolean, asNumber } from "../../adapters/utils.js";
import { runningProcesses } from "../../adapters/index.js";
import { forbidden, notFound } from "../../errors.js";
import { logger } from "../../middleware/logger.js";
import { isPidAlive, isProcessGroupAlive, terminateLocalService } from "../local-service-supervisor.js";
import { redactCurrentUserText } from "../../log-redaction.js";
import { redactSensitiveText } from "../../redaction.js";
import { logActivity } from "../activity-log.js";
import { budgetService } from "../budgets.js";
import { instanceSettingsService } from "../instance-settings.js";
import { issueRecoveryActionService } from "../issue-recovery-actions.js";
import { issueTreeControlService } from "../issue-tree-control.js";
import { issueService } from "../issues.js";
import { getRunLogStore } from "../run-log-store.js";
import {
DEFAULT_MAX_SUCCESSFUL_RUN_HANDOFF_ATTEMPTS,
FINISH_SUCCESSFUL_RUN_HANDOFF_REASON,
SUCCESSFUL_RUN_MISSING_STATE_REASON,
buildSuccessfulRunHandoffExhaustedNotice,
noticeMetadataReferencesRecoveryAction,
type SuccessfulRunHandoffNotice,
} from "./successful-run-handoff.js";
import {
RECOVERY_ORIGIN_KINDS,
buildIssueGraphLivenessLeafKey,
isStrandedIssueRecoveryOriginKind,
parseIssueGraphLivenessIncidentKey,
} from "./origins.js";
import {
classifyIssueGraphLiveness,
type IssueLivenessFinding,
} from "./issue-graph-liveness.js";
import {
recoveryAssigneeAdapterOverrides,
withRecoveryModelProfileHint,
} from "./model-profile-hint.js";
import { isAutomaticRecoverySuppressedByPauseHold } from "./pause-hold-guard.js";
// Heartbeat run statuses that count as an issue still having a live or pending execution path
// (matched by hasActiveExecutionPath).
const EXECUTION_PATH_HEARTBEAT_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const;
// Terminal heartbeat run statuses that are treated as unsuccessful outcomes.
const UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES = ["failed", "cancelled", "timed_out"] as const;
// Silence age (1 hour) at which a running run's output level becomes "suspicious".
export const ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS = 60 * 60 * 1000;
// Silence age (4 hours) at which a running run's output level becomes "critical".
export const ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS = 4 * 60 * 60 * 1000;
// 30 minutes. NOTE(review): presumably the quiet window granted by a "continue" watchdog
// decision before the run is evaluated again — confirm against the code that consumes it.
export const ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS = 30 * 60 * 1000;
// Number of trailing run-log bytes (8 KiB) read as stale-run watchdog evidence.
const ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES = 8 * 1024;
const STRANDED_ISSUE_RECOVERY_ORIGIN_KIND = RECOVERY_ORIGIN_KINDS.strandedIssueRecovery;
const STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND = RECOVERY_ORIGIN_KINDS.staleActiveRunEvaluation;
// Key under which a wakeup payload nests its wake context (read by issueIdFromWakePayload).
const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext";
// Adapter types for which cleanupSourceResolvedRunProcess attempts local process cleanup;
// runs on any other adapter type are skipped.
const SESSIONED_LOCAL_ADAPTERS = new Set([
"claude_local",
"codex_local",
"cursor",
"gemini_local",
"hermes_local",
"opencode_local",
"pi_local",
]);
/** Options accepted by the injected wakeup enqueuer (`RecoveryWakeup`). */
type RecoveryWakeupOptions = {
source?: "timer" | "assignment" | "on_demand" | "automation";
triggerDetail?: "manual" | "ping" | "callback" | "system";
reason?: string | null;
payload?: Record<string, unknown> | null;
idempotencyKey?: string | null;
requestedByActorType?: "user" | "agent" | "system";
requestedByActorId?: string | null;
contextSnapshot?: Record<string, unknown>;
};
/**
 * Signature of the injected `deps.enqueueWakeup`. Resolves to a heartbeat run row, or null;
 * callers in this file treat null as "no wake was queued".
 */
type RecoveryWakeup = (
agentId: string,
opts?: RecoveryWakeupOptions,
) => Promise<typeof heartbeatRuns.$inferSelect | null>;
/** Column subset of a heartbeat run used by the recovery classifiers; null when no run exists. */
type LatestIssueRun = Pick<
typeof heartbeatRuns.$inferSelect,
"id" | "agentId" | "status" | "error" | "errorCode" | "contextSnapshot" | "livenessState"
> | null;
/** A non-null latest run whose status is known to be "succeeded". */
type SuccessfulLatestIssueRun = NonNullable<LatestIssueRun> & { status: "succeeded" };
type StrandedRecoveryCause = "stranded_assigned_issue" | typeof SUCCESSFUL_RUN_MISSING_STATE_REASON;
/** Handoff details extracted from a run's context snapshot by successfulRunHandoffRecoveryEvidence. */
type SuccessfulRunHandoffRecoveryEvidence = {
sourceRunId: string | null;
correctiveRunId: string;
missingDisposition: string;
handoffAttempt: number;
maxHandoffAttempts: number;
};
// NOTE(review): presumably identifies who recorded a watchdog decision; confirm against its consumers.
type WatchdogDecisionActor =
| { type: "board"; userId?: string | null; runId?: string | null }
| { type: "agent"; agentId?: string | null; runId?: string | null }
| { type: "none" };
/** Output-silence snapshot for a heartbeat run, as produced by buildRunOutputSilence. */
export type RunOutputSilenceSummary = {
lastOutputAt: Date | null;
lastOutputSeq: number;
lastOutputStream: "stdout" | "stderr" | null;
silenceStartedAt: Date | null;
// Null unless the run status is "running".
silenceAgeMs: number | null;
level: "not_applicable" | "ok" | "suspicious" | "critical" | "snoozed";
suspicionThresholdMs: number;
criticalThresholdMs: number;
snoozedUntil: Date | null;
evaluationIssueId: string | null;
evaluationIssueIdentifier: string | null;
evaluationIssueAssigneeAgentId: string | null;
};
/**
 * Returns `value` unchanged when it is a string with at least one
 * non-whitespace character; otherwise returns null. The string is not trimmed.
 */
function readNonEmptyString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  if (value.trim().length === 0) return null;
  return value;
}
/**
 * Produces the sentence appended to an issue comment when the latest run
 * carries failure information. The error text itself is never included.
 *
 * @returns The fixed "details withheld" sentence when `run.error` or
 *   `run.errorCode` is a non-empty string; null otherwise (including no run).
 */
function summarizeRunFailureForIssueComment(run: LatestIssueRun) {
  const failureDetail = run
    ? (readNonEmptyString(run.error) ?? readNonEmptyString(run.errorCode))
    : null;
  if (failureDetail === null) return null;
  return " Latest retry failure details were withheld from the issue thread; inspect the linked run for evidence.";
}
/**
 * Reports whether the latest run was an automatic recovery retry of the
 * expected kind that ended unsuccessfully.
 *
 * @param latestRun Most recent run for the issue, or null when there is none.
 * @param expectedRetryReason Value that `contextSnapshot.retryReason` must equal.
 * @returns true only when the retry reason matches and the run status is one
 *   of the unsuccessful terminal statuses.
 */
function didAutomaticRecoveryFail(
  latestRun: LatestIssueRun,
  expectedRetryReason: "assignment_recovery" | "issue_continuation_needed",
) {
  if (!latestRun) return false;
  const snapshot = parseObject(latestRun.contextSnapshot);
  const retryReason = readNonEmptyString(snapshot.retryReason);
  if (retryReason !== expectedRetryReason) return false;
  return UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES.some(
    (unsuccessfulStatus) => unsuccessfulStatus === latestRun.status,
  );
}
/**
 * Extracts successful-run handoff details from the latest run's context snapshot.
 *
 * A run counts as a handoff run when its wake reason is the finish-handoff
 * reason, its handoff reason is the missing-state reason, or
 * `handoffRequired` reads as true.
 *
 * @returns The handoff evidence (attempt defaults to 1, max attempts to the
 *   module default, missing disposition to "clear_next_step"), or null when
 *   there is no run or the run is not a handoff run.
 */
function successfulRunHandoffRecoveryEvidence(latestRun: LatestIssueRun): SuccessfulRunHandoffRecoveryEvidence | null {
  if (!latestRun) return null;
  const snapshot = parseObject(latestRun.contextSnapshot);
  const wakeReason = readNonEmptyString(snapshot.wakeReason);
  const handoffReason = readNonEmptyString(snapshot.handoffReason);
  const lacksHandoffSignal =
    wakeReason !== FINISH_SUCCESSFUL_RUN_HANDOFF_REASON &&
    handoffReason !== SUCCESSFUL_RUN_MISSING_STATE_REASON &&
    asBoolean(snapshot.handoffRequired, false) !== true;
  if (lacksHandoffSignal) return null;

  const sourceRunId =
    readNonEmptyString(snapshot.sourceRunId) ?? readNonEmptyString(snapshot.resumeFromRunId);
  const missingDisposition = readNonEmptyString(snapshot.missingDisposition) ?? "clear_next_step";
  return {
    sourceRunId,
    correctiveRunId: latestRun.id,
    missingDisposition,
    handoffAttempt: asNumber(snapshot.handoffAttempt, 1),
    maxHandoffAttempts: asNumber(snapshot.maxHandoffAttempts, DEFAULT_MAX_SUCCESSFUL_RUN_HANDOFF_ATTEMPTS),
  };
}
/**
 * Looks up handoff evidence for the latest run and tags it with whether the
 * handoff attempts are used up.
 *
 * @returns null when the run is not a handoff run; otherwise the evidence plus
 *   `exhausted`, which is false only while `handoffAttempt < maxHandoffAttempts`.
 */
function isExhaustedSuccessfulRunHandoff(latestRun: LatestIssueRun) {
  const evidence = successfulRunHandoffRecoveryEvidence(latestRun);
  if (!evidence) return null;
  // Negating `<` (rather than using `>=`) keeps the exhausted=true result for non-comparable values.
  const attemptsRemain = evidence.handoffAttempt < evidence.maxHandoffAttempts;
  return { ...evidence, exhausted: !attemptsRemain };
}
/**
 * Reads the issue id from a run context snapshot, preferring `issueId` and
 * falling back to `taskId`. Returns null when neither is a non-empty string.
 */
function issueIdFromRunContext(contextSnapshot: unknown) {
  const snapshot = parseObject(contextSnapshot);
  const directIssueId = readNonEmptyString(snapshot.issueId);
  if (directIssueId !== null) return directIssueId;
  return readNonEmptyString(snapshot.taskId);
}
/**
 * Reads the issue id from a wakeup payload.
 *
 * Lookup order: top-level `issueId`, then `issueId` and `taskId` inside the
 * nested wake context stored under `DEFERRED_WAKE_CONTEXT_KEY`.
 * Returns null when none is a non-empty string.
 */
function issueIdFromWakePayload(payload: unknown) {
  const wakePayload = parseObject(payload);
  const wakeContext = parseObject(wakePayload[DEFERRED_WAKE_CONTEXT_KEY]);
  const candidates = [wakePayload.issueId, wakeContext.issueId, wakeContext.taskId];
  for (const candidate of candidates) {
    const issueId = readNonEmptyString(candidate);
    if (issueId !== null) return issueId;
  }
  return null;
}
/**
 * Builds a markdown link to an issue page under the given company prefix.
 * The issue identifier is used as both label and path segment, falling back
 * to the issue id when the identifier is null.
 */
function issueUiLink(issue: { identifier: string | null; id: string }, prefix: string) {
  const { identifier, id } = issue;
  const slug = identifier ?? id;
  return "[" + slug + "](/" + prefix + "/issues/" + slug + ")";
}
/** Builds a markdown link, labelled with the run id, to the run page of the run's agent. */
function runUiLink(run: { id: string; agentId: string }, prefix: string) {
  const { id: runId, agentId } = run;
  const href = ["", prefix, "agents", agentId, "runs", runId].join("/");
  return `[${runId}](${href})`;
}
/**
 * Builds a markdown link to an agent page, labelled with the agent name
 * (or the agent id when the name is null). Returns "unknown" when no agent
 * is given.
 */
function agentUiLink(agent: { id: string; name: string | null } | null, prefix: string) {
  if (!agent) {
    return "unknown";
  }
  const label = agent.name ?? agent.id;
  const href = "/" + prefix + "/agents/" + agent.id;
  return `[${label}](${href})`;
}
/**
 * Formats a millisecond duration as whole minutes ("42m") or hours and
 * minutes ("3h", "3h 5m"), rounding down to the minute.
 *
 * @param ms Duration in milliseconds, or null when it is not known.
 * @returns "unknown" for null and for non-finite values (NaN, ±Infinity);
 *   otherwise the formatted duration. Negative durations are clamped to "0m".
 */
function formatDuration(ms: number | null) {
  // Non-finite input would otherwise render as "NaNh" / "Infinityh".
  if (ms === null || !Number.isFinite(ms)) return "unknown";
  // Clamp so clock skew never produces a negative label such as "-1m".
  const totalMinutes = Math.floor(Math.max(0, ms) / 60_000);
  if (totalMinutes < 60) return `${totalMinutes}m`;
  const hours = Math.floor(totalMinutes / 60);
  const remainingMinutes = totalMinutes % 60;
  return remainingMinutes > 0 ? `${hours}h ${remainingMinutes}m` : `${hours}h`;
}
/**
 * Renders up to five distinct issue identifiers as comma-separated markdown
 * links, in first-seen order.
 *
 * The link prefix is the text before the first "-" of each identifier, or
 * "PAP" when that text is empty. Relations without an identifier are ignored.
 *
 * @returns The joined links, or "another open issue" when no relation has an identifier.
 */
function formatIssueLinksForComment(relations: Array<{ identifier?: string | null }>) {
  const distinctIdentifiers = new Set<string>();
  for (const { identifier } of relations) {
    if (identifier) distinctIdentifiers.add(identifier);
  }
  if (distinctIdentifiers.size === 0) return "another open issue";

  const links: string[] = [];
  for (const identifier of distinctIdentifiers) {
    if (links.length === 5) break;
    const prefix = identifier.split("-")[0] || "PAP";
    links.push(`[${identifier}](/${prefix}/issues/${identifier})`);
  }
  return links.join(", ");
}
/**
 * Finds the object that carries database conflict fields for a thrown error.
 *
 * @returns null when `error` is not an object. Otherwise the error itself when
 *   it has a string `code`, `constraint`, or `constraint_name`; else its
 *   `cause` when that is an object; else the error itself.
 */
function unwrapDatabaseConflictError(error: unknown) {
  if (error === null || typeof error !== "object") return null;
  const outer = error as {
    code?: string;
    constraint?: string;
    constraint_name?: string;
    message?: string;
    cause?: unknown;
  };
  const outerCarriesConflictFields = [outer.code, outer.constraint, outer.constraint_name].some(
    (field) => typeof field === "string",
  );
  if (outerCarriesConflictFields) return outer;

  const inner = outer.cause;
  if (inner !== null && typeof inner === "object") {
    return inner as {
      code?: string;
      constraint?: string;
      constraint_name?: string;
      message?: string;
    };
  }
  return outer;
}
/**
 * An agent can be invoked when it exists and its status is none of
 * "paused", "terminated", or "pending_approval".
 */
function isAgentInvokable(agent: typeof agents.$inferSelect | null | undefined) {
  if (!agent) return false;
  const nonInvokableStatuses: readonly string[] = ["paused", "terminated", "pending_approval"];
  return nonInvokableStatuses.every((blockedStatus) => blockedStatus !== agent.status);
}
/** Whether the issue's origin kind is a stranded-issue recovery origin. */
function isStrandedIssueRecoveryIssue(issue: Pick<typeof issues.$inferSelect, "originKind">) {
  const { originKind } = issue;
  return isStrandedIssueRecoveryOriginKind(originKind);
}
/**
 * Whether a latest run exists and ended in one of the unsuccessful terminal
 * statuses (failed, cancelled, timed_out).
 */
function isUnsuccessfulTerminalIssueRun(latestRun: LatestIssueRun) {
  if (!latestRun) return false;
  return UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES.some(
    (unsuccessfulStatus) => unsuccessfulStatus === latestRun.status,
  );
}
/** Type guard: the latest run exists and its status is "succeeded". */
function isSuccessfulInProgressContinuationRun(latestRun: LatestIssueRun): latestRun is SuccessfulLatestIssueRun {
  if (!latestRun) return false;
  return latestRun.status === "succeeded";
}
/**
 * Whether the latest run succeeded with a liveness state of "advanced",
 * "completed", "blocked", or "needs_followup".
 */
function isProductiveContinuationRun(latestRun: LatestIssueRun) {
  if (!latestRun || latestRun.status !== "succeeded") return false;
  switch (latestRun.livenessState) {
    case "advanced":
    case "completed":
    case "blocked":
    case "needs_followup":
      return true;
    default:
      return false;
  }
}
/**
 * Whether the latest successful run was itself a productive-continuation
 * recovery: its snapshot has retry reason "issue_continuation_needed" and
 * source "issue.productive_terminal_continuation_recovery", and the run
 * counts as productive.
 */
function isRepeatedProductiveContinuationRecovery(latestRun: SuccessfulLatestIssueRun) {
  const snapshot = parseObject(latestRun.contextSnapshot);
  if (readNonEmptyString(snapshot.retryReason) !== "issue_continuation_needed") return false;
  if (readNonEmptyString(snapshot.source) !== "issue.productive_terminal_continuation_recovery") return false;
  return isProductiveContinuationRun(latestRun);
}
/** Parses a liveness incident key; returns null for a null, undefined, or empty key. */
function parseLivenessIncidentKey(incidentKey: string | null | undefined) {
  return incidentKey ? parseIssueGraphLivenessIncidentKey(incidentKey) : null;
}
/** The leaf issue id for a liveness finding is its recovery issue id. */
function livenessRecoveryLeafIssueId(finding: IssueLivenessFinding) {
  const { recoveryIssueId } = finding;
  return recoveryIssueId;
}
/**
 * Builds the leaf key for a liveness finding from its company, state, and
 * recovery issue id.
 */
function livenessRecoveryLeafFingerprint(finding: IssueLivenessFinding) {
  const { companyId, state, recoveryIssueId } = finding;
  return buildIssueGraphLivenessLeafKey({ companyId, state, leafIssueId: recoveryIssueId });
}
/** Positional-argument wrapper around `buildIssueGraphLivenessLeafKey`. */
function livenessRecoveryLeafKey(companyId: string, state: string, leafIssueId: string) {
  const keyParts = { companyId, state, leafIssueId };
  return buildIssueGraphLivenessLeafKey(keyParts);
}
/**
 * Detects a unique-violation (SQLSTATE 23505) raised by one of the active
 * liveness-recovery unique indexes.
 *
 * The constraint is recognised either from the error's `constraint` field or
 * from the constraint name appearing in its `message`. The error's `cause` is
 * inspected as well, so a driver error wrapped by the query layer is still
 * recognised (the same wrapping `unwrapDatabaseConflictError` accounts for).
 *
 * @param error Any thrown value.
 * @returns true when the error, or its direct `cause`, is such a conflict.
 */
function isUniqueLivenessRecoveryConflict(error: unknown) {
  if (!error || typeof error !== "object") return false;
  type ConflictFields = { code?: unknown; constraint?: unknown; message?: unknown; cause?: unknown };
  const livenessConstraints = [
    "issues_active_liveness_recovery_incident_uq",
    "issues_active_liveness_recovery_leaf_uq",
  ];
  const isLivenessConflict = (candidate: ConflictFields) => {
    if (candidate.code !== "23505") return false;
    const { constraint, message } = candidate;
    if (typeof constraint === "string" && livenessConstraints.includes(constraint)) return true;
    // Some drivers only expose the constraint name inside the message text.
    return typeof message === "string" && livenessConstraints.some((name) => message.includes(name));
  };
  const outer = error as ConflictFields;
  if (isLivenessConflict(outer)) return true;
  const cause = outer.cause;
  return Boolean(cause) && typeof cause === "object" && isLivenessConflict(cause as ConflictFields);
}
/**
 * Renders a finding's dependency path as "A -> B -> C", using each entry's
 * identifier or, when that is missing, its issue id.
 */
function formatDependencyPath(finding: IssueLivenessFinding) {
  const labels: string[] = [];
  for (const entry of finding.dependencyPath) {
    labels.push(entry.identifier ?? entry.issueId);
  }
  return labels.join(" -> ");
}
/**
 * Builds the markdown description for a liveness escalation issue.
 *
 * The text has three sections — Source, Ownership, Next Action — followed by
 * a closing instruction. Issue labels use the dependency-path entry's
 * identifier, then its issue id, then the id stored on the finding.
 */
function buildLivenessEscalationDescription(finding: IssueLivenessFinding) {
  const sourceEntry = finding.dependencyPath[0];
  const recoveryEntry = finding.dependencyPath.find((entry) => entry.issueId === finding.recoveryIssueId);
  const sourceLabel = sourceEntry?.identifier ?? sourceEntry?.issueId ?? finding.issueId;
  const recoveryLabel = recoveryEntry?.identifier ?? recoveryEntry?.issueId ?? finding.recoveryIssueId;
  const selectedOwner = finding.recommendedOwnerAgentId ?? "none";
  const candidateOwnerIds = finding.recommendedOwnerCandidateAgentIds;
  const candidateOwners = candidateOwnerIds.length > 0
    ? candidateOwnerIds.map((id) => `\`${id}\``).join(", ")
    : "none";

  const sourceSection = [
    "## Source",
    "",
    `- Source issue: ${sourceLabel}`,
    `- Recovery target issue: ${recoveryLabel}`,
    `- Incident key: \`${finding.incidentKey}\``,
    `- Detected invariant: \`${finding.state}\``,
    `- Dependency path: ${formatDependencyPath(finding)}`,
    `- Reason: ${finding.reason}`,
  ];
  const ownershipSection = [
    "## Ownership",
    "",
    `- Selected owner agent: \`${selectedOwner}\``,
    `- Candidate owner agents: ${candidateOwners}`,
  ];
  const nextActionSection = [
    "## Next Action",
    "",
    finding.recommendedAction,
  ];

  return [
    "Paperclip detected a harness-level issue graph liveness incident.",
    "",
    ...sourceSection,
    "",
    ...ownershipSection,
    "",
    ...nextActionSection,
    "",
    "Resolve the blocked chain, then mark this escalation issue done so the original issue can resume when all blockers are cleared.",
  ].join("\n");
}
/**
 * Builds the markdown comment posted on the original issue when a liveness
 * escalation issue is opened for it. The escalation is labelled with its
 * identifier, or its id when no identifier exists.
 */
function buildLivenessOriginalIssueComment(finding: IssueLivenessFinding, escalation: typeof issues.$inferSelect) {
  const escalationLabel = escalation.identifier ?? escalation.id;
  const details = [
    `- Escalation issue: ${escalationLabel}`,
    `- Incident key: \`${finding.incidentKey}\``,
    `- Finding: \`${finding.state}\``,
    `- Dependency path: ${formatDependencyPath(finding)}`,
    `- Reason: ${finding.reason}`,
    `- Manager action requested: ${finding.recommendedAction}`,
  ];
  const paragraphs = [
    "Paperclip detected a harness-level liveness incident in this issue's dependency graph.",
    details.join("\n"),
    "This issue now keeps its existing blockers and is also blocked by the escalation issue so dependency wakeups remain explicit.",
  ];
  return paragraphs.join("\n\n");
}
/**
 * Factory for the recovery service.
 *
 * Binds the collaborating services to `db` and defines the recovery helpers
 * as closures over them. Agent wakeups are never issued directly; they go
 * through the injected `deps.enqueueWakeup`.
 */
export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup }) {
const issuesSvc = issueService(db);
const recoveryActionsSvc = issueRecoveryActionService(db);
const treeControlSvc = issueTreeControlService(db);
const budgets = budgetService(db);
const instanceSettings = instanceSettingsService(db);
const runLogStore = getRunLogStore();
// Reads the instance-level `censorUsernameInLogs` setting on each call.
const getCurrentUserRedactionOptions = async () => ({
enabled: (await instanceSettings.getGeneral()).censorUsernameInLogs,
});
/** Loads an agent row by id; resolves to null when no row matches. */
async function getAgent(agentId: string) {
  const matchingAgents = await db.select().from(agents).where(eq(agents.id, agentId));
  return matchingAgents[0] ?? null;
}
/**
 * Loads the most recent heartbeat run of a company whose context snapshot
 * names the given issue, ordered by creation time and then id (both
 * descending). Resolves to null when the issue has no run.
 */
async function getLatestIssueRun(companyId: string, issueId: string): Promise<LatestIssueRun> {
  const latestRunColumns = {
    id: heartbeatRuns.id,
    agentId: heartbeatRuns.agentId,
    status: heartbeatRuns.status,
    error: heartbeatRuns.error,
    errorCode: heartbeatRuns.errorCode,
    contextSnapshot: heartbeatRuns.contextSnapshot,
    livenessState: heartbeatRuns.livenessState,
  };
  const runsForIssue = and(
    eq(heartbeatRuns.companyId, companyId),
    sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
  );
  const newestFirst = await db
    .select(latestRunColumns)
    .from(heartbeatRuns)
    .where(runsForIssue)
    .orderBy(desc(heartbeatRuns.createdAt), desc(heartbeatRuns.id))
    .limit(1);
  return newestFirst[0] ?? null;
}
/**
 * Whether an issue still has something that will execute it: a heartbeat run
 * in an execution-path status, or a wakeup request in status
 * "deferred_issue_execution" whose payload names the issue. Both lookups run
 * concurrently.
 */
async function hasActiveExecutionPath(companyId: string, issueId: string) {
  const activeRunExists = async () => {
    const runs = await db
      .select({ id: heartbeatRuns.id })
      .from(heartbeatRuns)
      .where(
        and(
          eq(heartbeatRuns.companyId, companyId),
          inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
          sql`${heartbeatRuns.contextSnapshot} ->> 'issueId' = ${issueId}`,
        ),
      )
      .limit(1);
    return runs.length > 0;
  };
  const deferredWakeExists = async () => {
    const wakes = await db
      .select({ id: agentWakeupRequests.id })
      .from(agentWakeupRequests)
      .where(
        and(
          eq(agentWakeupRequests.companyId, companyId),
          eq(agentWakeupRequests.status, "deferred_issue_execution"),
          sql`${agentWakeupRequests.payload} ->> 'issueId' = ${issueId}`,
        ),
      )
      .limit(1);
    return wakes.length > 0;
  };
  const [hasRun, hasDeferredWake] = await Promise.all([activeRunExists(), deferredWakeExists()]);
  return hasRun || hasDeferredWake;
}
/** Whether the company has a wakeup request in status "queued" whose payload names the issue. */
async function hasQueuedIssueWake(companyId: string, issueId: string) {
  const queuedForIssue = and(
    eq(agentWakeupRequests.companyId, companyId),
    eq(agentWakeupRequests.status, "queued"),
    sql`${agentWakeupRequests.payload} ->> 'issueId' = ${issueId}`,
  );
  const queuedWakes = await db
    .select({ id: agentWakeupRequests.id })
    .from(agentWakeupRequests)
    .where(queuedForIssue)
    .limit(1);
  return queuedWakes.length > 0;
}
/**
 * Enqueues an automation wakeup that retries or continues a stranded issue.
 * Both the payload and the context snapshot carry the "normal_model"
 * recovery hint.
 *
 * When a wake is queued and `retryOfRunId` is given, the queued run row is
 * also updated to point at the run it retries.
 *
 * @returns The queued run (the updated row when the retry link was written),
 *   or whatever `deps.enqueueWakeup` resolved to otherwise.
 */
async function enqueueStrandedIssueRecovery(input: {
  issueId: string;
  agentId: string;
  reason: "issue_assignment_recovery" | "issue_continuation_needed";
  retryReason: "assignment_recovery" | "issue_continuation_needed";
  source: string;
  retryOfRunId?: string | null;
}) {
  const { issueId, retryOfRunId } = input;
  const wakePayload = withRecoveryModelProfileHint({
    issueId,
    ...(retryOfRunId ? { retryOfRunId } : {}),
  }, "normal_model");
  const wakeContext = withRecoveryModelProfileHint({
    issueId,
    taskId: issueId,
    wakeReason: input.reason,
    retryReason: input.retryReason,
    source: input.source,
    ...(retryOfRunId ? { retryOfRunId } : {}),
  }, "normal_model");
  const queued = await deps.enqueueWakeup(input.agentId, {
    source: "automation",
    triggerDetail: "system",
    reason: input.reason,
    payload: wakePayload,
    requestedByActorType: "system",
    requestedByActorId: null,
    contextSnapshot: wakeContext,
  });
  if (!queued || !retryOfRunId) return queued;

  // Persist the retry link on the queued run row itself.
  const linkedRuns = await db
    .update(heartbeatRuns)
    .set({
      retryOfRunId,
      updatedAt: new Date(),
    })
    .where(eq(heartbeatRuns.id, queued.id))
    .returning();
  return linkedRuns[0] ?? queued;
}
/**
 * Enqueues an "issue_assigned" wakeup for the given agent on the given issue,
 * tagged as an assigned-todo liveness dispatch and carrying the
 * "normal_model" recovery hint in both payload and context snapshot.
 */
async function enqueueInitialAssignedTodoDispatch(issue: typeof issues.$inferSelect, agentId: string) {
  const issueId = issue.id;
  const wakePayload = withRecoveryModelProfileHint({
    issueId,
    mutation: "assigned_todo_liveness_dispatch",
  }, "normal_model");
  const wakeContext = withRecoveryModelProfileHint({
    issueId,
    taskId: issueId,
    wakeReason: "issue_assigned",
    source: "issue.assigned_todo_liveness_dispatch",
  }, "normal_model");
  return deps.enqueueWakeup(agentId, {
    source: "assignment",
    triggerDetail: "system",
    reason: "issue_assigned",
    payload: wakePayload,
    requestedByActorType: "system",
    requestedByActorId: null,
    contextSnapshot: wakeContext,
  });
}
/** Whether the budget service reports an invocation block for this agent on this issue and project. */
async function isInvocationBudgetBlocked(issue: typeof issues.$inferSelect, agentId: string) {
  const { companyId, id: issueId, projectId } = issue;
  const invocationBlock = await budgets.getInvocationBlock(companyId, agentId, { issueId, projectId });
  return invocationBlock ? true : false;
}
/**
 * Assigns orphaned blocker issues back to the agent that created them.
 *
 * A candidate is an issue that is the `issueId` side of a "blocks" relation,
 * is in status "todo" or "blocked", has neither an agent nor a user assignee,
 * was created by an agent, and blocks an issue of the same company that is not
 * done or cancelled.
 *
 * For each distinct candidate whose creator agent exists, belongs to the same
 * company, and is invokable, the issue is assigned to that agent, an
 * explanatory comment is added, the change is written to the activity log,
 * and a wakeup is enqueued for the agent.
 *
 * @returns `assigned` (issues for which a wake was queued), `skipped`, and the
 *   ids of the assigned issues.
 */
async function reconcileUnassignedBlockingIssues() {
const candidates = await db
.select({
id: issues.id,
companyId: issues.companyId,
identifier: issues.identifier,
status: issues.status,
createdByAgentId: issues.createdByAgentId,
})
.from(issueRelations)
.innerJoin(issues, eq(issueRelations.issueId, issues.id))
.where(
and(
eq(issueRelations.type, "blocks"),
inArray(issues.status, ["todo", "blocked"]),
isNull(issues.assigneeAgentId),
isNull(issues.assigneeUserId),
sql`${issues.createdByAgentId} is not null`,
sql`exists (
select 1
from issues blocked_issue
where blocked_issue.id = ${issueRelations.relatedIssueId}
and blocked_issue.company_id = ${issues.companyId}
and blocked_issue.status not in ('done', 'cancelled')
)`,
),
);
let assigned = 0;
let skipped = 0;
const issueIds: string[] = [];
// The join yields one row per relation, so an issue that blocks several others appears repeatedly.
const seen = new Set<string>();
for (const candidate of candidates) {
if (seen.has(candidate.id)) continue;
seen.add(candidate.id);
const creatorAgentId = candidate.createdByAgentId;
if (!creatorAgentId) {
skipped += 1;
continue;
}
const creatorAgent = await getAgent(creatorAgentId);
// Never hand work to a missing, cross-company, or non-invokable agent.
if (!creatorAgent || creatorAgent.companyId !== candidate.companyId || !isAgentInvokable(creatorAgent)) {
skipped += 1;
continue;
}
// Relation summaries are read before the update so the comment can name what this issue blocks.
const relations = await issuesSvc.getRelationSummaries(candidate.id);
const blockingLinks = formatIssueLinksForComment(relations.blocks);
const updated = await issuesSvc.update(candidate.id, {
assigneeAgentId: creatorAgent.id,
assigneeUserId: null,
});
if (!updated) {
skipped += 1;
continue;
}
await issuesSvc.addComment(
candidate.id,
[
"## Assigned Orphan Blocker",
"",
`Paperclip found this issue is blocking ${blockingLinks} but had no assignee, so no heartbeat could pick it up.`,
"",
"- Assigned it back to the agent that created the blocker.",
"- Next action: resolve this blocker or reassign it to the right owner.",
].join("\n"),
{},
);
await logActivity(db, {
companyId: candidate.companyId,
actorType: "system",
actorId: "system",
agentId: null,
runId: null,
action: "issue.updated",
entityType: "issue",
entityId: candidate.id,
details: {
identifier: candidate.identifier,
assigneeAgentId: creatorAgent.id,
source: "recovery.reconcile_unassigned_blocking_issue",
},
});
// The wake carries the "normal_model" recovery hint in both payload and context snapshot.
const queued = await deps.enqueueWakeup(creatorAgent.id, {
source: "automation",
triggerDetail: "system",
reason: "issue_assigned",
payload: withRecoveryModelProfileHint({
issueId: candidate.id,
mutation: "unassigned_blocker_recovery",
}, "normal_model"),
requestedByActorType: "system",
requestedByActorId: null,
contextSnapshot: withRecoveryModelProfileHint({
issueId: candidate.id,
taskId: candidate.id,
wakeReason: "issue_assigned",
source: "issue.unassigned_blocker_recovery",
}, "normal_model"),
});
// Counted as assigned only when a wake was queued; otherwise it is counted as skipped
// even though the assignee update, comment, and activity entry above were already written.
if (queued) {
assigned += 1;
issueIds.push(candidate.id);
} else {
skipped += 1;
}
}
return { assigned, skipped, issueIds };
}
/** Looks up a company's issue prefix; resolves to "PAP" when the company or its prefix is missing. */
async function getCompanyIssuePrefix(companyId: string) {
  const matchingCompanies = await db
    .select({ issuePrefix: companies.issuePrefix })
    .from(companies)
    .where(eq(companies.id, companyId));
  const company = matchingCompanies[0];
  return company?.issuePrefix ?? "PAP";
}
/** Builds the origin fingerprint "stale_active_run:<companyId>:<runId>". */
function staleActiveRunOriginFingerprint(companyId: string, runId: string) {
  return ["stale_active_run", companyId, runId].join(":");
}
/** An issue status is terminal when it is "done" or "cancelled". */
function isTerminalIssueStatus(status: string | null | undefined) {
  switch (status) {
    case "done":
    case "cancelled":
      return true;
    default:
      return false;
  }
}
/** Whether the issue's origin kind is one of the values in `RECOVERY_ORIGIN_KINDS`. */
function isRecoveryOriginIssue(issue: typeof issues.$inferSelect) {
  const recoveryOriginKinds: readonly unknown[] = Object.values(RECOVERY_ORIGIN_KINDS);
  return recoveryOriginKinds.some((kind) => kind === issue.originKind);
}
/**
 * The moment from which a run's output silence is measured: the first
 * available of last output time, process start, run start, and run creation.
 * Returns null when none is set.
 */
function silenceStartedAtForRun(run: Pick<typeof heartbeatRuns.$inferSelect, "lastOutputAt" | "processStartedAt" | "startedAt" | "createdAt">) {
  const timestampsByPreference = [run.lastOutputAt, run.processStartedAt, run.startedAt, run.createdAt];
  for (const timestamp of timestampsByPreference) {
    if (timestamp != null) return timestamp;
  }
  return null;
}
/**
 * Milliseconds elapsed between the run's silence start and `now`, never
 * negative. Returns null when the run has no usable start timestamp.
 */
function silenceAgeMsForRun(run: Pick<typeof heartbeatRuns.$inferSelect, "lastOutputAt" | "processStartedAt" | "startedAt" | "createdAt">, now = new Date()) {
  const silenceStart = silenceStartedAtForRun(run);
  if (!silenceStart) return null;
  const elapsedMs = now.getTime() - silenceStart.getTime();
  return Math.max(0, elapsedMs);
}
/**
 * Finds the newest "snooze" or "continue" watchdog decision for a run whose
 * `snoozedUntil` is still later than `now`. Resolves to null when no such
 * decision exists.
 */
async function latestActiveOutputQuietUntilDecision(companyId: string, runId: string, now = new Date()) {
  const stillQuiet = and(
    eq(heartbeatRunWatchdogDecisions.companyId, companyId),
    eq(heartbeatRunWatchdogDecisions.runId, runId),
    inArray(heartbeatRunWatchdogDecisions.decision, ["snooze", "continue"]),
    gt(heartbeatRunWatchdogDecisions.snoozedUntil, now),
  );
  const decisions = await db
    .select()
    .from(heartbeatRunWatchdogDecisions)
    .where(stillQuiet)
    .orderBy(desc(heartbeatRunWatchdogDecisions.createdAt))
    .limit(1);
  const newestDecision = decisions[0];
  return newestDecision ?? null;
}
/**
 * Finds a visible, non-terminal stale-run evaluation issue whose origin id is
 * the given run. Resolves to a summary of that issue, or null when none is open.
 */
async function findOpenStaleRunEvaluation(companyId: string, runId: string) {
  const evaluationColumns = {
    id: issues.id,
    identifier: issues.identifier,
    status: issues.status,
    priority: issues.priority,
    assigneeAgentId: issues.assigneeAgentId,
    updatedAt: issues.updatedAt,
  };
  const openEvaluationForRun = and(
    eq(issues.companyId, companyId),
    eq(issues.originKind, STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND),
    eq(issues.originId, runId),
    isNull(issues.hiddenAt),
    notInArray(issues.status, ["done", "cancelled"]),
  );
  const evaluations = await db
    .select(evaluationColumns)
    .from(issues)
    .where(openEvaluationForRun)
    .limit(1);
  const openEvaluation = evaluations[0];
  return openEvaluation ?? null;
}
/**
 * Summarises how long a run has been silent and how that silence is classified.
 *
 * Level rules, in order: runs that are not "running" are "not_applicable"; a
 * still-active snooze/continue decision yields "snoozed"; otherwise the
 * silence age is compared with the critical and then the suspicion threshold;
 * anything below both is "ok". The silence age is only computed for running runs.
 *
 * @param run Heartbeat run columns needed for the summary.
 * @param now Reference time for age and snooze checks.
 */
async function buildRunOutputSilence(
  run: Pick<
    typeof heartbeatRuns.$inferSelect,
    "id" | "companyId" | "status" | "lastOutputAt" | "lastOutputSeq" | "lastOutputStream" | "processStartedAt" | "startedAt" | "createdAt"
  >,
  now = new Date(),
): Promise<RunOutputSilenceSummary> {
  const [quietUntilDecision, evaluation] = await Promise.all([
    latestActiveOutputQuietUntilDecision(run.companyId, run.id, now),
    findOpenStaleRunEvaluation(run.companyId, run.id),
  ]);
  const silenceStartedAt = silenceStartedAtForRun(run);
  const isRunning = run.status === "running";
  const silenceAgeMs = isRunning ? silenceAgeMsForRun(run, now) : null;

  let level: RunOutputSilenceSummary["level"];
  if (!isRunning) {
    level = "not_applicable";
  } else if (quietUntilDecision) {
    level = "snoozed";
  } else if ((silenceAgeMs ?? 0) >= ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS) {
    level = "critical";
  } else if ((silenceAgeMs ?? 0) >= ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS) {
    level = "suspicious";
  } else {
    level = "ok";
  }

  // Only the two known stream names are surfaced; any other stored value reads as null.
  let lastOutputStream: RunOutputSilenceSummary["lastOutputStream"] = null;
  if (run.lastOutputStream === "stdout" || run.lastOutputStream === "stderr") {
    lastOutputStream = run.lastOutputStream;
  }

  return {
    lastOutputAt: run.lastOutputAt ?? null,
    lastOutputSeq: run.lastOutputSeq ?? 0,
    lastOutputStream,
    silenceStartedAt,
    silenceAgeMs,
    level,
    suspicionThresholdMs: ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS,
    criticalThresholdMs: ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS,
    snoozedUntil: quietUntilDecision?.snoozedUntil ?? null,
    evaluationIssueId: evaluation?.id ?? null,
    evaluationIssueIdentifier: evaluation?.identifier ?? null,
    evaluationIssueAssigneeAgentId: evaluation?.assigneeAgentId ?? null,
  };
}
/** Applies current-user redaction first, then sensitive-text redaction, to watchdog evidence text. */
function redactWatchdogEvidenceText(value: string, currentUserRedactionOptions: Awaited<ReturnType<typeof getCurrentUserRedactionOptions>>) {
  const withoutCurrentUser = redactCurrentUserText(value, currentUserRedactionOptions);
  return redactSensitiveText(withoutCurrentUser);
}
/**
 * Keeps at most the last `maxChars` characters of `value`.
 *
 * @returns `value` unchanged when it fits; otherwise its tail followed by a
 *   newline and the marker "[truncated earlier evidence]".
 */
function truncateEvidenceText(value: string, maxChars = 4000) {
  const overflow = value.length - maxChars;
  if (overflow <= 0) return value;
  return value.slice(overflow) + "\n[truncated earlier evidence]";
}
/**
 * Reads the trailing `ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES` of a run's log.
 *
 * Best effort: resolves to an empty string when the run has no log store,
 * log reference, or logged bytes, and also when the read fails (the failure
 * is logged as a warning).
 */
async function readRunLogTailForEvidence(run: typeof heartbeatRuns.$inferSelect) {
  const { logStore, logRef, logBytes } = run;
  if (!logStore || !logRef || !logBytes) return "";
  const tailOffset = Math.max(0, logBytes - ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES);
  try {
    const { content } = await runLogStore.read(
      { store: logStore as "local_file", logRef },
      { offset: tailOffset, limitBytes: ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES },
    );
    return content;
  } catch (err) {
    logger.warn({ err, runId: run.id }, "failed to read stale-run watchdog evidence tail");
    return "";
  }
}
/**
 * Loads the visible issue, within the run's company, that the run's context
 * snapshot refers to. Resolves to null when the snapshot names no issue or no
 * such issue is found.
 */
async function resolveStaleRunSourceIssue(run: typeof heartbeatRuns.$inferSelect) {
  const sourceIssueId = issueIdFromRunContext(run.contextSnapshot);
  if (!sourceIssueId) return null;
  const visibleSourceIssue = and(
    eq(issues.companyId, run.companyId),
    eq(issues.id, sourceIssueId),
    isNull(issues.hiddenAt),
  );
  const matches = await db
    .select()
    .from(issues)
    .where(visibleSourceIssue)
    .limit(1);
  const sourceIssue = matches[0];
  return sourceIssue ?? null;
}
/**
 * Looks for proof that this very run moved its source issue into the
 * terminal status the issue has now.
 *
 * Searches the activity log for the newest "issue.updated" entry written by
 * the run for the source issue whose recorded status equals the issue's
 * current status. Entries older than `evidenceAfter` (or, when that is null,
 * the run's start or creation time) are ignored.
 *
 * @returns A description of the matching activity entry, or null when the
 *   source issue is not terminal or no entry matches.
 */
async function latestSameRunSourceTerminalEvidence(input: {
  run: typeof heartbeatRuns.$inferSelect;
  sourceIssue: typeof issues.$inferSelect;
  evidenceAfter: Date | null;
}) {
  const { run, sourceIssue, evidenceAfter } = input;
  if (!isTerminalIssueStatus(sourceIssue.status)) return null;
  const notBefore = evidenceAfter ?? run.startedAt ?? run.createdAt ?? null;
  const predicates = [
    eq(activityLog.companyId, run.companyId),
    eq(activityLog.runId, run.id),
    eq(activityLog.action, "issue.updated"),
    eq(activityLog.entityType, "issue"),
    eq(activityLog.entityId, sourceIssue.id),
    sql`${activityLog.details} ->> 'status' = ${sourceIssue.status}`,
    ...(notBefore ? [gte(activityLog.createdAt, notBefore)] : []),
  ];
  const entries = await db
    .select({
      id: activityLog.id,
      createdAt: activityLog.createdAt,
      action: activityLog.action,
    })
    .from(activityLog)
    .where(and(...predicates))
    .orderBy(desc(activityLog.createdAt))
    .limit(1);
  const newestEntry = entries[0] ?? null;
  if (!newestEntry) return null;
  return {
    kind: "activity" as const,
    id: newestEntry.id,
    createdAt: newestEntry.createdAt,
    action: newestEntry.action,
  };
}
/**
 * Computes the next event sequence number for a run: one more than the highest
 * stored `seq`, or 1 when the run has no events yet.
 */
async function nextRunEventSeq(runId: string) {
  const rows = await db
    .select({ maxSeq: sql<number | null>`max(${heartbeatRunEvents.seq})` })
    .from(heartbeatRunEvents)
    .where(eq(heartbeatRunEvents.runId, runId));
  const highestSeq = rows[0]?.maxSeq ?? 0;
  return Number(highestSeq) + 1;
}
/**
 * Appends a system `lifecycle` event to the run's event stream, using the next
 * free sequence number for that run.
 *
 * @param run   Run the event belongs to; supplies company, run and agent ids.
 * @param event Level, message and optional payload (stored as `null` when omitted).
 */
async function appendRecoveryRunEvent(
  run: typeof heartbeatRuns.$inferSelect,
  event: {
    level: "info" | "warn" | "error";
    message: string;
    payload?: Record<string, unknown>;
  },
) {
  const { level, message, payload } = event;
  const seq = await nextRunEventSeq(run.id);

  await db.insert(heartbeatRunEvents).values({
    companyId: run.companyId,
    runId: run.id,
    agentId: run.agentId,
    seq,
    eventType: "lifecycle",
    stream: "system",
    level,
    message,
    payload: payload ?? null,
  });
}
/**
 * Attempts to terminate the local process behind a run and reports what happened.
 *
 * The returned `outcome` is one of:
 * - `skipped_non_local_adapter`: the agent's adapter is not in `SESSIONED_LOCAL_ADAPTERS`;
 * - `no_process_metadata`: neither a pid nor a process group id is known;
 * - `not_running`: neither the pid nor the process group is alive (the in-memory
 *   handle is dropped);
 * - `terminated` / `termination_sent_still_running`: termination was requested and
 *   the process was, respectively, gone or still alive afterwards;
 * - `failed`: termination threw; the error message is included.
 *
 * `attempted` is true only when termination was actually requested.
 */
async function cleanupSourceResolvedRunProcess(input: {
  run: typeof heartbeatRuns.$inferSelect;
  runningAgent: typeof agents.$inferSelect;
}) {
  const { run, runningAgent } = input;

  if (!SESSIONED_LOCAL_ADAPTERS.has(runningAgent.adapterType)) {
    return {
      attempted: false,
      outcome: "skipped_non_local_adapter",
      adapterType: runningAgent.adapterType,
    };
  }

  // Prefer the in-memory process handle; fall back to what the run row recorded.
  const handle = runningProcesses.get(run.id);
  const pid = handle?.child.pid ?? run.processPid ?? null;
  const processGroupId = handle?.processGroupId ?? run.processGroupId ?? null;

  if (typeof pid !== "number" && typeof processGroupId !== "number") {
    return {
      attempted: false,
      outcome: "no_process_metadata",
      adapterType: runningAgent.adapterType,
    };
  }

  const isAnyTargetAlive = () =>
    (typeof pid === "number" && isPidAlive(pid)) ||
    (typeof processGroupId === "number" && isProcessGroupAlive(processGroupId));

  if (!isAnyTargetAlive()) {
    runningProcesses.delete(run.id);
    return {
      attempted: false,
      outcome: "not_running",
      adapterType: runningAgent.adapterType,
      pid,
      processGroupId,
    };
  }

  try {
    // Only positive integer ids are passed through; an unusable pid falls back
    // to the process group id (or 0 when that is absent as well).
    const targetPid =
      typeof pid === "number" && Number.isInteger(pid) && pid > 0
        ? pid
        : (processGroupId ?? 0);
    const targetGroupId =
      typeof processGroupId === "number" && Number.isInteger(processGroupId) && processGroupId > 0
        ? processGroupId
        : null;
    const terminateOptions = handle
      ? { forceAfterMs: Math.max(1, handle.graceSec) * 1000 }
      : undefined;

    await terminateLocalService({ pid: targetPid, processGroupId: targetGroupId }, terminateOptions);
    runningProcesses.delete(run.id);

    const stillAlive = isAnyTargetAlive();
    return {
      attempted: true,
      outcome: stillAlive ? "termination_sent_still_running" : "terminated",
      adapterType: runningAgent.adapterType,
      pid,
      processGroupId,
    };
  } catch (error) {
    return {
      attempted: true,
      outcome: "failed",
      adapterType: runningAgent.adapterType,
      pid,
      processGroupId,
      error: error instanceof Error ? error.message : String(error),
    };
  }
}
/**
 * Updates the agent's status after one of its runs was finalized by the
 * source-resolved fold.
 *
 * The agent stays `running` while it has other runs in `running` status;
 * otherwise it becomes `idle` for a succeeded or cancelled run. Agents whose
 * status is `paused` or `terminated` are left untouched.
 */
async function finalizeAgentAfterSourceResolvedRun(run: typeof heartbeatRuns.$inferSelect, status: "succeeded" | "cancelled") {
  const countRows = await db
    .select({ count: sql<number>`count(*)::int` })
    .from(heartbeatRuns)
    .where(and(eq(heartbeatRuns.agentId, run.agentId), eq(heartbeatRuns.status, "running")));
  const stillRunning = Number(countRows[0]?.count ?? 0);

  let nextStatus: "running" | "idle" | "error";
  if (stillRunning > 0) {
    nextStatus = "running";
  } else if (status === "succeeded" || status === "cancelled") {
    nextStatus = "idle";
  } else {
    nextStatus = "error";
  }

  await db
    .update(agents)
    .set({
      status: nextStatus,
      lastHeartbeatAt: new Date(),
      updatedAt: new Date(),
    })
    .where(and(eq(agents.id, run.agentId), notInArray(agents.status, ["paused", "terminated"])));
}
/**
 * Finalizes a silent `running` run whose source issue already reached a terminal
 * status through activity recorded by that same run, treating the watchdog alert
 * as a false positive.
 *
 * Steps, in order: clean up the run's process; in one transaction mark the run
 * finished, finish its wakeup request (if any) and release the source issue's
 * execution lock; close a still-open evaluation issue with an explanatory
 * comment; resolve an active `active_run_watchdog` recovery action; record a
 * `dismissed_false_positive` watchdog decision; append a run event; log activity;
 * and update the agent's status.
 *
 * @returns `{ kind: "skipped" }` when no evidence was supplied or the run was no
 *   longer `running` when the update ran; otherwise
 *   `{ kind: "folded", evaluationIssueId }`.
 */
async function foldSourceResolvedStaleRun(input: {
run: typeof heartbeatRuns.$inferSelect;
runningAgent: typeof agents.$inferSelect;
sourceIssue: typeof issues.$inferSelect;
evidence: Awaited<ReturnType<typeof latestSameRunSourceTerminalEvidence>>;
existingEvaluation: Awaited<ReturnType<typeof findOpenStaleRunEvaluation>>;
silenceStartedAt: Date | null;
silenceAgeMs: number | null;
now: Date;
}) {
if (!input.evidence) return { kind: "skipped" as const };
const cleanup = await cleanupSourceResolvedRunProcess({ run: input.run, runningAgent: input.runningAgent });
// A cancelled source issue yields a cancelled run; any other status yields a succeeded run.
const finalRunStatus = input.sourceIssue.status === "cancelled" ? "cancelled" : "succeeded";
// Keep whatever the run already stored in resultJson and attach the fold details next to it.
const resultJson = {
...parseObject(input.run.resultJson),
sourceResolvedWatchdogFold: {
sourceIssueId: input.sourceIssue.id,
sourceIssueIdentifier: input.sourceIssue.identifier,
sourceIssueStatus: input.sourceIssue.status,
sameRunEvidenceKind: input.evidence.kind,
sameRunEvidenceId: input.evidence.id,
sameRunEvidenceAt: input.evidence.createdAt.toISOString(),
silenceStartedAt: input.silenceStartedAt?.toISOString() ?? null,
silenceAgeMs: input.silenceAgeMs,
evaluationIssueId: input.existingEvaluation?.id ?? null,
evaluationIssueIdentifier: input.existingEvaluation?.identifier ?? null,
cleanup,
},
};
// The WHERE clause requires status "running", so a run that already left that
// state is not updated and the transaction resolves to null.
const finalizedRun = await db.transaction(async (tx) => {
const [updatedRun] = await tx
.update(heartbeatRuns)
.set({
status: finalRunStatus,
finishedAt: input.now,
error: null,
errorCode: null,
resultJson,
updatedAt: input.now,
})
.where(and(eq(heartbeatRuns.id, input.run.id), eq(heartbeatRuns.companyId, input.run.companyId), eq(heartbeatRuns.status, "running")))
.returning();
if (!updatedRun) return null;
if (input.run.wakeupRequestId) {
await tx
.update(agentWakeupRequests)
.set({
status: finalRunStatus === "succeeded" ? "completed" : "cancelled",
finishedAt: input.now,
error: null,
updatedAt: input.now,
})
.where(and(eq(agentWakeupRequests.id, input.run.wakeupRequestId), eq(agentWakeupRequests.companyId, input.run.companyId)));
}
// Release the source issue's execution lock only if this run still holds it.
await tx
.update(issues)
.set({
executionRunId: null,
executionAgentNameKey: null,
executionLockedAt: null,
updatedAt: input.now,
})
.where(
and(
eq(issues.id, input.sourceIssue.id),
eq(issues.companyId, input.run.companyId),
eq(issues.executionRunId, input.run.id),
),
);
return updatedRun;
});
if (!finalizedRun) return { kind: "skipped" as const };
// Close an evaluation issue that is still open and explain why in a comment.
if (input.existingEvaluation && !isTerminalIssueStatus(input.existingEvaluation.status)) {
await issuesSvc.update(input.existingEvaluation.id, { status: "done" });
await issuesSvc.addComment(input.existingEvaluation.id, [
"Source-resolved watchdog fold.",
"",
`- Source issue: ${input.sourceIssue.identifier ?? input.sourceIssue.id}`,
`- Run: \`${input.run.id}\``,
`- Same-run evidence: \`${input.evidence.kind}:${input.evidence.id}\` at ${input.evidence.createdAt.toISOString()}`,
"- Outcome: false positive; the source issue already reached a terminal disposition from this run.",
].join("\n"), { runId: input.run.id });
}
// Only an active recovery action of kind "active_run_watchdog" is resolved here.
const activeRecoveryAction = await recoveryActionsSvc.getActiveForIssue(input.run.companyId, input.sourceIssue.id);
if (activeRecoveryAction?.kind === "active_run_watchdog") {
await recoveryActionsSvc.resolveActiveForIssue({
companyId: input.run.companyId,
sourceIssueId: input.sourceIssue.id,
actionId: activeRecoveryAction.id,
status: "resolved",
outcome: "false_positive",
resolutionNote: "Source issue reached a terminal disposition through durable same-run activity; watchdog folded as source-resolved.",
});
}
const [decision] = await db
.insert(heartbeatRunWatchdogDecisions)
.values({
companyId: input.run.companyId,
runId: input.run.id,
evaluationIssueId: input.existingEvaluation?.id ?? null,
decision: "dismissed_false_positive",
reason: "Source issue already reached a terminal disposition through durable same-run activity.",
createdByRunId: input.run.id,
})
.returning();
// A failed process cleanup is surfaced at warn level; every other outcome is info.
await appendRecoveryRunEvent(finalizedRun, {
level: cleanup.outcome === "failed" ? "warn" : "info",
message: "Source-resolved watchdog fold finalized stale active run",
payload: resultJson.sourceResolvedWatchdogFold,
});
await logActivity(db, {
companyId: input.run.companyId,
actorType: "system",
actorId: "system",
agentId: input.run.agentId,
runId: input.run.id,
action: "heartbeat.output_stale_source_resolved",
entityType: "heartbeat_run",
entityId: input.run.id,
details: {
source: "recovery.scan_silent_active_runs",
sourceIssueId: input.sourceIssue.id,
sourceIssueIdentifier: input.sourceIssue.identifier,
sourceIssueStatus: input.sourceIssue.status,
evaluationIssueId: input.existingEvaluation?.id ?? null,
watchdogDecisionId: decision.id,
sameRunEvidenceKind: input.evidence.kind,
sameRunEvidenceId: input.evidence.id,
sameRunEvidenceAt: input.evidence.createdAt.toISOString(),
cleanup,
},
});
await finalizeAgentAfterSourceResolvedRun(finalizedRun, finalRunStatus);
return { kind: "folded" as const, evaluationIssueId: input.existingEvaluation?.id ?? null };
}
/**
 * Picks the agent who should own the evaluation of a stale run.
 *
 * Candidates are tried in this order, skipping duplicates: the manager of the
 * source issue's assignee, the manager of the running agent, then the company's
 * `cto` agents followed by `ceo` agents (oldest first within each role). The
 * first candidate that belongs to the run's company, is invokable, and has no
 * budget invocation block is returned.
 *
 * @returns the chosen agent id, or `null` when no candidate qualifies.
 */
async function resolveStaleRunOwnerAgentId(input: {
  run: typeof heartbeatRuns.$inferSelect;
  runningAgent: typeof agents.$inferSelect;
  sourceIssue: typeof issues.$inferSelect | null;
}) {
  const { run, runningAgent, sourceIssue } = input;
  const orderedCandidateIds: string[] = [];

  if (sourceIssue?.assigneeAgentId) {
    const sourceAssignee = await getAgent(sourceIssue.assigneeAgentId);
    if (sourceAssignee?.reportsTo) orderedCandidateIds.push(sourceAssignee.reportsTo);
  }
  if (runningAgent.reportsTo) orderedCandidateIds.push(runningAgent.reportsTo);

  const executives = await db
    .select()
    .from(agents)
    .where(and(eq(agents.companyId, run.companyId), inArray(agents.role, ["cto", "ceo"])))
    .orderBy(sql`case when ${agents.role} = 'cto' then 0 else 1 end`, asc(agents.createdAt));
  for (const executive of executives) orderedCandidateIds.push(executive.id);

  // A Set keeps first-insertion order, so this drops repeats without reordering.
  for (const agentId of Array.from(new Set(orderedCandidateIds))) {
    const candidate = await getAgent(agentId);
    if (!candidate || candidate.companyId !== run.companyId) continue;

    const budgetBlock = await budgets.getInvocationBlock(run.companyId, candidate.id, {
      issueId: sourceIssue?.id ?? null,
      projectId: sourceIssue?.projectId ?? null,
    });
    const eligible = isAgentInvokable(candidate) && !budgetBlock;
    if (eligible) return candidate.id;
  }
  return null;
}
/**
 * Gathers the evidence shown on a stale-run evaluation issue.
 *
 * Loads in parallel: the run's log tail, its 8 most recent run events, up to 8
 * visible child issues of the source issue, and up to 8 issues related to the
 * source issue through a `blocks` relation. The log tail and event messages are
 * redacted and truncated before being returned; events are returned oldest
 * first. Child issues and blockers are empty when there is no source issue.
 */
async function collectStaleRunEvidence(input: {
  run: typeof heartbeatRuns.$inferSelect;
  runningAgent: typeof agents.$inferSelect;
  sourceIssue: typeof issues.$inferSelect | null;
  prefix: string;
  now: Date;
}) {
  const { run, sourceIssue } = input;
  const issueSummaryColumns = {
    id: issues.id,
    identifier: issues.identifier,
    title: issues.title,
    status: issues.status,
  };

  const [tail, recentEvents, childIssues, blockers] = await Promise.all([
    readRunLogTailForEvidence(run),
    db
      .select({
        eventType: heartbeatRunEvents.eventType,
        level: heartbeatRunEvents.level,
        message: heartbeatRunEvents.message,
        createdAt: heartbeatRunEvents.createdAt,
      })
      .from(heartbeatRunEvents)
      .where(and(eq(heartbeatRunEvents.companyId, run.companyId), eq(heartbeatRunEvents.runId, run.id)))
      .orderBy(desc(heartbeatRunEvents.id))
      .limit(8),
    sourceIssue
      ? db
        .select(issueSummaryColumns)
        .from(issues)
        .where(and(eq(issues.companyId, run.companyId), eq(issues.parentId, sourceIssue.id), isNull(issues.hiddenAt)))
        .orderBy(desc(issues.updatedAt))
        .limit(8)
      : Promise.resolve([]),
    sourceIssue
      ? db
        .select(issueSummaryColumns)
        .from(issueRelations)
        .innerJoin(issues, eq(issueRelations.issueId, issues.id))
        .where(
          and(
            eq(issueRelations.companyId, run.companyId),
            eq(issueRelations.relatedIssueId, sourceIssue.id),
            eq(issueRelations.type, "blocks"),
          ),
        )
        .limit(8)
      : Promise.resolve([]),
  ]);

  const redactionOptions = await getCurrentUserRedactionOptions();
  const safeTail = truncateEvidenceText(redactWatchdogEvidenceText(tail, redactionOptions));
  const silenceAgeMs = silenceAgeMsForRun(run, input.now);

  // The query returns newest first; flip a copy so readers see oldest first.
  const chronologicalEvents = recentEvents.slice().reverse().map((event) => {
    const message = event.message
      ? truncateEvidenceText(redactWatchdogEvidenceText(event.message, redactionOptions), 300)
      : null;
    return {
      eventType: event.eventType,
      level: event.level,
      createdAt: event.createdAt.toISOString(),
      message,
    };
  });

  return {
    safeTail,
    silenceAgeMs,
    recentEvents: chronologicalEvents,
    childIssues,
    blockers,
  };
}
/**
 * Renders the markdown description for a stale-run evaluation issue.
 *
 * The document has five parts: run facts (links, timestamps, silence duration,
 * thresholds, process metadata), the redacted log excerpt, recent run events,
 * related child/blocker issues, and a fixed decision checklist.
 */
function buildStaleRunEvaluationDescription(input: {
  run: typeof heartbeatRuns.$inferSelect;
  runningAgent: typeof agents.$inferSelect;
  sourceIssue: typeof issues.$inferSelect | null;
  prefix: string;
  evidence: Awaited<ReturnType<typeof collectStaleRunEvidence>>;
  level: "suspicious" | "critical";
  now: Date;
}) {
  const { run, runningAgent, evidence, prefix } = input;
  // Joins bullet lines, or substitutes the placeholder when there are none.
  const bulletsOr = (lines: string[], placeholder: string) =>
    lines.length > 0 ? lines.join("\n") : placeholder;

  const sourceIssue = input.sourceIssue
    ? issueUiLink({ identifier: input.sourceIssue.identifier, id: input.sourceIssue.id }, prefix)
    : "none";
  const recentEvents = bulletsOr(
    evidence.recentEvents.map((event) => {
      const levelSuffix = event.level ? ` ${event.level}` : "";
      return `- ${event.createdAt} \`${event.eventType}\`${levelSuffix}: ${event.message ?? "(no message)"}`;
    }),
    "- none",
  );
  const childIssues = bulletsOr(
    evidence.childIssues.map((issue) =>
      `- ${issueUiLink({ identifier: issue.identifier, id: issue.id }, prefix)} \`${issue.status}\`: ${issue.title}`,
    ),
    "- none detected",
  );
  const blockers = bulletsOr(
    evidence.blockers.map((issue) =>
      `- ${issueUiLink({ identifier: issue.identifier, id: issue.id }, prefix)} \`${issue.status}\`: ${issue.title}`,
    ),
    "- none detected",
  );

  const invocationDetail = run.triggerDetail ? ` / ${run.triggerDetail}` : "";
  const inMemoryHandle = runningProcesses.has(run.id) ? "yes" : "no";
  const outputExcerpt = evidence.safeTail
    ? `\`\`\`text\n${evidence.safeTail}\n\`\`\``
    : "_No run-log tail was available._";

  const runSection = [
    `Paperclip detected ${input.level} output silence on an active heartbeat run.`,
    "",
    "## Run",
    "",
    `- Run: ${runUiLink(run, prefix)}`,
    `- Agent: ${runningAgent.name} (${runningAgent.adapterType})`,
    `- Invocation: ${run.invocationSource}${invocationDetail}`,
    `- Source issue: ${sourceIssue}`,
    `- Started at: ${run.startedAt?.toISOString() ?? "unknown"}`,
    `- Process started at: ${run.processStartedAt?.toISOString() ?? "unknown"}`,
    `- Last output at: ${run.lastOutputAt?.toISOString() ?? "none recorded"}`,
    `- Last output sequence: ${run.lastOutputSeq ?? 0}`,
    `- Silent for: ${formatDuration(evidence.silenceAgeMs)}`,
    `- Thresholds: suspicious after ${formatDuration(ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS)}, critical after ${formatDuration(ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS)}`,
    `- Process metadata: pid \`${run.processPid ?? "unknown"}\`, process group \`${run.processGroupId ?? "unknown"}\`, in-memory handle \`${inMemoryHandle}\``,
    "",
  ];
  const evidenceSection = [
    "## Last Output Excerpt",
    "",
    outputExcerpt,
    "",
    "## Recent Run Events",
    "",
    recentEvents,
    "",
    "## Related Work",
    "",
    "Active child issues:",
    childIssues,
    "",
    "Current source blockers:",
    blockers,
    "",
  ];
  const checklistSection = [
    "## Decision Checklist",
    "",
    "- Continue or snooze if the run is intentionally quiet.",
    "- Ask the run owner for context if work may be delegated outside the transcript.",
    "- Preserve artifacts, branch state, and useful output before cancellation.",
    "- Cancel or recover through the explicit run recovery controls when authorized.",
    "- Close this issue as a false positive only after recording the reason.",
  ];

  return [...runSection, ...evidenceSection, ...checklistSection].join("\n");
}
/**
 * Reports whether an error is a unique-violation (code `23505`) on the
 * `issues_active_stale_run_evaluation_uq` constraint. The constraint is matched
 * through `constraint`, `constraint_name`, or a mention in the error message.
 */
function isUniqueStaleRunEvaluationConflict(error: unknown) {
  const conflict = unwrapDatabaseConflictError(error);
  if (!conflict) return false;
  if (conflict.code !== "23505") return false;

  if (conflict.constraint === "issues_active_stale_run_evaluation_uq") return true;
  if (conflict.constraint_name === "issues_active_stale_run_evaluation_uq") return true;
  return typeof conflict.message === "string" &&
    conflict.message.includes("issues_active_stale_run_evaluation_uq");
}
/**
 * Reports whether an error is a unique-violation (code `23505`) on the
 * `issues_active_stranded_issue_recovery_uq` constraint. The constraint is
 * matched through `constraint`, `constraint_name`, or a mention in the error
 * message.
 */
function isUniqueStrandedIssueRecoveryConflict(error: unknown) {
  const conflict = unwrapDatabaseConflictError(error);
  if (!conflict) return false;
  if (conflict.code !== "23505") return false;

  if (conflict.constraint === "issues_active_stranded_issue_recovery_uq") return true;
  if (conflict.constraint_name === "issues_active_stranded_issue_recovery_uq") return true;
  return typeof conflict.message === "string" &&
    conflict.message.includes("issues_active_stranded_issue_recovery_uq");
}
/**
 * Makes the source issue blocked by the given evaluation issue, if it is not already.
 *
 * Nothing happens when there is no source issue, when it is `done` or
 * `cancelled`, or when the evaluation issue is already among its blockers.
 * Otherwise the evaluation issue is appended to the blocker list, the status is
 * set to `blocked` when it is not already, a comment is added, and an activity
 * entry is logged.
 *
 * @returns `true` when the source issue was updated, `false` otherwise.
 */
async function ensureSourceIssueBlockedByStaleEvaluation(input: {
  sourceIssue: typeof issues.$inferSelect | null;
  evaluationIssue: { id: string; identifier: string | null };
  run: typeof heartbeatRuns.$inferSelect;
}) {
  const { sourceIssue, evaluationIssue, run } = input;
  if (!sourceIssue) return false;
  if (["done", "cancelled"].includes(sourceIssue.status)) return false;

  const currentBlockerIds = await existingBlockerIssueIds(sourceIssue.companyId, sourceIssue.id);
  if (currentBlockerIds.includes(evaluationIssue.id)) return false;
  const nextBlockerIds = [...currentBlockerIds, evaluationIssue.id];

  await issuesSvc.update(sourceIssue.id, {
    ...(sourceIssue.status === "blocked" ? {} : { status: "blocked" }),
    blockedByIssueIds: nextBlockerIds,
  });

  const commentBody = [
    "Paperclip detected critical output silence on this issue's active run.",
    "",
    `- Evaluation issue: ${evaluationIssue.identifier ?? evaluationIssue.id}`,
    `- Run: \`${run.id}\``,
    "",
    "This blocks the source issue on the explicit review task without cancelling the active process.",
  ].join("\n");
  await issuesSvc.addComment(sourceIssue.id, commentBody, { runId: run.id });

  await logActivity(db, {
    companyId: sourceIssue.companyId,
    actorType: "system",
    actorId: "system",
    agentId: null,
    runId: run.id,
    action: "heartbeat.output_stale_escalated",
    entityType: "issue",
    entityId: sourceIssue.id,
    details: {
      source: "recovery.scan_silent_active_runs",
      evaluationIssueId: evaluationIssue.id,
      blockerIssueIds: nextBlockerIds,
    },
  });
  return true;
}
/**
 * Evaluates one silent active run and creates, updates, or folds its stale-run
 * evaluation issue.
 *
 * @returns an object tagged by `kind`:
 *   - `skipped`: the running agent is missing or belongs to another company, or
 *     the source issue is itself a recovery-origin issue (the refusal is logged);
 *   - the result of `foldSourceResolvedStaleRun` when the source issue is
 *     terminal and same-run terminal evidence exists;
 *   - `escalated`: an open evaluation existed and was raised to high priority
 *     because the critical threshold was crossed;
 *   - `existing`: an open evaluation already existed, including one found after
 *     losing a creation race;
 *   - `created`: a new evaluation issue was created.
 */
async function createOrUpdateStaleRunEvaluation(input: {
run: typeof heartbeatRuns.$inferSelect;
now: Date;
}) {
const runningAgent = await getAgent(input.run.agentId);
if (!runningAgent || runningAgent.companyId !== input.run.companyId) return { kind: "skipped" as const };
const sourceIssue = await resolveStaleRunSourceIssue(input.run);
const existing = await findOpenStaleRunEvaluation(input.run.companyId, input.run.id);
// Runs working on a recovery-origin issue do not get an evaluation of their own;
// the refusal is recorded in the activity log instead.
if (sourceIssue && isRecoveryOriginIssue(sourceIssue)) {
await logActivity(db, {
companyId: input.run.companyId,
actorType: "system",
actorId: "system",
agentId: input.run.agentId,
runId: input.run.id,
action: "heartbeat.output_stale_recovery_recursion_refused",
entityType: "heartbeat_run",
entityId: input.run.id,
details: {
source: "recovery.scan_silent_active_runs",
sourceIssueId: sourceIssue.id,
sourceIssueIdentifier: sourceIssue.identifier,
sourceIssueOriginKind: sourceIssue.originKind,
existingEvaluationIssueId: existing?.id ?? null,
},
});
return { kind: "skipped" as const };
}
const silenceStartedAt = silenceStartedAtForRun(input.run);
// A terminal source issue backed by same-run evidence is folded instead of evaluated.
if (sourceIssue && isTerminalIssueStatus(sourceIssue.status)) {
const terminalEvidence = await latestSameRunSourceTerminalEvidence({
run: input.run,
sourceIssue,
evidenceAfter: silenceStartedAt,
});
if (terminalEvidence) {
return foldSourceResolvedStaleRun({
run: input.run,
runningAgent,
sourceIssue,
evidence: terminalEvidence,
existingEvaluation: existing,
silenceStartedAt,
silenceAgeMs: silenceAgeMsForRun(input.run, input.now),
now: input.now,
});
}
}
const prefix = await getCompanyIssuePrefix(input.run.companyId);
const evidence = await collectStaleRunEvidence({
run: input.run,
runningAgent,
sourceIssue,
prefix,
now: input.now,
});
// Severity depends only on how long the run has been silent.
const level = (evidence.silenceAgeMs ?? 0) >= ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS ? "critical" : "suspicious";
if (existing) {
// First time the critical threshold is seen for this evaluation: raise the
// priority, comment, and block the source issue on the evaluation.
if (level === "critical" && existing.priority !== "high") {
await issuesSvc.update(existing.id, {
priority: "high",
});
await issuesSvc.addComment(existing.id, [
"Critical output silence threshold crossed.",
"",
`- Run: \`${input.run.id}\``,
`- Silent for: ${formatDuration(evidence.silenceAgeMs)}`,
`- Last output at: ${input.run.lastOutputAt?.toISOString() ?? "none recorded"}`,
].join("\n"), { runId: input.run.id });
await ensureSourceIssueBlockedByStaleEvaluation({
sourceIssue,
evaluationIssue: existing,
run: input.run,
});
return { kind: "escalated" as const, evaluationIssueId: existing.id };
}
// Already high priority: still make sure the source issue is blocked on the evaluation.
if (level === "critical") {
await ensureSourceIssueBlockedByStaleEvaluation({
sourceIssue,
evaluationIssue: existing,
run: input.run,
});
}
return { kind: "existing" as const, evaluationIssueId: existing.id };
}
const ownerAgentId = await resolveStaleRunOwnerAgentId({ run: input.run, runningAgent, sourceIssue });
const description = buildStaleRunEvaluationDescription({
run: input.run,
runningAgent,
sourceIssue,
prefix,
evidence,
level,
now: input.now,
});
let evaluation: Awaited<ReturnType<typeof issuesSvc.create>>;
try {
evaluation = await issuesSvc.create(input.run.companyId, {
title: `Review silent active run for ${runningAgent.name}`,
description,
status: "todo",
priority: level === "critical" ? "high" : "medium",
parentId: sourceIssue && !["done", "cancelled"].includes(sourceIssue.status) ? sourceIssue.id : null,
projectId: sourceIssue?.projectId ?? null,
goalId: sourceIssue?.goalId ?? null,
billingCode: sourceIssue?.billingCode ?? null,
assigneeAgentId: ownerAgentId,
assigneeAdapterOverrides: recoveryAssigneeAdapterOverrides("status_only"),
originKind: STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND,
originId: input.run.id,
originRunId: input.run.id,
originFingerprint: staleActiveRunOriginFingerprint(input.run.companyId, input.run.id),
});
} catch (error) {
// A stale-run evaluation uniqueness conflict is treated as a lost race when an
// open evaluation can be found afterwards; anything else is rethrown.
if (!isUniqueStaleRunEvaluationConflict(error)) throw error;
const raced = await findOpenStaleRunEvaluation(input.run.companyId, input.run.id);
if (!raced) throw error;
return { kind: "existing" as const, evaluationIssueId: raced.id };
}
await logActivity(db, {
companyId: input.run.companyId,
actorType: "system",
actorId: "system",
agentId: ownerAgentId,
runId: input.run.id,
action: "heartbeat.output_stale_detected",
entityType: "issue",
entityId: evaluation.id,
details: {
source: "recovery.scan_silent_active_runs",
level,
sourceIssueId: sourceIssue?.id ?? null,
silenceAgeMs: evidence.silenceAgeMs,
lastOutputAt: input.run.lastOutputAt?.toISOString() ?? null,
},
});
if (level === "critical") {
await ensureSourceIssueBlockedByStaleEvaluation({
sourceIssue,
evaluationIssue: evaluation,
run: input.run,
});
}
// Wake the assigned owner; both the payload and the context snapshot are passed
// through withRecoveryModelProfileHint with the "status_only" work class.
if (ownerAgentId) {
await deps.enqueueWakeup(ownerAgentId, {
source: "assignment",
triggerDetail: "system",
reason: "issue_assigned",
payload: withRecoveryModelProfileHint({
issueId: evaluation.id,
staleRunId: input.run.id,
sourceIssueId: sourceIssue?.id ?? null,
}, "status_only"),
requestedByActorType: "system",
requestedByActorId: null,
contextSnapshot: withRecoveryModelProfileHint({
issueId: evaluation.id,
taskId: evaluation.id,
wakeReason: "issue_assigned",
source: STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND,
staleRunId: input.run.id,
sourceIssueId: sourceIssue?.id ?? null,
}, "status_only"),
});
}
return { kind: "created" as const, evaluationIssueId: evaluation.id };
}
/**
 * Scans `running` heartbeat runs that have been silent for at least the
 * suspicion threshold and evaluates each one.
 *
 * Silence is measured from the first available of: last output, process start,
 * run start, run creation. At most 100 runs are examined per call, oldest first.
 * Runs covered by a quiet-until watchdog decision are counted as snoozed and not
 * evaluated.
 *
 * @param opts.now       Reference time; defaults to the current time.
 * @param opts.companyId Restricts the scan to one company when provided.
 * @returns per-outcome counters plus the ids of the evaluation issues touched.
 */
async function scanSilentActiveRuns(opts?: { now?: Date; companyId?: string }) {
  const now = opts?.now ?? new Date();
  const silentSince = new Date(now.getTime() - ACTIVE_RUN_OUTPUT_SUSPICION_THRESHOLD_MS);

  const candidates = await db
    .select()
    .from(heartbeatRuns)
    .where(
      and(
        opts?.companyId ? eq(heartbeatRuns.companyId, opts.companyId) : undefined,
        eq(heartbeatRuns.status, "running"),
        sql`coalesce(${heartbeatRuns.lastOutputAt}, ${heartbeatRuns.processStartedAt}, ${heartbeatRuns.startedAt}, ${heartbeatRuns.createdAt}) <= ${silentSince.toISOString()}::timestamptz`,
      ),
    )
    .orderBy(asc(heartbeatRuns.createdAt))
    .limit(100);

  const tally = {
    scanned: candidates.length,
    created: 0,
    existing: 0,
    escalated: 0,
    folded: 0,
    snoozed: 0,
    skipped: 0,
    evaluationIssueIds: [] as string[],
  };

  for (const run of candidates) {
    const quietDecision = await latestActiveOutputQuietUntilDecision(run.companyId, run.id, now);
    if (quietDecision) {
      tally.snoozed += 1;
      continue;
    }

    const outcome = await createOrUpdateStaleRunEvaluation({ run, now });
    switch (outcome.kind) {
      case "created":
        tally.created += 1;
        break;
      case "existing":
        tally.existing += 1;
        break;
      case "escalated":
        tally.escalated += 1;
        break;
      case "folded":
        tally.folded += 1;
        break;
      default:
        tally.skipped += 1;
        break;
    }

    if ("evaluationIssueId" in outcome && outcome.evaluationIssueId) {
      tally.evaluationIssueIds.push(outcome.evaluationIssueId);
    }
  }
  return tally;
}
/**
 * Records a watchdog decision (`snooze`, `continue`, or
 * `dismissed_false_positive`) for a heartbeat run and logs the matching activity.
 *
 * Authorization: board actors are allowed. Agent actors must be the assignee of
 * a visible, not-yet-done/cancelled stale-run evaluation issue whose origin is
 * the target run. Any supplied evaluation issue must be bound to the target run,
 * whoever the actor is.
 *
 * @throws notFound when the run, or the referenced evaluation issue within the
 *   run's company, does not exist.
 * @throws forbidden when the actor is not authorized, the evaluation issue is not
 *   bound to the run, or the creator run is missing, belongs to another company,
 *   or (for agent actors) belongs to a different agent.
 * @returns the inserted watchdog decision row.
 */
async function recordWatchdogDecision(input: {
runId: string;
actor: WatchdogDecisionActor;
decision: "snooze" | "continue" | "dismissed_false_positive";
evaluationIssueId?: string | null;
reason?: string | null;
snoozedUntil?: Date | null;
createdByRunId?: string | null;
now?: Date;
}) {
const [run] = await db
.select()
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, input.runId))
.limit(1);
if (!run) throw notFound("Heartbeat run not found");
let evaluationIssue: {
id: string;
assigneeAgentId: string | null;
companyId: string;
originKind: string;
originId: string | null;
hiddenAt: Date | null;
status: string;
} | null = null;
// The evaluation issue is only looked up inside the run's company.
if (input.evaluationIssueId) {
evaluationIssue = await db
.select({
id: issues.id,
assigneeAgentId: issues.assigneeAgentId,
companyId: issues.companyId,
originKind: issues.originKind,
originId: issues.originId,
hiddenAt: issues.hiddenAt,
status: issues.status,
})
.from(issues)
.where(and(eq(issues.id, input.evaluationIssueId), eq(issues.companyId, run.companyId)))
.then((rows) => rows[0] ?? null);
if (!evaluationIssue) throw notFound("Evaluation issue not found");
}
const boardActor = input.actor.type === "board";
// An agent qualifies only as the assignee of an open, visible evaluation issue
// that originated from this exact run.
const assignedRecoveryOwner =
input.actor.type === "agent" &&
Boolean(input.actor.agentId) &&
evaluationIssue !== null &&
evaluationIssue.originKind === STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND &&
evaluationIssue.originId === run.id &&
evaluationIssue.hiddenAt === null &&
!["done", "cancelled"].includes(evaluationIssue.status) &&
evaluationIssue?.assigneeAgentId === input.actor.agentId;
if (!boardActor && !assignedRecoveryOwner) {
throw forbidden("Only the board or the assigned recovery owner can record watchdog decisions");
}
// Applies to board actors too: a supplied evaluation issue must belong to this run.
if (evaluationIssue && (
evaluationIssue.originKind !== STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND ||
evaluationIssue.originId !== run.id
)) {
throw forbidden("Watchdog decision evaluation issue is not bound to the target run");
}
if (input.actor.type === "agent" && !evaluationIssue) {
throw forbidden("Agent watchdog decisions require the target evaluation issue");
}
// The actor's own run id takes precedence over the explicitly supplied one.
const createdByRunId = input.actor.type === "agent"
? input.actor.runId ?? input.createdByRunId ?? null
: input.actor.type === "board"
? input.actor.runId ?? input.createdByRunId ?? null
: null;
// The creator run must exist, be in the same company as the target run, and,
// for agent actors, belong to the acting agent.
if (createdByRunId) {
const [creatorRun] = await db
.select({ id: heartbeatRuns.id, companyId: heartbeatRuns.companyId, agentId: heartbeatRuns.agentId })
.from(heartbeatRuns)
.where(eq(heartbeatRuns.id, createdByRunId))
.limit(1);
const sameCompany = creatorRun?.companyId === run.companyId;
const sameAgent = input.actor.type !== "agent" || creatorRun?.agentId === input.actor.agentId;
if (!creatorRun || !sameCompany || !sameAgent) {
throw forbidden("createdByRunId is not valid for this watchdog decision actor");
}
}
const decisionNow = input.now ?? new Date();
// "snooze" stores the supplied time as given (or null). "continue" keeps a
// supplied time that is still in the future, otherwise re-arms after
// ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS. Other decisions store no snooze time.
const effectiveSnoozedUntil = input.decision === "snooze"
? input.snoozedUntil ?? null
: input.decision === "continue"
? input.snoozedUntil && input.snoozedUntil > decisionNow
? input.snoozedUntil
: new Date(decisionNow.getTime() + ACTIVE_RUN_OUTPUT_CONTINUE_REARM_MS)
: null;
const [row] = await db
.insert(heartbeatRunWatchdogDecisions)
.values({
companyId: run.companyId,
runId: run.id,
evaluationIssueId: input.evaluationIssueId ?? null,
decision: input.decision,
snoozedUntil: effectiveSnoozedUntil,
reason: input.reason ?? null,
createdByAgentId: input.actor.type === "agent" ? input.actor.agentId ?? null : null,
createdByUserId: input.actor.type === "board" ? input.actor.userId ?? null : null,
createdByRunId,
})
.returning();
await logActivity(db, {
companyId: run.companyId,
actorType: input.actor.type === "agent" ? "agent" : "user",
actorId: input.actor.type === "agent"
? input.actor.agentId ?? "agent"
: input.actor.type === "board"
? input.actor.userId ?? "board"
: "unknown",
agentId: input.actor.type === "agent" ? input.actor.agentId ?? null : null,
runId: run.id,
action: input.decision === "snooze" ? "heartbeat.watchdog_snoozed" : "heartbeat.watchdog_decision_recorded",
entityType: "heartbeat_run",
entityId: run.id,
details: {
source: "recovery.record_watchdog_decision",
decision: input.decision,
evaluationIssueId: input.evaluationIssueId ?? null,
snoozedUntil: effectiveSnoozedUntil?.toISOString() ?? null,
reason: input.reason ?? null,
},
});
return row;
}
/**
 * Finds the newest visible stranded-issue recovery issue for a source issue that
 * is neither `done` nor `cancelled`.
 *
 * @returns the recovery issue row, or `null` when none is open.
 */
async function findOpenStrandedIssueRecoveryIssue(companyId: string, sourceIssueId: string) {
  const openRecoveryForSource = and(
    eq(issues.companyId, companyId),
    eq(issues.originKind, STRANDED_ISSUE_RECOVERY_ORIGIN_KIND),
    eq(issues.originId, sourceIssueId),
    isNull(issues.hiddenAt),
    notInArray(issues.status, ["done", "cancelled"]),
  );
  const [newest] = await db
    .select()
    .from(issues)
    .where(openRecoveryForSource)
    .orderBy(desc(issues.createdAt))
    .limit(1);
  return newest ?? null;
}
/** Reports whether the issue's origin kind marks it as a stranded-issue recovery issue. */
function isStrandedIssueRecoveryIssue(issue: typeof issues.$inferSelect) {
  const { originKind } = issue;
  return originKind === STRANDED_ISSUE_RECOVERY_ORIGIN_KIND;
}
/**
 * Builds the markdown lines explaining that nested recovery was suppressed for
 * an issue that is itself a stranded-issue recovery issue.
 *
 * The original source issue (taken from the issue's `originId`) is rendered as
 * a link when it can be loaded within the same company, as a raw id when it
 * cannot, and as "unknown" when no origin id is present.
 */
async function buildNestedStrandedRecoveryLine(issue: typeof issues.$inferSelect, prefix: string) {
  const sourceIssueId = readNonEmptyString(issue.originId);

  let sourceLine = "- Original source issue: unknown";
  if (sourceIssueId) {
    const [sourceIssue] = await db
      .select({ id: issues.id, identifier: issues.identifier })
      .from(issues)
      .where(and(eq(issues.companyId, issue.companyId), eq(issues.id, sourceIssueId)));
    if (sourceIssue) {
      sourceLine = `- Original source issue: ${issueUiLink(sourceIssue, prefix)}`;
    } else {
      sourceLine = `- Original source issue: \`${sourceIssueId}\``;
    }
  }

  const lines = [
    "",
    "- Nested recovery: suppressed because this issue is already a `stranded_issue_recovery` issue.",
    sourceLine,
    "- Next action: the assigned recovery owner or board operator should fix the runtime/adapter problem, resolve or reassign the original source issue, then mark this recovery issue done or cancelled.",
  ];
  return lines.join("\n");
}
/**
 * Picks the agent who should own recovery of a stranded issue.
 *
 * Candidates are tried in this order, skipping duplicates: the assignee's
 * manager, the creator's manager, the creator, the company's `cto` agents
 * followed by `ceo` agents (oldest first within each role), and finally the
 * assignee. The first candidate that belongs to the issue's company, is
 * invokable, and has no budget invocation block is returned.
 *
 * @returns the chosen agent id, or `null` when no candidate qualifies.
 */
async function resolveStrandedIssueRecoveryOwnerAgentId(issue: typeof issues.$inferSelect) {
  const { assigneeAgentId, createdByAgentId, companyId } = issue;
  const orderedCandidateIds: string[] = [];

  if (assigneeAgentId) {
    const assignee = await getAgent(assigneeAgentId);
    if (assignee?.reportsTo) orderedCandidateIds.push(assignee.reportsTo);
  }
  if (createdByAgentId) {
    const creator = await getAgent(createdByAgentId);
    if (creator?.reportsTo) orderedCandidateIds.push(creator.reportsTo);
    orderedCandidateIds.push(createdByAgentId);
  }

  const executives = await db
    .select()
    .from(agents)
    .where(and(eq(agents.companyId, companyId), inArray(agents.role, ["cto", "ceo"])))
    .orderBy(sql`case when ${agents.role} = 'cto' then 0 else 1 end`, asc(agents.createdAt));
  for (const executive of executives) orderedCandidateIds.push(executive.id);

  // The assignee is the last resort, after managers, creator and executives.
  if (assigneeAgentId) orderedCandidateIds.push(assigneeAgentId);

  // A Set keeps first-insertion order, so this drops repeats without reordering.
  for (const agentId of Array.from(new Set(orderedCandidateIds))) {
    const candidate = await getAgent(agentId);
    if (!candidate || candidate.companyId !== companyId) continue;

    const budgetBlock = await budgets.getInvocationBlock(companyId, candidate.id, {
      issueId: issue.id,
      projectId: issue.projectId,
    });
    const eligible = isAgentInvokable(candidate) && !budgetBlock;
    if (eligible) return candidate.id;
  }
  return null;
}
/**
 * Renders the markdown description used for a stranded-issue recovery task.
 *
 * Two layouts exist. When `recoveryCause` equals
 * `SUCCESSFUL_RUN_MISSING_STATE_REASON`, the text describes a successful run that
 * left the issue without a valid disposition. For any other cause it produces the
 * generic report containing the retry reason and failure summary.
 *
 * @param input Source issue, its latest run (may be absent), the status the source
 *   issue had before escalation, the link prefix, and optional handoff evidence and
 *   source assignee.
 * @returns The description as newline-joined markdown.
 */
function buildStrandedIssueRecoveryDescription(input: {
  issue: typeof issues.$inferSelect;
  latestRun: LatestIssueRun;
  previousStatus: "todo" | "in_progress";
  prefix: string;
  recoveryCause?: StrandedRecoveryCause;
  successfulRunHandoffEvidence?: SuccessfulRunHandoffRecoveryEvidence | null;
  sourceAssignee?: Pick<typeof agents.$inferSelect, "id" | "name"> | null;
}) {
  const { issue, latestRun, prefix } = input;
  const sourceIssue = issueUiLink({ identifier: issue.identifier, id: issue.id }, prefix);

  // Without a run there is nothing to link to.
  let runLink = "none";
  if (latestRun) {
    runLink = `[\`${latestRun.id}\`](/${prefix}/agents/${latestRun.agentId}/runs/${latestRun.id})`;
  }

  if (input.recoveryCause === SUCCESSFUL_RUN_MISSING_STATE_REASON) {
    const evidence = input.successfulRunHandoffEvidence;
    const sourceRunId = evidence?.sourceRunId;
    // The source-run link reuses the latest run's agent id, so both values are required.
    let sourceRunLink = "unknown";
    if (sourceRunId && latestRun) {
      sourceRunLink = `[\`${sourceRunId}\`](/${prefix}/agents/${latestRun.agentId}/runs/${sourceRunId})`;
    }
    const missingDisposition = evidence?.missingDisposition ?? "clear_next_step";

    const handoffLines = [
      "Paperclip exhausted the bounded corrective handoff for a successful run that still has no valid issue disposition.",
      "",
      "This is not a runtime/adapter crash report. The source run succeeded; the remaining problem is the missing `done`, `in_review`, `blocked`, delegated follow-up, or explicit continuation path.",
      "",
      "## Safe Evidence",
      "",
      `- Source issue: ${sourceIssue}`,
      `- Source run: ${sourceRunLink}`,
      `- Corrective handoff run: ${runLink}`,
      `- Source assignee: ${agentUiLink(input.sourceAssignee ?? null, prefix)}`,
      `- Latest issue status: \`${issue.status}\``,
      `- Latest handoff run status: \`${latestRun?.status ?? "unknown"}\``,
      `- Normalized cause: \`${SUCCESSFUL_RUN_MISSING_STATE_REASON}\``,
      `- Missing disposition: \`${missingDisposition}\``,
      "- Suggested manager action: choose and record a valid issue disposition without copying transcript content.",
      "",
      "## Required Action",
      "",
      "- Inspect the source issue and run metadata, not raw transcript excerpts.",
      "- Choose a valid issue disposition: `done`/`cancelled`, `in_review` with an owner, `blocked` with first-class blockers, delegated follow-up work, or an explicit continuation path.",
      "- When the source issue has a clear owner and disposition, mark this recovery issue done.",
    ];
    return handoffLines.join("\n");
  }

  const retryReason = readNonEmptyString(parseObject(latestRun?.contextSnapshot)?.retryReason) ?? "unknown";
  const failureSummary = summarizeRunFailureForIssueComment(latestRun);
  const failureLine = failureSummary
    ? `- Failure: ${failureSummary.trim()}`
    : "- Failure: none recorded";

  const strandedLines = [
    "Paperclip exhausted automatic recovery for an assigned issue and created this explicit recovery task.",
    "",
    "## Source",
    "",
    `- Source issue: ${sourceIssue}`,
    `- Previous source status: \`${input.previousStatus}\``,
    `- Latest retry run: ${runLink}`,
    `- Latest retry status: \`${latestRun?.status ?? "unknown"}\``,
    `- Detected invariant: \`stranded_assigned_issue\``,
    `- Retry reason: \`${retryReason}\``,
    failureLine,
    "",
    "## Ownership",
    "",
    "- Selected owner: the first invokable manager/creator/executive candidate with budget available.",
    "",
    "## Required Action",
    "",
    "- Inspect the latest run and source issue state.",
    "- Fix the runtime/adapter problem, reassign the source issue, or convert the source issue into a clear manual-review state.",
    "- When the source issue has a live execution path or has been intentionally resolved, mark this recovery issue done.",
  ];
  return strandedLines.join("\n");
}
/**
 * Returns the open recovery issue for a stranded source issue, creating one (and
 * waking its owner) when none exists yet.
 *
 * Returns `null` when the source issue is itself a recovery issue or when no
 * recovery owner could be resolved. If creation fails with a unique
 * stranded-recovery conflict, the concurrently created issue is looked up and
 * returned instead; any other error is rethrown.
 *
 * @param input Source issue, its latest run (may be absent), previous status, and
 *   optional cause / handoff evidence. The cause defaults to
 *   `"stranded_assigned_issue"`.
 */
async function ensureStrandedIssueRecoveryIssue(input: {
  issue: typeof issues.$inferSelect;
  latestRun: LatestIssueRun;
  previousStatus: "todo" | "in_progress";
  recoveryCause?: StrandedRecoveryCause;
  successfulRunHandoffEvidence?: SuccessfulRunHandoffRecoveryEvidence | null;
}) {
  const { issue, latestRun } = input;

  if (isStrandedIssueRecoveryIssue(issue)) return null;

  const alreadyOpen = await findOpenStrandedIssueRecoveryIssue(issue.companyId, issue.id);
  if (alreadyOpen) return alreadyOpen;

  const ownerAgentId = await resolveStrandedIssueRecoveryOwnerAgentId(issue);
  if (!ownerAgentId) return null;

  const prefix = await getCompanyIssuePrefix(issue.companyId);
  const sourceAssignee = issue.assigneeAgentId ? await getAgent(issue.assigneeAgentId) : null;
  const recoveryCause = input.recoveryCause ?? "stranded_assigned_issue";

  const titleLead = recoveryCause === SUCCESSFUL_RUN_MISSING_STATE_REASON
    ? "Recover missing next step"
    : "Recover stalled issue";
  const titleSubject = issue.identifier ?? issue.title;
  const strandedRunId = latestRun?.id ?? null;

  let recovery: Awaited<ReturnType<typeof issuesSvc.create>>;
  try {
    recovery = await issuesSvc.create(issue.companyId, {
      title: `${titleLead} ${titleSubject}`,
      description: buildStrandedIssueRecoveryDescription({
        issue,
        latestRun,
        previousStatus: input.previousStatus,
        prefix,
        recoveryCause,
        successfulRunHandoffEvidence: input.successfulRunHandoffEvidence,
        sourceAssignee,
      }),
      status: "todo",
      priority: issue.priority,
      parentId: issue.id,
      projectId: issue.projectId,
      goalId: issue.goalId,
      assigneeAgentId: ownerAgentId,
      assigneeAdapterOverrides: recoveryAssigneeAdapterOverrides("status_only"),
      originKind: STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
      originId: issue.id,
      originRunId: strandedRunId,
      originFingerprint: [
        STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
        issue.companyId,
        issue.id,
        recoveryCause,
        latestRun?.id ?? "no-run",
      ].join(":"),
      billingCode: issue.billingCode,
      inheritExecutionWorkspaceFromIssueId: issue.id,
    });
  } catch (error) {
    // Only a unique-conflict is recoverable: another caller created the issue first.
    if (!isUniqueStrandedIssueRecoveryConflict(error)) throw error;
    const raced = await findOpenStrandedIssueRecoveryIssue(issue.companyId, issue.id);
    if (!raced) throw error;
    return raced;
  }

  const wakePayload = withRecoveryModelProfileHint({
    issueId: recovery.id,
    sourceIssueId: issue.id,
    strandedRunId,
    recoveryCause,
  }, "status_only");
  const wakeContext = withRecoveryModelProfileHint({
    issueId: recovery.id,
    taskId: recovery.id,
    wakeReason: "issue_assigned",
    source: STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
    sourceIssueId: issue.id,
    strandedRunId,
    recoveryCause,
  }, "status_only");

  await deps.enqueueWakeup(ownerAgentId, {
    source: "assignment",
    triggerDetail: "system",
    reason: "issue_assigned",
    payload: wakePayload,
    requestedByActorType: "system",
    requestedByActorId: null,
    contextSnapshot: wakeContext,
  });
  return recovery;
}
/**
 * Maps a stranded-recovery cause to the recovery action kind:
 * `"missing_disposition"` for `SUCCESSFUL_RUN_MISSING_STATE_REASON`, otherwise
 * `"stranded_assigned_issue"`.
 */
function strandedRecoveryActionKind(cause: StrandedRecoveryCause) {
  if (cause === SUCCESSFUL_RUN_MISSING_STATE_REASON) {
    return "missing_disposition" as const;
  }
  return "stranded_assigned_issue" as const;
}
/**
 * Builds the fingerprint string for a source-scoped recovery action: the fixed
 * `source_scoped_recovery` tag, the issue's company id, the issue id, and the
 * recovery cause, joined with `:`.
 */
function strandedRecoveryActionFingerprint(input: {
  issue: typeof issues.$inferSelect;
  recoveryCause: StrandedRecoveryCause;
}) {
  const { issue, recoveryCause } = input;
  const segments = ["source_scoped_recovery", issue.companyId, issue.id, recoveryCause];
  return segments.join(":");
}
/**
 * Assembles the evidence record stored on a source-scoped recovery action.
 *
 * It combines identifiers and status of the source issue, the id / status / error
 * code of the latest run (each `null` when there is no run), the retry reason read
 * from the run's context snapshot, and the successful-run handoff fields (each
 * `null` when no handoff evidence was supplied).
 */
function buildStrandedRecoveryActionEvidence(input: {
  issue: typeof issues.$inferSelect;
  latestRun: LatestIssueRun;
  previousStatus: "todo" | "in_progress";
  recoveryCause: StrandedRecoveryCause;
  successfulRunHandoffEvidence?: SuccessfulRunHandoffRecoveryEvidence | null;
}) {
  const { issue, latestRun, previousStatus, recoveryCause } = input;
  const handoff = input.successfulRunHandoffEvidence;
  const runContext = parseObject(latestRun?.contextSnapshot);

  return {
    sourceIssueId: issue.id,
    sourceIdentifier: issue.identifier,
    previousStatus,
    latestIssueStatus: issue.status,
    latestRunId: latestRun?.id ?? null,
    latestRunStatus: latestRun?.status ?? null,
    latestRunErrorCode: latestRun?.errorCode ?? null,
    retryReason: readNonEmptyString(runContext.retryReason) ?? null,
    recoveryCause,
    sourceRunId: handoff?.sourceRunId ?? null,
    correctiveRunId: handoff?.correctiveRunId ?? null,
    missingDisposition: handoff?.missingDisposition ?? null,
    handoffAttempt: handoff?.handoffAttempt ?? null,
    maxHandoffAttempts: handoff?.maxHandoffAttempts ?? null,
  };
}
/**
 * Upserts the source-scoped recovery action for a stranded issue and returns it.
 *
 * The action is owned by the resolved recovery owner agent when one exists;
 * otherwise it is recorded as a board escalation. The issue's current assignee is
 * stored as both the previous owner and the return owner.
 *
 * @param input Source issue, latest run, previous status, and optional cause /
 *   handoff evidence. The cause defaults to `"stranded_assigned_issue"`.
 */
async function ensureSourceScopedStrandedRecoveryAction(input: {
  issue: typeof issues.$inferSelect;
  latestRun: LatestIssueRun;
  previousStatus: "todo" | "in_progress";
  recoveryCause?: StrandedRecoveryCause;
  successfulRunHandoffEvidence?: SuccessfulRunHandoffRecoveryEvidence | null;
}) {
  const { issue } = input;
  const recoveryCause = input.recoveryCause ?? "stranded_assigned_issue";
  const ownerAgentId = await resolveStrandedIssueRecoveryOwnerAgentId(issue);
  const now = new Date();

  const nextAction = recoveryCause === SUCCESSFUL_RUN_MISSING_STATE_REASON
    ? "Choose and record a valid issue disposition without copying transcript content."
    : "Restore a live execution path, fix the runtime/adapter failure, or record an intentional manual resolution.";

  return await recoveryActionsSvc.upsertSourceScoped({
    companyId: issue.companyId,
    sourceIssueId: issue.id,
    kind: strandedRecoveryActionKind(recoveryCause),
    ownerType: ownerAgentId ? "agent" : "board",
    ownerAgentId,
    previousOwnerAgentId: issue.assigneeAgentId,
    returnOwnerAgentId: issue.assigneeAgentId,
    cause: recoveryCause,
    fingerprint: strandedRecoveryActionFingerprint({ issue, recoveryCause }),
    evidence: buildStrandedRecoveryActionEvidence({
      issue,
      latestRun: input.latestRun,
      previousStatus: input.previousStatus,
      recoveryCause,
      successfulRunHandoffEvidence: input.successfulRunHandoffEvidence,
    }),
    nextAction,
    // No owner agent means nobody can be woken, so the action escalates to the board.
    wakePolicy: ownerAgentId
      ? {
          type: "wake_owner",
          reason: "source_scoped_recovery_action",
          ownerAgentId,
        }
      : {
          type: "board_escalation",
          reason: "no_invokable_recovery_owner",
        },
    monitorPolicy: null,
    maxAttempts: null,
    lastAttemptAt: now,
  });
}
/**
 * Enqueues a system wakeup for the owner agent of a source-scoped recovery action.
 *
 * Does nothing when the action has no owner agent. The idempotency key combines
 * the action id and its attempt count. Both the payload and the context snapshot
 * are passed through `withRecoveryModelProfileHint(..., "status_only")`.
 */
async function enqueueSourceScopedStrandedRecoveryWake(input: {
  action: Awaited<ReturnType<typeof recoveryActionsSvc.upsertSourceScoped>>;
  issue: typeof issues.$inferSelect;
  latestRun: LatestIssueRun;
  recoveryCause: StrandedRecoveryCause;
}) {
  const { action, issue, recoveryCause } = input;
  const ownerAgentId = action.ownerAgentId;
  if (!ownerAgentId) return;

  const strandedRunId = input.latestRun?.id ?? null;

  const wakePayload = withRecoveryModelProfileHint({
    issueId: issue.id,
    sourceIssueId: issue.id,
    recoveryActionId: action.id,
    strandedRunId,
    recoveryCause,
  }, "status_only");
  const wakeContext = withRecoveryModelProfileHint({
    issueId: issue.id,
    taskId: issue.id,
    wakeReason: "source_scoped_recovery_action",
    skipIssueComment: true,
    source: "issue_recovery_action",
    recoveryActionId: action.id,
    sourceIssueId: issue.id,
    strandedRunId,
    recoveryCause,
  }, "status_only");

  await deps.enqueueWakeup(ownerAgentId, {
    source: "assignment",
    triggerDetail: "system",
    reason: "source_scoped_recovery_action",
    idempotencyKey: `source_scoped_recovery_action:${action.id}:${action.attemptCount}`,
    payload: wakePayload,
    requestedByActorType: "system",
    requestedByActorId: null,
    contextSnapshot: wakeContext,
  });
}
/**
 * Renders the comment posted on a recovery issue when automatic stranded-work
 * recovery stops for it and the issue is blocked in place.
 *
 * @param input Recovery issue, its status before the block, the latest run (may be
 *   absent), and the link prefix.
 * @returns The comment body as newline-joined markdown.
 */
function buildRecoveryIssueInPlaceEscalationComment(input: {
  issue: typeof issues.$inferSelect;
  previousStatus: "todo" | "in_progress";
  latestRun: LatestIssueRun;
  prefix: string;
}) {
  const { issue, latestRun, prefix } = input;

  let runLink = "none";
  if (latestRun) {
    runLink = runUiLink({ id: latestRun.id, agentId: latestRun.agentId }, prefix);
  }
  const retryReason = readNonEmptyString(parseObject(latestRun?.contextSnapshot)?.retryReason) ?? "none";
  const failureSummary = summarizeRunFailureForIssueComment(latestRun);
  const failureLine = failureSummary
    ? `- Failure: ${failureSummary.trim()}`
    : "- Failure: none recorded";

  const lines = [
    "Paperclip stopped automatic stranded-work recovery for this recovery issue.",
    "",
    `- Recovery issue: ${issueUiLink({ identifier: issue.identifier, id: issue.id }, prefix)}`,
    `- Previous status: \`${input.previousStatus}\``,
    `- Latest run: ${runLink}`,
    `- Latest run status: \`${latestRun?.status ?? "unknown"}\``,
    `- Retry reason: \`${retryReason}\``,
    failureLine,
    "- Guard: recovery issues do not create nested `stranded_issue_recovery` issues.",
    "",
    "Next action: the current recovery owner should inspect the failed run evidence, restore a live execution path or record the manual resolution, then move this recovery issue out of `blocked`.",
  ];
  return lines.join("\n");
}
/**
 * Blocks a recovery issue in place instead of spawning another recovery issue.
 *
 * Sets the issue to `blocked`, posts the in-place escalation comment, and logs an
 * `issue.updated` activity entry.
 *
 * @returns The updated issue, or `null` when the status update returned nothing
 *   (in which case no comment or activity is written).
 */
async function escalateStrandedRecoveryIssueInPlace(input: {
  issue: typeof issues.$inferSelect;
  previousStatus: "todo" | "in_progress";
  latestRun: LatestIssueRun;
}) {
  const { issue, previousStatus, latestRun } = input;

  const updated = await issuesSvc.update(issue.id, { status: "blocked" });
  if (!updated) return null;

  const prefix = await getCompanyIssuePrefix(issue.companyId);
  const commentBody = buildRecoveryIssueInPlaceEscalationComment({
    issue,
    previousStatus,
    latestRun,
    prefix,
  });
  await issuesSvc.addComment(issue.id, commentBody, {});

  await logActivity(db, {
    companyId: issue.companyId,
    actorType: "system",
    actorId: "system",
    agentId: null,
    runId: null,
    action: "issue.updated",
    entityType: "issue",
    entityId: issue.id,
    details: {
      identifier: issue.identifier,
      status: "blocked",
      previousStatus,
      source: "recovery.reconcile_stranded_recovery_issue",
      latestRunId: latestRun?.id ?? null,
      latestRunStatus: latestRun?.status ?? null,
      latestRunErrorCode: latestRun?.errorCode ?? null,
      originKind: issue.originKind,
      originId: issue.originId,
    },
  });
  return updated;
}
/**
 * Lists the ids of every issue that has a `blocks` relation pointing at `issueId`
 * within the given company, regardless of the blocker's status.
 */
async function existingBlockerIssueIds(companyId: string, issueId: string) {
  const relationRows = await db
    .select({ blockerIssueId: issueRelations.issueId })
    .from(issueRelations)
    .where(
      and(
        eq(issueRelations.companyId, companyId),
        eq(issueRelations.relatedIssueId, issueId),
        eq(issueRelations.type, "blocks"),
      ),
    );
  return relationRows.map(({ blockerIssueId }) => blockerIssueId);
}
/**
 * Lists the ids of issues that block `issueId` (via a `blocks` relation in the
 * given company) and whose own status is neither `done` nor `cancelled`.
 */
async function existingUnresolvedBlockerIssueIds(companyId: string, issueId: string) {
  const relationRows = await db
    .select({ blockerIssueId: issueRelations.issueId })
    .from(issueRelations)
    .innerJoin(
      issues,
      and(
        eq(issues.companyId, issueRelations.companyId),
        eq(issues.id, issueRelations.issueId),
      ),
    )
    .where(
      and(
        eq(issueRelations.companyId, companyId),
        eq(issueRelations.relatedIssueId, issueId),
        eq(issueRelations.type, "blocks"),
        notInArray(issues.status, ["done", "cancelled"]),
      ),
    );
  return relationRows.map(({ blockerIssueId }) => blockerIssueId);
}
/**
 * Escalates an assigned issue whose automatic recovery is exhausted.
 *
 * Recovery issues are delegated to `escalateStrandedRecoveryIssueInPlace`. For any
 * other issue this function, in order:
 *  1. upserts the source-scoped recovery action,
 *  2. sets the issue to `blocked`, keeping only unresolved blockers and assigning
 *     it to the recovery owner when one exists,
 *  3. on the action's first attempt, posts a single escalation comment unless a
 *     recent system comment already references the action,
 *  4. logs an activity entry,
 *  5. enqueues a wake for the recovery owner,
 *  6. when the owner is the issue's original assignee, re-reads the issue and
 *     re-applies the blocked state if it has changed.
 *
 * @param input Source issue, status before escalation, latest run (may be absent),
 *   optional comment text, and optional cause / handoff evidence. The cause
 *   defaults to `"stranded_assigned_issue"`.
 * @returns The updated issue row (the re-blocked row when step 6 applied), or
 *   `null` when the blocking update returned nothing.
 */
async function escalateStrandedAssignedIssue(input: {
issue: typeof issues.$inferSelect;
previousStatus: "todo" | "in_progress";
latestRun: LatestIssueRun;
comment?: string;
recoveryCause?: StrandedRecoveryCause;
successfulRunHandoffEvidence?: SuccessfulRunHandoffRecoveryEvidence | null;
}) {
// A recovery issue is blocked in place rather than given its own recovery action.
if (isStrandedIssueRecoveryIssue(input.issue)) {
return escalateStrandedRecoveryIssueInPlace({
issue: input.issue,
previousStatus: input.previousStatus,
latestRun: input.latestRun,
});
}
const recoveryCause = input.recoveryCause ?? "stranded_assigned_issue";
const recoveryAction = await ensureSourceScopedStrandedRecoveryAction({
issue: input.issue,
previousStatus: input.previousStatus,
latestRun: input.latestRun,
recoveryCause,
successfulRunHandoffEvidence: input.successfulRunHandoffEvidence,
});
// Only blockers that are not done/cancelled are written back onto the issue.
const blockerIds = await existingUnresolvedBlockerIssueIds(input.issue.companyId, input.issue.id);
const updated = await issuesSvc.update(input.issue.id, {
status: "blocked",
blockedByIssueIds: blockerIds,
// Falls back to the current assignee when the action is a board escalation.
assigneeAgentId: recoveryAction.ownerAgentId ?? input.issue.assigneeAgentId,
});
if (!updated) return null;
const prefix = await getCompanyIssuePrefix(input.issue.companyId);
const recoveryOwner = recoveryAction.ownerAgentId ? await getAgent(recoveryAction.ownerAgentId) : null;
// Looked up from the pre-update issue row, i.e. the assignee before escalation.
const sourceAssignee = input.issue.assigneeAgentId ? await getAgent(input.issue.assigneeAgentId) : null;
// A structured notice is only built for the missing-disposition cause with evidence present.
let notice: SuccessfulRunHandoffNotice | null = null;
if (input.recoveryCause === SUCCESSFUL_RUN_MISSING_STATE_REASON && input.successfulRunHandoffEvidence) {
notice = buildSuccessfulRunHandoffExhaustedNotice({
issue: input.issue,
sourceRun: input.successfulRunHandoffEvidence.sourceRunId
? { id: input.successfulRunHandoffEvidence.sourceRunId, status: "succeeded" }
: null,
correctiveRun: input.latestRun ? { id: input.latestRun.id, status: input.latestRun.status } : null,
sourceAssignee,
recoveryIssue: null,
recoveryActionId: recoveryAction.id,
recoveryOwner,
latestIssueStatus: input.issue.status,
latestHandoffRunStatus: input.latestRun?.status ?? "unknown",
missingDisposition: input.successfulRunHandoffEvidence.missingDisposition,
});
}
// Suffix appended to the plain comment; the leading "" makes it start with a newline.
const recoveryLine = recoveryAction.ownerAgentId
? [
"",
`- Recovery action: \`${recoveryAction.id}\``,
`- Recovery owner: ${agentUiLink(recoveryOwner, prefix)}`,
"- Next action: the recovery owner should either restore a live execution path or record the manual resolution on the source issue.",
].join("\n")
: [
"",
`- Recovery action: \`${recoveryAction.id}\``,
"- Recovery owner: board escalation, because Paperclip could not find an invokable manager, creator, or executive owner with budget available.",
"- Next action: a board operator should assign an invokable recovery owner, fix the agent/runtime state, or record an intentional manual resolution.",
].join("\n");
// Comment only on the first attempt of this action.
if (recoveryAction.attemptCount === 1) {
const escalationCommentMarker = `Recovery action: \`${recoveryAction.id}\``;
// Scan the 50 most recent system comments for one that already references this action,
// either by the marker text in the body or through the comment metadata.
const hasEscalationComment = await db
.select({ id: issueComments.id, body: issueComments.body, metadata: issueComments.metadata })
.from(issueComments)
.where(
and(
eq(issueComments.issueId, input.issue.id),
eq(issueComments.authorType, "system"),
),
)
.orderBy(desc(issueComments.createdAt))
.limit(50)
.then((rows) => rows.some((row) =>
(row.body ?? "").includes(escalationCommentMarker) ||
noticeMetadataReferencesRecoveryAction(row.metadata, recoveryAction.id),
));
if (!hasEscalationComment) {
if (notice) {
await issuesSvc.addComment(input.issue.id, notice.body, {}, {
authorType: "system",
presentation: notice.presentation,
metadata: notice.metadata,
});
} else {
await issuesSvc.addComment(input.issue.id, `${input.comment ?? ""}${recoveryLine}`, {}, {
authorType: "system",
});
}
}
}
// The activity entry is written on every call, not just the first attempt.
await logActivity(db, {
companyId: input.issue.companyId,
actorType: "system",
actorId: "system",
agentId: null,
runId: null,
action: input.recoveryCause === SUCCESSFUL_RUN_MISSING_STATE_REASON
? "issue.successful_run_handoff_escalated"
: "issue.updated",
entityType: "issue",
entityId: input.issue.id,
details: {
identifier: input.issue.identifier,
status: "blocked",
previousStatus: input.previousStatus,
source: input.recoveryCause === SUCCESSFUL_RUN_MISSING_STATE_REASON
? "recovery.reconcile_successful_run_handoff_missing_state"
: "recovery.reconcile_stranded_assigned_issue",
recoveryCause: input.recoveryCause ?? "stranded_assigned_issue",
latestRunId: input.latestRun?.id ?? null,
latestRunStatus: input.latestRun?.status ?? null,
latestRunErrorCode: input.latestRun?.errorCode ?? null,
recoveryActionId: recoveryAction.id,
recoveryOwnerAgentId: recoveryAction.ownerAgentId,
previousOwnerAgentId: recoveryAction.previousOwnerAgentId,
returnOwnerAgentId: recoveryAction.returnOwnerAgentId,
blockerIssueIds: blockerIds,
},
});
// No-op inside the callee when the action has no owner agent.
await enqueueSourceScopedStrandedRecoveryWake({
action: recoveryAction,
issue: input.issue,
latestRun: input.latestRun,
recoveryCause,
});
// When the recovery owner is also the original assignee, re-read the row and re-apply the
// blocked state if status or assignee no longer match.
// NOTE(review): presumably guards against the wake enqueue changing the issue — confirm.
if (recoveryAction.ownerAgentId && recoveryAction.ownerAgentId === input.issue.assigneeAgentId) {
const [currentIssue] = await db
.select({
status: issues.status,
assigneeAgentId: issues.assigneeAgentId,
})
.from(issues)
.where(eq(issues.id, input.issue.id))
.limit(1);
if (
currentIssue &&
(currentIssue.status !== "blocked" ||
currentIssue.assigneeAgentId !== recoveryAction.ownerAgentId)
) {
const reblocked = await issuesSvc.update(input.issue.id, {
status: "blocked",
blockedByIssueIds: blockerIds,
assigneeAgentId: recoveryAction.ownerAgentId,
});
if (reblocked) return reblocked;
}
}
return updated;
}
/**
 * Scans agent-assigned `todo` / `in_progress` issues that have no user assignee and,
 * for each one lacking a live execution path, either dispatches it, requeues a
 * recovery wake, or escalates it to `blocked`.
 *
 * Candidates are processed sequentially. Each candidate increments exactly one of
 * the outcome counters below, except that a non-productive successful continuation
 * increments both `successfulContinuationObserved` and `skipped`. After the loop,
 * the results of `reconcileUnassignedBlockingIssues()` are merged in.
 *
 * @returns Counters per outcome plus `issueIds`, the ids of issues that were acted
 *   on (including those reported by the orphan-blocker pass).
 */
async function reconcileStrandedAssignedIssues() {
const candidates = await db
.select()
.from(issues)
.where(
and(
isNull(issues.assigneeUserId),
inArray(issues.status, ["todo", "in_progress"]),
sql`${issues.assigneeAgentId} is not null`,
),
);
// NOTE(review): `productiveContinuationObserved` is initialised here but never incremented
// in this function — confirm whether callers rely on it.
const result = {
assignmentDispatched: 0,
dispatchRequeued: 0,
continuationRequeued: 0,
productiveContinuationObserved: 0,
successfulContinuationObserved: 0,
orphanBlockersAssigned: 0,
successfulRunHandoffEscalated: 0,
escalated: 0,
skipped: 0,
issueIds: [] as string[],
};
for (const issue of candidates) {
// The query already excludes null assignees; this check narrows the type.
const agentId = issue.assigneeAgentId;
if (!agentId) {
result.skipped += 1;
continue;
}
// Skip when the assignee is missing, belongs to another company, or is not invokable.
const agent = await getAgent(agentId);
if (!agent || agent.companyId !== issue.companyId || !isAgentInvokable(agent)) {
result.skipped += 1;
continue;
}
// An issue that still has an active execution path is not stranded.
if (await hasActiveExecutionPath(issue.companyId, issue.id)) {
result.skipped += 1;
continue;
}
if (await isAutomaticRecoverySuppressedByPauseHold(db, issue.companyId, issue.id, treeControlSvc)) {
result.skipped += 1;
continue;
}
const latestRun = await getLatestIssueRun(issue.companyId, issue.id);
// Recovery issues whose latest run ended unsuccessfully are blocked in place.
if (isStrandedIssueRecoveryIssue(issue) && isUnsuccessfulTerminalIssueRun(latestRun)) {
const updated = await escalateStrandedRecoveryIssueInPlace({
issue,
previousStatus: issue.status as "todo" | "in_progress",
latestRun,
});
if (updated) {
result.escalated += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
// ---- `todo` issues ----
if (issue.status === "todo") {
// Never ran: send the initial dispatch unless a wake is already queued or budget blocks it.
if (!latestRun) {
if (await hasQueuedIssueWake(issue.companyId, issue.id)) {
result.skipped += 1;
continue;
}
if (await isInvocationBudgetBlocked(issue, agentId)) {
result.skipped += 1;
continue;
}
const queued = await enqueueInitialAssignedTodoDispatch(issue, agentId);
if (queued) {
result.assignmentDispatched += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
// A `todo` issue whose latest run succeeded is left alone.
if (latestRun.status === "succeeded") {
result.skipped += 1;
continue;
}
// The previous assignment-recovery retry already failed: escalate instead of retrying again.
if (didAutomaticRecoveryFail(latestRun, "assignment_recovery")) {
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
const updated = await escalateStrandedAssignedIssue({
issue,
previousStatus: "todo",
latestRun,
comment:
"Paperclip automatically retried dispatch for this assigned `todo` issue after a lost wake/run, " +
`but it still has no live execution path.${failureSummary ?? ""} ` +
"Moving it to `blocked` so it is visible for intervention.",
});
if (updated) {
result.escalated += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
if (await isInvocationBudgetBlocked(issue, agentId)) {
result.skipped += 1;
continue;
}
// Otherwise requeue one assignment-recovery retry of the latest run.
const queued = await enqueueStrandedIssueRecovery({
issueId: issue.id,
agentId,
reason: "issue_assignment_recovery",
retryReason: "assignment_recovery",
source: "issue.assignment_recovery",
retryOfRunId: latestRun.id,
});
if (queued) {
result.dispatchRequeued += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
// ---- remaining candidates (status `in_progress`, given the query filter) ----
// Nothing to continue when the issue has no run at all and no checkout/execution run id.
if (!latestRun && !issue.checkoutRunId && !issue.executionRunId) {
result.skipped += 1;
continue;
}
// Successful-run handoff in flight: wait until it is exhausted, then escalate with the
// missing-disposition cause.
const handoffEvidence = isExhaustedSuccessfulRunHandoff(latestRun);
if (handoffEvidence) {
if (!handoffEvidence.exhausted) {
result.skipped += 1;
continue;
}
const updated = await escalateStrandedAssignedIssue({
issue,
previousStatus: "in_progress",
latestRun,
recoveryCause: SUCCESSFUL_RUN_MISSING_STATE_REASON,
successfulRunHandoffEvidence: handoffEvidence,
});
if (updated) {
result.successfulRunHandoffEscalated += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
// Latest run was a successful in-progress continuation.
if (isSuccessfulInProgressContinuationRun(latestRun)) {
const successfulRun = latestRun;
// Non-productive success: counted as observed and skipped (both counters move).
if (!isProductiveContinuationRun(successfulRun)) {
result.successfulContinuationObserved += 1;
result.skipped += 1;
continue;
}
// A productive run that was itself already a continuation recovery: stop retrying and escalate.
if (isRepeatedProductiveContinuationRecovery(successfulRun)) {
const updated = await escalateStrandedAssignedIssue({
issue,
previousStatus: "in_progress",
latestRun: successfulRun,
comment:
"Paperclip automatically retried continuation for this assigned `in_progress` issue and the retry " +
"made progress, but it still has no live execution path. Moving it to `blocked` so it is visible for intervention.",
});
if (updated) {
result.escalated += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
if (await isInvocationBudgetBlocked(issue, agentId)) {
result.skipped += 1;
continue;
}
const queued = await enqueueStrandedIssueRecovery({
issueId: issue.id,
agentId,
reason: "issue_continuation_needed",
retryReason: "issue_continuation_needed",
source: "issue.productive_terminal_continuation_recovery",
retryOfRunId: successfulRun.id,
});
if (queued) {
result.continuationRequeued += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
// The previous continuation retry already failed: escalate.
if (didAutomaticRecoveryFail(latestRun, "issue_continuation_needed")) {
const failureSummary = summarizeRunFailureForIssueComment(latestRun);
const updated = await escalateStrandedAssignedIssue({
issue,
previousStatus: "in_progress",
latestRun,
comment:
"Paperclip automatically retried continuation for this assigned `in_progress` issue after its live " +
`execution disappeared, but it still has no live execution path.${failureSummary ?? ""} ` +
"Moving it to `blocked` so it is visible for intervention.",
});
if (updated) {
result.escalated += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
continue;
}
if (await isInvocationBudgetBlocked(issue, agentId)) {
result.skipped += 1;
continue;
}
// Default: requeue a continuation, retrying the latest run or, failing that, the checkout run.
const queued = await enqueueStrandedIssueRecovery({
issueId: issue.id,
agentId,
reason: "issue_continuation_needed",
retryReason: "issue_continuation_needed",
source: "issue.continuation_recovery",
retryOfRunId: latestRun?.id ?? issue.checkoutRunId ?? null,
});
if (queued) {
result.continuationRequeued += 1;
result.issueIds.push(issue.id);
} else {
result.skipped += 1;
}
}
// Fold in the separate pass over unassigned blocking issues.
const orphanBlockerRecovery = await reconcileUnassignedBlockingIssues();
result.orphanBlockersAssigned = orphanBlockerRecovery.assigned;
result.skipped += orphanBlockerRecovery.skipped;
result.issueIds.push(...orphanBlockerRecovery.issueIds);
return result;
}
/**
 * Loads the data needed to classify issue-graph liveness and returns the result of
 * `classifyIssueGraphLiveness`.
 *
 * Ten queries run concurrently: visible issues (excluding liveness-escalation
 * issues), `blocks` relations, agents, active heartbeat runs, active runs joined
 * through `issues.executionRunId`, queued/deferred wake requests, pending thread
 * interactions, pending / revision-requested approvals, open recovery issues, and
 * active/escalated recovery actions for the loaded issues. None of the queries
 * filters by company; rows carry `companyId` for the classifier.
 */
async function collectIssueGraphLivenessFindings() {
// Wrapped in Promise.resolve so the same promise can be both awaited in Promise.all below
// and chained by the recovery-actions query, which needs the issue ids.
const issueRowsPromise = Promise.resolve(db
.select({
id: issues.id,
companyId: issues.companyId,
identifier: issues.identifier,
title: issues.title,
status: issues.status,
projectId: issues.projectId,
goalId: issues.goalId,
parentId: issues.parentId,
assigneeAgentId: issues.assigneeAgentId,
assigneeUserId: issues.assigneeUserId,
createdByAgentId: issues.createdByAgentId,
createdByUserId: issues.createdByUserId,
executionPolicy: issues.executionPolicy,
executionState: issues.executionState,
monitorNextCheckAt: issues.monitorNextCheckAt,
monitorAttemptCount: issues.monitorAttemptCount,
})
.from(issues)
.where(
and(
isNull(issues.hiddenAt),
// NOTE(review): SQL `NOT IN` drops rows whose originKind is NULL — assumes the column is
// non-nullable; confirm against the schema.
notInArray(issues.originKind, [RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation]),
),
));
// The destructuring order must match the order of the promises passed to Promise.all.
const [
issueRows,
relationRows,
agentRows,
activeRunRows,
activeIssueRunRows,
wakeRows,
interactionRows,
approvalRows,
recoveryIssueRows,
recoveryActionRows,
] = await Promise.all([
issueRowsPromise,
// All `blocks` relations (blocker -> blocked).
db
.select({
companyId: issueRelations.companyId,
blockerIssueId: issueRelations.issueId,
blockedIssueId: issueRelations.relatedIssueId,
})
.from(issueRelations)
.where(eq(issueRelations.type, "blocks")),
// All agents.
db
.select({
id: agents.id,
companyId: agents.companyId,
name: agents.name,
role: agents.role,
title: agents.title,
status: agents.status,
reportsTo: agents.reportsTo,
})
.from(agents),
// Heartbeat runs in an execution-path status; the issue id is derived later from the context snapshot.
db
.select({
companyId: heartbeatRuns.companyId,
agentId: heartbeatRuns.agentId,
status: heartbeatRuns.status,
contextSnapshot: heartbeatRuns.contextSnapshot,
})
.from(heartbeatRuns)
.where(inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES])),
// Active runs linked directly through issues.executionRunId, for the same issue set as above.
db
.select({
companyId: issues.companyId,
agentId: heartbeatRuns.agentId,
status: heartbeatRuns.status,
issueId: issues.id,
})
.from(issues)
.innerJoin(heartbeatRuns, eq(issues.executionRunId, heartbeatRuns.id))
.where(
and(
isNull(issues.hiddenAt),
notInArray(issues.originKind, [RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation]),
inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
),
),
// Wake requests that are queued or deferred behind issue execution.
db
.select({
companyId: agentWakeupRequests.companyId,
agentId: agentWakeupRequests.agentId,
status: agentWakeupRequests.status,
payload: agentWakeupRequests.payload,
})
.from(agentWakeupRequests)
.where(inArray(agentWakeupRequests.status, ["queued", "deferred_issue_execution"])),
// Pending thread interactions.
db
.select({
companyId: issueThreadInteractions.companyId,
issueId: issueThreadInteractions.issueId,
status: issueThreadInteractions.status,
})
.from(issueThreadInteractions)
.where(eq(issueThreadInteractions.status, "pending")),
// Approvals that are pending or have a revision requested.
db
.select({
companyId: issueApprovals.companyId,
issueId: issueApprovals.issueId,
status: approvals.status,
})
.from(issueApprovals)
.innerJoin(approvals, eq(issueApprovals.approvalId, approvals.id))
.where(inArray(approvals.status, ["pending", "revision_requested"])),
// Visible, still-open recovery issues of either recovery origin kind.
db
.select({
companyId: issues.companyId,
id: issues.id,
status: issues.status,
originKind: issues.originKind,
originId: issues.originId,
})
.from(issues)
.where(
and(
isNull(issues.hiddenAt),
inArray(issues.originKind, [
STRANDED_ISSUE_RECOVERY_ORIGIN_KIND,
RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation,
]),
notInArray(issues.status, ["done", "cancelled"]),
),
),
// Active/escalated recovery actions for the loaded issues; the query is skipped when
// there are no issues to analyse.
issueRowsPromise.then((rows) => {
const issueIdsUnderAnalysis = rows.map((row) => row.id);
return issueIdsUnderAnalysis.length === 0
? []
: db
.select({
companyId: issueRecoveryActions.companyId,
issueId: issueRecoveryActions.sourceIssueId,
status: issueRecoveryActions.status,
})
.from(issueRecoveryActions)
.where(
and(
inArray(issueRecoveryActions.status, ["active", "escalated"]),
inArray(issueRecoveryActions.sourceIssueId, issueIdsUnderAnalysis),
),
);
}),
]);
// Translate each open recovery issue into the source issue id(s) it covers.
const openRecoveryIssues = recoveryIssueRows.flatMap((row) => {
if (row.originKind === RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation) {
// Liveness escalations encode an incident key in originId; rows with an unparsable key
// or a key from another company are dropped. Each yields two entries: the incident's
// issue and its leaf issue.
const parsed = parseIssueGraphLivenessIncidentKey(row.originId);
if (!parsed || parsed.companyId !== row.companyId) return [];
return [
{
companyId: row.companyId,
issueId: parsed.issueId,
status: row.status,
},
{
companyId: row.companyId,
issueId: parsed.leafIssueId,
status: row.status,
},
];
}
// Other recovery issues use originId directly as the covered issue id.
const issueId = readNonEmptyString(row.originId);
if (!issueId) return [];
return [{
companyId: row.companyId,
issueId,
status: row.status,
}];
});
return classifyIssueGraphLiveness({
issues: issueRows,
relations: relationRows,
agents: agentRows,
// Context-derived runs followed by runs linked through executionRunId.
activeRuns: activeRunRows.map((row) => ({
companyId: row.companyId,
agentId: row.agentId,
status: row.status,
issueId: issueIdFromRunContext(row.contextSnapshot),
})).concat(activeIssueRunRows.map((row) => ({
companyId: row.companyId,
agentId: row.agentId,
status: row.status,
issueId: row.issueId,
}))),
queuedWakeRequests: wakeRows.map((row) => ({
companyId: row.companyId,
agentId: row.agentId,
status: row.status,
issueId: issueIdFromWakePayload(row.payload),
})),
pendingInteractions: interactionRows,
pendingApprovals: approvalRows,
// Recovery actions are passed to the classifier alongside recovery issues.
openRecoveryIssues: openRecoveryIssues.concat(recoveryActionRows),
now: new Date(),
});
}
/**
 * Finds a visible, still-open liveness-escalation issue in the company whose
 * `originId` equals the given incident key.
 *
 * @returns The first matching issue row, or `null` when there is none.
 */
async function findOpenLivenessEscalation(companyId: string, incidentKey: string) {
  const [escalation] = await db
    .select()
    .from(issues)
    .where(
      and(
        eq(issues.companyId, companyId),
        eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation),
        eq(issues.originId, incidentKey),
        isNull(issues.hiddenAt),
        notInArray(issues.status, ["done", "cancelled"]),
      ),
    )
    .limit(1);
  return escalation ?? null;
}
/**
 * Finds an open liveness recovery issue that covers the same leaf issue as the
 * given finding.
 *
 * The lookup first matches on the leaf fingerprint. If that finds nothing, it loads
 * every open liveness-escalation issue of the company and returns the first whose
 * parsed incident key has the same state and leaf issue id as the finding.
 *
 * @returns The matching issue row, or `null`.
 */
async function findOpenLivenessRecoveryIssueForLeaf(finding: IssueLivenessFinding) {
  const [fingerprintMatch] = await db
    .select()
    .from(issues)
    .where(
      and(
        eq(issues.companyId, finding.companyId),
        eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation),
        eq(issues.originFingerprint, livenessRecoveryLeafFingerprint(finding)),
        isNull(issues.hiddenAt),
        notInArray(issues.status, ["done", "cancelled"]),
      ),
    )
    .limit(1);
  if (fingerprintMatch) return fingerprintMatch;

  // Fallback: compare parsed incident keys in memory.
  const leafIssueId = livenessRecoveryLeafIssueId(finding);
  const openRecoveries = await db
    .select()
    .from(issues)
    .where(
      and(
        eq(issues.companyId, finding.companyId),
        eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation),
        isNull(issues.hiddenAt),
        notInArray(issues.status, ["done", "cancelled"]),
      ),
    );
  for (const candidate of openRecoveries) {
    const parsed = parseLivenessIncidentKey(candidate.originId);
    if (parsed?.state === finding.state && parsed.leafIssueId === leafIssueId) {
      return candidate;
    }
  }
  return null;
}
/**
 * Removes a liveness recovery issue from the blocker list of the source issue
 * named in its incident key.
 *
 * @returns `true` when the source issue was updated; `false` when the incident key
 *   cannot be parsed, the source issue is not found in the recovery's company, or
 *   the recovery issue is not currently one of its blockers.
 */
async function removeRecoveryBlockerFromSource(recovery: typeof issues.$inferSelect) {
  const incident = parseLivenessIncidentKey(recovery.originId);
  if (!incident) return false;

  const [sourceIssue] = await db
    .select()
    .from(issues)
    .where(and(eq(issues.companyId, recovery.companyId), eq(issues.id, incident.issueId)));
  if (!sourceIssue) return false;

  const blockerIds = await existingBlockerIssueIds(sourceIssue.companyId, sourceIssue.id);
  if (!blockerIds.includes(recovery.id)) return false;

  const remainingBlockerIds = blockerIds.filter((blockerId) => blockerId !== recovery.id);
  await issuesSvc.update(sourceIssue.id, { blockedByIssueIds: remainingBlockerIds });
  return true;
}
/**
 * Reports whether the issue has a heartbeat run in an execution-path status.
 *
 * Two lookups run concurrently: runs whose context snapshot names the issue as
 * `issueId` or `taskId`, and the run referenced by the issue's `executionRunId`.
 *
 * @returns `true` when either lookup finds a run.
 */
async function hasActiveRunForIssueId(companyId: string, issueId: string) {
  const runsNamingIssueInContext = db
    .select({ id: heartbeatRuns.id })
    .from(heartbeatRuns)
    .where(
      and(
        eq(heartbeatRuns.companyId, companyId),
        inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
        // SQL text kept verbatim, including the unindented continuation line.
        sql`(${heartbeatRuns.contextSnapshot}->>'issueId' = ${issueId}
OR ${heartbeatRuns.contextSnapshot}->>'taskId' = ${issueId})`,
      ),
    )
    .limit(1);
  const runsLinkedFromIssue = db
    .select({ id: heartbeatRuns.id })
    .from(issues)
    .innerJoin(heartbeatRuns, eq(issues.executionRunId, heartbeatRuns.id))
    .where(
      and(
        eq(issues.companyId, companyId),
        eq(issues.id, issueId),
        inArray(heartbeatRuns.status, [...EXECUTION_PATH_HEARTBEAT_RUN_STATUSES]),
      ),
    )
    .limit(1);

  const [contextRows, issueRows] = await Promise.all([
    runsNamingIssueInContext,
    runsLinkedFromIssue,
  ]);
  return contextRows.length > 0 || issueRows.length > 0;
}
/**
 * Cancels open liveness-escalation issues that no longer correspond to any
 * current finding.
 *
 * A recovery is kept (not retired) when:
 *  - its `originId` is one of the current incident keys;
 *  - its incident key cannot be parsed;
 *  - its (company, state, leaf issue) key matches a current finding;
 *  - its source issue is still open and still lists it as a blocker
 *    (counted in `activeSkipped`);
 *  - a heartbeat run is still active for it (counted in `activeSkipped`).
 *
 * @param findings The current set of liveness findings.
 * @returns Counters plus the ids of the recovery issues that were cancelled.
 */
async function retireObsoleteLivenessRecoveryIssues(findings: IssueLivenessFinding[]) {
  const liveIncidentKeys = new Set(findings.map(({ incidentKey }) => incidentKey));
  const liveLeafKeys = new Set(
    findings.map((finding) =>
      livenessRecoveryLeafKey(finding.companyId, finding.state, livenessRecoveryLeafIssueId(finding)),
    ),
  );

  const openRecoveries = await db
    .select()
    .from(issues)
    .where(
      and(
        eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation),
        isNull(issues.hiddenAt),
        notInArray(issues.status, ["done", "cancelled"]),
      ),
    );

  let retired = 0;
  let activeSkipped = 0;
  let blockerRelationsRemoved = 0;
  const retiredIssueIds: string[] = [];

  for (const recovery of openRecoveries) {
    if (recovery.originId && liveIncidentKeys.has(recovery.originId)) continue;

    const incident = parseLivenessIncidentKey(recovery.originId);
    if (!incident) continue;

    const leafKey = livenessRecoveryLeafKey(incident.companyId, incident.state, incident.leafIssueId);
    if (liveLeafKeys.has(leafKey)) continue;

    const [source] = await db
      .select({
        id: issues.id,
        status: issues.status,
      })
      .from(issues)
      .where(and(eq(issues.companyId, incident.companyId), eq(issues.id, incident.issueId)));

    // An open source issue that is still blocked by this recovery keeps it alive.
    if (source && source.status !== "done" && source.status !== "cancelled") {
      const sourceBlockerIds = await existingBlockerIssueIds(incident.companyId, source.id);
      if (sourceBlockerIds.includes(recovery.id)) {
        activeSkipped += 1;
        continue;
      }
    }

    const relationRemoved = await removeRecoveryBlockerFromSource(recovery);
    if (relationRemoved) blockerRelationsRemoved += 1;

    // Do not cancel a recovery that a run is still working on.
    const runStillActive = await hasActiveRunForIssueId(recovery.companyId, recovery.id);
    if (runStillActive) {
      activeSkipped += 1;
      continue;
    }

    await issuesSvc.update(recovery.id, { status: "cancelled" });
    retired += 1;
    retiredIssueIds.push(recovery.id);
  }

  return { retired, activeSkipped, blockerRelationsRemoved, retiredIssueIds };
}
/**
 * Removes leftover blocker relations that point at liveness-escalation issues
 * which are already `done` or `cancelled` (and not hidden).
 *
 * @returns The number of source issues whose blocker list was updated.
 */
async function retireDoneLivenessRecoveryBlockers() {
  const finishedRecoveries = await db
    .select()
    .from(issues)
    .where(
      and(
        eq(issues.originKind, RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation),
        isNull(issues.hiddenAt),
        inArray(issues.status, ["done", "cancelled"]),
      ),
    );

  let blockerRelationsRemoved = 0;
  // Processed one at a time, in query order.
  for (const finished of finishedRecoveries) {
    const removed = await removeRecoveryBlockerFromSource(finished);
    if (removed) blockerRelationsRemoved += 1;
  }
  return { blockerRelationsRemoved };
}
/**
 * Converts a raw lookback setting into a whole number of hours clamped to the
 * configured minimum/maximum. Values `asNumber` rejects fall back to the
 * default lookback.
 *
 * @param raw Untrusted setting value.
 * @returns The floored and clamped lookback in hours.
 */
function normalizeIssueGraphLivenessAutoRecoveryLookbackHours(raw: unknown) {
  const requestedHours = Math.floor(
    asNumber(raw, DEFAULT_ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_LOOKBACK_HOURS),
  );
  const atLeastMinimum = Math.max(
    MIN_ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_LOOKBACK_HOURS,
    requestedHours,
  );
  return Math.min(MAX_ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_LOOKBACK_HOURS, atLeastMinimum);
}
/**
 * Builds the map key for a dependency issue: company id and issue id joined
 * by a colon.
 */
function livenessDependencyIssueKey(companyId: string, issueId: string) {
  return [companyId, issueId].join(":");
}
/**
 * Loads `updatedAt` for every issue that appears in the dependency path of any
 * finding.
 *
 * @param findings Findings whose dependency paths should be resolved.
 * @returns A map keyed by `livenessDependencyIssueKey(companyId, issueId)`;
 *   empty when no dependency path references an issue.
 */
async function loadLivenessDependencyUpdatedAtByIssue(findings: IssueLivenessFinding[]) {
  const referencedIssueIds = new Set<string>();
  for (const finding of findings) {
    for (const entry of finding.dependencyPath) {
      referencedIssueIds.add(entry.issueId);
    }
  }
  if (referencedIssueIds.size === 0) return new Map<string, Date>();

  const issueRows = await db
    .select({ id: issues.id, companyId: issues.companyId, updatedAt: issues.updatedAt })
    .from(issues)
    .where(inArray(issues.id, [...referencedIssueIds]));

  const updatedAtByKey = new Map<string, Date>();
  for (const issueRow of issueRows) {
    updatedAtByKey.set(livenessDependencyIssueKey(issueRow.companyId, issueRow.id), issueRow.updatedAt);
  }
  return updatedAtByKey;
}
/**
 * Returns the most recent `updatedAt` among the issues on a finding's
 * dependency path.
 *
 * @param finding Finding whose dependency path is inspected.
 * @param updatedAtByIssueKey Timestamps keyed by
 *   `livenessDependencyIssueKey(companyId, issueId)`.
 * @returns The latest timestamp, or `null` when the path is empty or any issue
 *   on it has no timestamp in the map.
 */
function latestDependencyUpdatedAtForLivenessFinding(
  finding: IssueLivenessFinding,
  updatedAtByIssueKey: Map<string, Date>,
) {
  const uniqueIssueIds = new Set(finding.dependencyPath.map((entry) => entry.issueId));
  if (uniqueIssueIds.size === 0) return null;

  let latest: Date | null = null;
  for (const issueId of uniqueIssueIds) {
    const updatedAt = updatedAtByIssueKey.get(
      livenessDependencyIssueKey(finding.companyId, issueId),
    );
    // One missing timestamp makes the whole answer unknown.
    if (!updatedAt) return null;
    if (latest === null || updatedAt > latest) latest = updatedAt;
  }
  return latest;
}
/**
 * Tells whether a finding's dependency path was touched at or after `cutoff`.
 *
 * @param finding Finding to test.
 * @param cutoff Oldest timestamp still considered inside the lookback window.
 * @param updatedAtByIssueKey Timestamps keyed by
 *   `livenessDependencyIssueKey(companyId, issueId)`.
 * @returns `false` when no latest timestamp can be determined; otherwise
 *   whether that timestamp is `>= cutoff`.
 */
function isLivenessFindingInsideAutoRecoveryLookback(
  finding: IssueLivenessFinding,
  cutoff: Date,
  updatedAtByIssueKey: Map<string, Date>,
) {
  const newestActivity = latestDependencyUpdatedAtForLivenessFinding(finding, updatedAtByIssueKey);
  if (!newestActivity) return false;
  return newestActivity >= cutoff;
}
/**
 * Builds a read-only preview of the liveness findings that fall inside the
 * lookback window. It only reads from the database; nothing is created.
 *
 * @param opts.lookbackHours Requested window in hours (normalized and clamped).
 * @param opts.now Reference time for the window; defaults to the current time.
 * @returns Window metadata, counts, and one preview item per finding whose
 *   latest dependency update is known and not older than the cutoff.
 */
async function buildIssueGraphLivenessAutoRecoveryPreview(
  opts?: { lookbackHours?: number; now?: Date },
): Promise<IssueGraphLivenessAutoRecoveryPreview> {
  const generatedAt = opts?.now ?? new Date();
  const lookbackHours = normalizeIssueGraphLivenessAutoRecoveryLookbackHours(opts?.lookbackHours);
  const cutoff = new Date(generatedAt.getTime() - lookbackHours * 60 * 60 * 1000);

  const findings = await collectIssueGraphLivenessFindings();
  const updatedAtByIssueKey = await loadLivenessDependencyUpdatedAtByIssue(findings);

  // Resolve identifier/title of every recovery issue referenced by a finding.
  const recoveryIssueIds = [...new Set(findings.map((finding) => finding.recoveryIssueId))];
  const recoveryRows = recoveryIssueIds.length === 0
    ? []
    : await db
      .select({ id: issues.id, identifier: issues.identifier, title: issues.title })
      .from(issues)
      .where(inArray(issues.id, recoveryIssueIds));
  const recoveryById = new Map(recoveryRows.map((row) => [row.id, row]));

  const items: IssueGraphLivenessAutoRecoveryPreviewItem[] = [];
  let skippedOutsideLookback = 0;
  for (const finding of findings) {
    const newestActivity = latestDependencyUpdatedAtForLivenessFinding(finding, updatedAtByIssueKey);
    const insideWindow = Boolean(newestActivity) && !(newestActivity! < cutoff);
    if (!newestActivity || !insideWindow) {
      skippedOutsideLookback += 1;
      continue;
    }

    const recovery = recoveryById.get(finding.recoveryIssueId);
    const item: IssueGraphLivenessAutoRecoveryPreviewItem = {
      issueId: finding.issueId,
      identifier: finding.identifier,
      title: finding.dependencyPath[0]?.title ?? finding.identifier ?? finding.issueId,
      state: finding.state,
      severity: finding.severity,
      reason: finding.reason,
      recoveryIssueId: finding.recoveryIssueId,
      recoveryIdentifier: recovery?.identifier ?? null,
      recoveryTitle: recovery?.title ?? null,
      recommendedOwnerAgentId: finding.recommendedOwnerAgentId,
      incidentKey: finding.incidentKey,
      latestDependencyUpdatedAt: newestActivity.toISOString(),
      dependencyPath: finding.dependencyPath,
    };
    items.push(item);
  }

  return {
    lookbackHours,
    cutoff: cutoff.toISOString(),
    generatedAt: generatedAt.toISOString(),
    findings: findings.length,
    recoverableFindings: items.length,
    skippedOutsideLookback,
    items,
  };
}
/**
 * Picks the first recommended owner candidate that has no budget invocation
 * block for the given issue.
 *
 * Candidates come from `finding.recommendedOwnerCandidates`; when that list is
 * empty, `finding.recommendedOwnerCandidateAgentIds` is used with the reason
 * `"ordered_invokable_fallback"`. Duplicate agent ids keep their first entry.
 * Candidates are checked one at a time, in order.
 *
 * @param finding Finding that supplies the candidate list.
 * @param issue Issue whose company/project scope the budget check uses.
 * @returns The selected agent plus the full candidate list and the candidates
 *   skipped for budget reasons, or `null` when every candidate is blocked.
 */
async function resolveEscalationOwnerAgentId(
  finding: IssueLivenessFinding,
  issue: typeof issues.$inferSelect,
) {
  const rawCandidates = finding.recommendedOwnerCandidates.length > 0
    ? finding.recommendedOwnerCandidates
    : finding.recommendedOwnerCandidateAgentIds.map((agentId) => ({
      agentId,
      reason: "ordered_invokable_fallback" as const,
      sourceIssueId: finding.recoveryIssueId,
    }));

  // Insertion-ordered map: the first entry per agent id wins.
  const firstByAgentId = new Map<string, (typeof rawCandidates)[number]>();
  for (const rawCandidate of rawCandidates) {
    if (!firstByAgentId.has(rawCandidate.agentId)) {
      firstByAgentId.set(rawCandidate.agentId, rawCandidate);
    }
  }
  const candidates = [...firstByAgentId.values()];

  const budgetBlockedCandidateAgentIds: string[] = [];
  for (const candidate of candidates) {
    const budgetBlock = await budgets.getInvocationBlock(issue.companyId, candidate.agentId, {
      issueId: issue.id,
      projectId: issue.projectId,
    });
    if (budgetBlock) {
      budgetBlockedCandidateAgentIds.push(candidate.agentId);
      continue;
    }
    return {
      agentId: candidate.agentId,
      reason: candidate.reason,
      sourceIssueId: candidate.sourceIssueId,
      candidateAgentIds: candidates.map((entry) => entry.agentId),
      candidateReasons: candidates.map(({ agentId, reason, sourceIssueId }) => ({
        agentId,
        reason,
        sourceIssueId,
      })),
      budgetBlockedCandidateAgentIds,
    };
  }
  return null;
}
/**
 * Decides whether a new escalation should inherit the recovery issue's
 * execution workspace.
 *
 * @returns `true` only when the recovery issue differs from the finding's own
 *   issue and is already assigned to the chosen owner agent.
 */
function shouldReuseRecoveryExecutionWorkspace(input: {
  finding: IssueLivenessFinding;
  recoveryIssue: typeof issues.$inferSelect;
  ownerAgentId: string;
}) {
  const { finding, recoveryIssue, ownerAgentId } = input;
  const recoveryIsTheSourceIssue = finding.recoveryIssueId === finding.issueId;
  return !recoveryIsTheSourceIssue && recoveryIssue.assigneeAgentId === ownerAgentId;
}
/**
 * Makes sure the source issue is in status `blocked` and lists the escalation
 * issue among its blockers, then records an `issue.blockers.updated` activity.
 *
 * @param input.issue Source issue row as currently known to the caller.
 * @param input.escalationIssueId Escalation issue that must block the source.
 * @param input.finding Finding that triggered the escalation (used for logging).
 * @param input.runId Optional run id attached to the activity entry.
 * @returns The unchanged `input.issue` when nothing needed to change, the
 *   updated issue otherwise, or `null` when the update returned nothing.
 */
async function ensureIssueBlockedByEscalation(input: {
  issue: typeof issues.$inferSelect;
  escalationIssueId: string;
  finding: IssueLivenessFinding;
  runId?: string | null;
}) {
  const { issue, escalationIssueId, finding } = input;

  const currentBlockerIds = await existingBlockerIssueIds(issue.companyId, issue.id);
  const alreadyLinked = currentBlockerIds.includes(escalationIssueId);
  const alreadyBlocked = issue.status === "blocked";
  if (alreadyLinked && alreadyBlocked) return issue;

  const blockedByIssueIds = [...new Set([...currentBlockerIds, escalationIssueId])];
  // Only touch the status when the issue is not blocked yet.
  const patch: Partial<typeof issues.$inferInsert> & { blockedByIssueIds: string[] } = alreadyBlocked
    ? { blockedByIssueIds }
    : { blockedByIssueIds, status: "blocked" };

  const updatedIssue = await issuesSvc.update(issue.id, patch);
  if (!updatedIssue) return null;

  await logActivity(db, {
    companyId: issue.companyId,
    actorType: "system",
    actorId: "system",
    agentId: null,
    runId: input.runId ?? null,
    action: "issue.blockers.updated",
    entityType: "issue",
    entityId: issue.id,
    details: {
      source: "recovery.reconcile_issue_graph_liveness",
      incidentKey: finding.incidentKey,
      findingState: finding.state,
      blockerIssueIds: blockedByIssueIds,
      escalationIssueId,
      status: patch.status ?? issue.status,
      previousStatus: issue.status,
    },
  });
  return updatedIssue;
}
/**
 * Creates an escalation issue for a liveness finding, or reuses an open one,
 * and blocks the source issue on it.
 *
 * Result kinds:
 *  - `skipped`: nothing was created or linked (source/recovery issue missing,
 *    automatic recovery suppressed by a pause hold, or no owner selected);
 *  - `existing`: an open escalation already existed (or was created by a
 *    concurrent caller) and the source issue was linked to it;
 *  - `created`: a new escalation was created, linked, commented on the source
 *    issue, logged, and a wakeup was enqueued for the selected owner.
 *
 * @param input.finding Finding to escalate.
 * @param input.runId Optional run id attached to comments and activity entries.
 * @throws Re-throws any creation error that is not a unique liveness-recovery
 *   conflict, and the conflict itself when no raced escalation can be found.
 */
async function createIssueGraphLivenessEscalation(input: {
finding: IssueLivenessFinding;
runId?: string | null;
}) {
// Load the source issue and make sure it belongs to the finding's company.
const issue = await db
.select()
.from(issues)
.where(eq(issues.id, input.finding.issueId))
.then((rows) => rows[0] ?? null);
if (!issue || issue.companyId !== input.finding.companyId) return { kind: "skipped" as const };
if (await isAutomaticRecoverySuppressedByPauseHold(db, issue.companyId, issue.id, treeControlSvc)) {
return { kind: "skipped" as const };
}
// The recovery issue must exist in the same company as the source issue.
const recoveryIssue = await db
.select()
.from(issues)
.where(and(eq(issues.id, input.finding.recoveryIssueId), eq(issues.companyId, issue.companyId)))
.then((rows) => rows[0] ?? null);
if (!recoveryIssue) return { kind: "skipped" as const };
// Prefer an open escalation for the same incident key, then one for the same leaf.
const existing =
await findOpenLivenessEscalation(issue.companyId, input.finding.incidentKey) ??
await findOpenLivenessRecoveryIssueForLeaf(input.finding);
if (existing) {
await ensureIssueBlockedByEscalation({
issue,
escalationIssueId: existing.id,
finding: input.finding,
runId: input.runId ?? null,
});
return { kind: "existing" as const, escalationIssueId: existing.id };
}
// Owner selection is evaluated against the recovery issue, not the source issue.
const ownerSelection = await resolveEscalationOwnerAgentId(input.finding, recoveryIssue);
if (!ownerSelection) return { kind: "skipped" as const };
const reuseRecoveryExecutionWorkspace = shouldReuseRecoveryExecutionWorkspace({
finding: input.finding,
recoveryIssue,
ownerAgentId: ownerSelection.agentId,
});
let escalation: Awaited<ReturnType<typeof issuesSvc.create>>;
try {
// The escalation is created as a child of the recovery issue, assigned to the selected owner.
escalation = await issuesSvc.create(issue.companyId, {
title: `Unblock liveness incident for ${recoveryIssue.identifier ?? recoveryIssue.title}`,
description: buildLivenessEscalationDescription(input.finding),
status: "todo",
priority: "high",
parentId: recoveryIssue.id,
projectId: recoveryIssue.projectId,
goalId: recoveryIssue.goalId,
assigneeAgentId: ownerSelection.agentId,
assigneeAdapterOverrides: recoveryAssigneeAdapterOverrides("status_only"),
originKind: RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation,
originId: input.finding.incidentKey,
originFingerprint: livenessRecoveryLeafFingerprint(input.finding),
billingCode: recoveryIssue.billingCode,
// Either inherit the recovery issue's workspace or explicitly clear all workspace fields.
...(reuseRecoveryExecutionWorkspace
? { inheritExecutionWorkspaceFromIssueId: recoveryIssue.id }
: {
executionWorkspaceId: null,
executionWorkspacePreference: null,
executionWorkspaceSettings: null,
}),
});
} catch (error) {
// Only a unique-conflict error is recoverable: re-run the lookups and reuse
// whatever escalation they find; otherwise surface the original error.
if (!isUniqueLivenessRecoveryConflict(error)) throw error;
const raced =
await findOpenLivenessEscalation(issue.companyId, input.finding.incidentKey) ??
await findOpenLivenessRecoveryIssueForLeaf(input.finding);
if (!raced) throw error;
await ensureIssueBlockedByEscalation({
issue,
escalationIssueId: raced.id,
finding: input.finding,
runId: input.runId ?? null,
});
return { kind: "existing" as const, escalationIssueId: raced.id };
}
// Newly created escalation: block the source issue on it and leave a comment there.
await ensureIssueBlockedByEscalation({
issue,
escalationIssueId: escalation.id,
finding: input.finding,
runId: input.runId ?? null,
});
await issuesSvc.addComment(
issue.id,
buildLivenessOriginalIssueComment(input.finding, escalation),
{ runId: input.runId ?? null },
);
// Activity entry records how the owner and workspace were chosen.
await logActivity(db, {
companyId: issue.companyId,
actorType: "system",
actorId: "system",
agentId: ownerSelection.agentId,
runId: input.runId ?? null,
action: "issue.harness_liveness_escalation_created",
entityType: "issue",
entityId: escalation.id,
details: {
source: "recovery.reconcile_issue_graph_liveness",
incidentKey: input.finding.incidentKey,
findingState: input.finding.state,
sourceIssueId: issue.id,
sourceIdentifier: issue.identifier,
recoveryIssueId: recoveryIssue.id,
recoveryIdentifier: recoveryIssue.identifier,
escalationIssueId: escalation.id,
escalationIdentifier: escalation.identifier,
dependencyPath: input.finding.dependencyPath,
ownerSelection: {
selectedAgentId: ownerSelection.agentId,
selectedReason: ownerSelection.reason,
selectedSourceIssueId: ownerSelection.sourceIssueId,
candidateAgentIds: ownerSelection.candidateAgentIds,
candidateReasons: ownerSelection.candidateReasons,
budgetBlockedCandidateAgentIds: ownerSelection.budgetBlockedCandidateAgentIds,
},
workspaceSelection: {
reuseRecoveryExecutionWorkspace,
inheritedExecutionWorkspaceFromIssueId: reuseRecoveryExecutionWorkspace ? recoveryIssue.id : null,
projectWorkspaceSourceIssueId: recoveryIssue.id,
},
},
});
// Wake the selected owner; payload and context snapshot both pass through the
// "status_only" recovery model-profile hint helper.
const wake = await deps.enqueueWakeup(ownerSelection.agentId, {
source: "assignment",
triggerDetail: "system",
reason: "issue_assigned",
payload: withRecoveryModelProfileHint({
issueId: escalation.id,
sourceIssueId: issue.id,
recoveryIssueId: recoveryIssue.id,
incidentKey: input.finding.incidentKey,
}, "status_only"),
requestedByActorType: "system",
requestedByActorId: null,
contextSnapshot: withRecoveryModelProfileHint({
issueId: escalation.id,
taskId: escalation.id,
wakeReason: "issue_assigned",
source: RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation,
sourceIssueId: issue.id,
recoveryIssueId: recoveryIssue.id,
incidentKey: input.finding.incidentKey,
}, "status_only"),
});
logger.warn({
incidentKey: input.finding.incidentKey,
findingState: input.finding.state,
sourceIssueId: issue.id,
recoveryIssueId: recoveryIssue.id,
escalationIssueId: escalation.id,
ownerAgentId: ownerSelection.agentId,
ownerSelectionReason: ownerSelection.reason,
wakeupRunId: wake?.id ?? null,
}, "created issue graph liveness escalation");
return { kind: "created" as const, escalationIssueId: escalation.id };
}
/**
 * Runs one reconciliation pass over the issue graph's liveness findings.
 *
 * The pass always collects findings and cleans up obsolete or finished
 * recovery issues. Escalations are only created when auto recovery is enabled
 * in the experimental settings (defaulting to enabled) or `opts.force` is
 * `true`, and only for findings inside the lookback window.
 *
 * @param opts.runId Optional run id forwarded to escalation creation.
 * @param opts.force Runs escalation creation even when the setting is off.
 * @param opts.lookbackHours Overrides the configured lookback window.
 * @returns A summary of counts and affected issue ids for this pass.
 */
async function reconcileIssueGraphLiveness(opts?: {
  runId?: string | null;
  force?: boolean;
  lookbackHours?: number;
}) {
  const findings = await collectIssueGraphLivenessFindings();
  const experimentalSettings = await instanceSettings.getExperimental();

  const enabledBySetting = asBoolean(experimentalSettings.enableIssueGraphLivenessAutoRecovery, true);
  const autoRecoveryEnabled = enabledBySetting || opts?.force === true;
  const lookbackHours = normalizeIssueGraphLivenessAutoRecoveryLookbackHours(
    opts?.lookbackHours ?? experimentalSettings.issueGraphLivenessAutoRecoveryLookbackHours,
  );
  const now = new Date();
  const cutoff = new Date(now.getTime() - lookbackHours * 60 * 60 * 1000);

  // Cleanup runs on every pass, regardless of the auto-recovery switch.
  const obsoleteCleanup = await retireObsoleteLivenessRecoveryIssues(findings);
  const doneCleanup = await retireDoneLivenessRecoveryBlockers();
  const updatedAtByIssueKey = await loadLivenessDependencyUpdatedAtByIssue(findings);

  const summary = {
    findings: findings.length,
    autoRecoveryEnabled,
    lookbackHours,
    cutoff: cutoff.toISOString(),
    escalationsCreated: 0,
    existingEscalations: 0,
    skipped: 0,
    skippedAutoRecoveryDisabled: 0,
    skippedOutsideLookback: 0,
    obsoleteRecoveriesRetired: obsoleteCleanup.retired,
    obsoleteRecoveriesActiveSkipped: obsoleteCleanup.activeSkipped,
    obsoleteRecoveryBlockerRelationsRemoved: obsoleteCleanup.blockerRelationsRemoved,
    doneRecoveryBlockerRelationsRemoved: doneCleanup.blockerRelationsRemoved,
    issueIds: [] as string[],
    escalationIssueIds: [] as string[],
    retiredRecoveryIssueIds: obsoleteCleanup.retiredIssueIds,
  };

  if (!autoRecoveryEnabled) {
    summary.skippedAutoRecoveryDisabled = findings.length;
    return summary;
  }

  for (const finding of findings) {
    const insideWindow = isLivenessFindingInsideAutoRecoveryLookback(finding, cutoff, updatedAtByIssueKey);
    if (!insideWindow) {
      summary.skippedOutsideLookback += 1;
      summary.skipped += 1;
      continue;
    }

    const outcome = await createIssueGraphLivenessEscalation({
      finding,
      runId: opts?.runId ?? null,
    });
    if (outcome.kind !== "created" && outcome.kind !== "existing") {
      summary.skipped += 1;
      continue;
    }

    if (outcome.kind === "created") {
      summary.escalationsCreated += 1;
    } else {
      summary.existingEscalations += 1;
    }
    summary.issueIds.push(finding.issueId);
    summary.escalationIssueIds.push(outcome.escalationIssueId);
  }
  return summary;
}
/**
 * Reads a timer interval setting as whole milliseconds, never less than 1.
 *
 * @param raw Untrusted setting value.
 * @param fallback Value handed to `asNumber` as its fallback.
 */
function readRecoveryTimerIntervalMs(raw: unknown, fallback: number) {
  const wholeMilliseconds = Math.floor(asNumber(raw, fallback));
  return Math.max(1, wholeMilliseconds);
}
// Functions handed back to the caller of the enclosing function; helpers
// defined above that are not listed here are not part of the returned object.
return {
buildRunOutputSilence,
escalateStrandedRecoveryIssueInPlace,
escalateStrandedAssignedIssue,
recordWatchdogDecision,
scanSilentActiveRuns,
reconcileStrandedAssignedIssues,
buildIssueGraphLivenessAutoRecoveryPreview,
reconcileIssueGraphLiveness,
readRecoveryTimerIntervalMs,
};
}