Files
paperclip/server/src/services/issue-recovery-actions.ts
T
Dotta 0808b388ee [codex] Add source-scoped recovery actions (#5599)
## Thinking Path

> - Paperclip is a control plane for autonomous AI companies, where work
must end with a clear disposition rather than ambiguous agent liveness.
> - Recovery currently detects stalled or missing-next-step issues, but
source issue recovery can become split across child recovery issues,
blockers, and comments.
> - That makes it harder for operators and agents to see who owns
recovery and what exact action is needed on the original issue.
> - Source-scoped recovery actions give the original issue a first-class
active recovery state with owner, evidence, wake policy, and resolution
outcome.
> - This pull request adds the recovery-action data model, backend
reconciliation and resolution APIs, and board UI indicators/actions.
> - The benefit is clearer stalled-work recovery without losing source
issue context or relying on comments as the liveness path.

## What Changed

- Added the `issue_recovery_actions` schema, shared
types/constants/validators, and an idempotent
`0084_issue_recovery_actions` migration ordered after current `master`
migrations.
- Updated stranded/missing-disposition recovery to create source-scoped
recovery actions, wake the recovery owner on the source issue, and avoid
locking the source issue for recovery-action wakes.
- Added API support for reading active recovery actions on issue
detail/list surfaces and resolving them with restored, blocked,
cancelled, or false-positive outcomes.
- Required blocked recovery resolutions to have an unresolved first-class
blocker, and removed the UI shortcut that could mark recovery blocked
without a blocker selection path.
- Surfaced recovery indicators/actions in the issue UI, blocker notices,
active run panels, issue rows, and Storybook coverage.
- Updated docs and focused tests for recovery semantics, ownership,
races, stale comments, and UI behavior.

## Verification

- `pnpm exec vitest run
server/src/__tests__/issue-recovery-actions.test.ts
server/src/__tests__/heartbeat-process-recovery.test.ts
ui/src/components/IssueRecoveryActionCard.test.tsx
ui/src/components/IssueBlockedNotice.test.tsx ui/src/api/issues.test.ts`
— 5 files, 72 tests passed.
- `pnpm --filter @paperclipai/shared typecheck` — passed.
- `pnpm --filter @paperclipai/db typecheck` — passed, including
migration numbering check.
- `pnpm --filter @paperclipai/server typecheck` — passed.
- `pnpm --filter @paperclipai/ui typecheck` — passed.
- Follow-up verification after blocker-resolution guard: `pnpm exec
vitest run server/src/__tests__/issue-recovery-actions.test.ts
ui/src/components/IssueRecoveryActionCard.test.tsx
ui/src/api/issues.test.ts` — 3 files, 27 tests passed.
- Follow-up `pnpm --filter @paperclipai/server typecheck` — passed.
- Follow-up `pnpm --filter @paperclipai/ui typecheck` — passed.
- UI states are available in
`ui/storybook/stories/source-issue-recovery.stories.tsx`; screenshot
capture helper is `scripts/screenshot-recovery-card.cjs`.

## Risks

- Medium: recovery behavior changes from child recovery issue ownership
toward source-scoped actions, so operators may see stalled-work state in
new places.
- Migration risk is mitigated by using the next migration slot after
`master` and making the table/constraints/index creation idempotent for
anyone who previously applied the old branch-local
`0082_dizzy_master_mold` migration.
- Existing child recovery issue paths are still guarded for
already-created recovery issues, but new source-scoped flows should be
watched in CI and Greptile review.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5 coding agent, tool use enabled for shell, Git,
GitHub, and local test execution. Context window not exposed by the
runtime.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-05-12 09:37:15 -05:00

296 lines
9.9 KiB
TypeScript

import { and, desc, eq, inArray } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import { issueRecoveryActions } from "@paperclipai/db";
import type {
IssueRecoveryAction,
IssueRecoveryActionKind,
IssueRecoveryActionOwnerType,
IssueRecoveryActionOutcome,
IssueRecoveryActionStatus,
} from "@paperclipai/shared";
/** Statuses that the queries and updates in this file treat as a still-open recovery action. */
const ACTIVE_RECOVERY_ACTION_STATUSES = ["active", "escalated"] as const satisfies readonly IssueRecoveryActionStatus[];
/** Upper bound on re-attempts made by the upsert path after a lost update or a unique-constraint conflict. */
const MAX_UPSERT_RETRIES = 3;
/** Row shape Drizzle infers for selects on the `issueRecoveryActions` table. */
type IssueRecoveryActionRow = typeof issueRecoveryActions.$inferSelect;
/** Transaction handle type, derived from the parameter of the callback passed to `Db["transaction"]`. */
type DbTransaction = Parameters<Parameters<Db["transaction"]>[0]>[0];
/** Either the root database handle or a transaction handle. */
type DbOrTransaction = Db | DbTransaction;
/**
 * Input for creating or refreshing the active recovery action of a source issue.
 *
 * The defaults described on each field are the ones applied by the upsert
 * implementation in this file.
 */
export type UpsertIssueRecoveryActionInput = {
/** Company that owns the source issue; part of the lookup and of the in-process serialization key. */
companyId: string;
/** Issue the recovery action is attached to; part of the lookup and of the in-process serialization key. */
sourceIssueId: string;
/** Stored as `null` when omitted, on both insert and update. */
recoveryIssueId?: string | null;
kind: IssueRecoveryActionKind;
/** When omitted, resolves to `"agent"` if `ownerAgentId` is truthy and to `"board"` otherwise. */
ownerType?: IssueRecoveryActionOwnerType;
/** Stored as `null` when omitted. */
ownerAgentId?: string | null;
/** Stored as `null` when omitted. */
ownerUserId?: string | null;
/** When omitted or `null`, an update keeps the existing row's value; an insert stores `null`. */
previousOwnerAgentId?: string | null;
/** When omitted or `null`, an update keeps the existing row's value; an insert stores `null`. */
returnOwnerAgentId?: string | null;
cause: string;
// NOTE(review): presumably the value covered by the `issue_recovery_actions_active_fingerprint_uq`
// constraint — confirm against the schema.
fingerprint: string;
/** When omitted, an update keeps the existing evidence; an insert stores `{}`. */
evidence?: Record<string, unknown>;
nextAction: string;
/** Stored as `null` when omitted, which on update overwrites any previous value. */
wakePolicy?: Record<string, unknown> | null;
/** Stored as `null` when omitted, which on update overwrites any previous value. */
monitorPolicy?: Record<string, unknown> | null;
/** Stored as `null` when omitted, which on update overwrites any previous value. */
maxAttempts?: number | null;
/** Stored as `null` when omitted, which on update overwrites any previous value. */
timeoutAt?: Date | null;
/** Defaults to the time at which the upsert runs. */
lastAttemptAt?: Date | null;
};
/** Input for closing the active recovery action of a source issue. */
export type ResolveIssueRecoveryActionInput = {
companyId: string;
sourceIssueId: string;
/** When truthy, the update is additionally restricted to the action with this id. */
actionId?: string | null;
/** Terminal status written to the row; limited to `"resolved"` or `"cancelled"`. */
status: Extract<IssueRecoveryActionStatus, "resolved" | "cancelled">;
outcome: IssueRecoveryActionOutcome;
/** Stored as `null` when omitted. */
resolutionNote?: string | null;
};
/**
 * Projects a database row onto the shared `IssueRecoveryAction` read model.
 *
 * Each listed column is copied across by name. `kind`, `status`, `ownerType`
 * and `outcome` are only re-typed via assertions; nothing is validated at
 * runtime. Properties not listed here are not carried over.
 *
 * @param row - Row selected from the recovery-actions table.
 * @returns A fresh read-model object.
 */
function toReadModel(row: IssueRecoveryActionRow): IssueRecoveryAction {
  const {
    id,
    companyId,
    sourceIssueId,
    recoveryIssueId,
    ownerAgentId,
    ownerUserId,
    previousOwnerAgentId,
    returnOwnerAgentId,
    cause,
    fingerprint,
    evidence,
    nextAction,
    wakePolicy,
    monitorPolicy,
    attemptCount,
    maxAttempts,
    timeoutAt,
    lastAttemptAt,
    resolutionNote,
    resolvedAt,
    createdAt,
    updatedAt,
  } = row;

  // The enum-like columns are narrowed to the read model's types by assertion only.
  const kind = row.kind as IssueRecoveryAction["kind"];
  const status = row.status as IssueRecoveryAction["status"];
  const ownerType = row.ownerType as IssueRecoveryAction["ownerType"];
  const outcome = row.outcome as IssueRecoveryAction["outcome"];

  return {
    id,
    companyId,
    sourceIssueId,
    recoveryIssueId,
    kind,
    status,
    ownerType,
    ownerAgentId,
    ownerUserId,
    previousOwnerAgentId,
    returnOwnerAgentId,
    cause,
    fingerprint,
    evidence,
    nextAction,
    wakePolicy,
    monitorPolicy,
    attemptCount,
    maxAttempts,
    timeoutAt,
    lastAttemptAt,
    outcome,
    resolutionNote,
    resolvedAt,
    createdAt,
    updatedAt,
  };
}
/**
 * Reports whether `error` is a PostgreSQL unique violation (SQLSTATE `23505`)
 * raised by one of the active-recovery-action unique constraints.
 *
 * An error matches when its `code` is `"23505"` and either its structured
 * constraint field (`constraint`, or `constraint_name` as some drivers spell
 * it) names one of the two constraints, or its `message` mentions one of them.
 *
 * Query layers may wrap the driver error and expose the original via `cause`,
 * so the `cause` chain is inspected as well. The walk is depth-limited so a
 * cyclic chain cannot loop forever.
 *
 * @param error - Any thrown value.
 * @returns `true` when the error, or one of its wrapped causes, is such a conflict.
 */
function isUniqueRecoveryActionConflict(error: unknown) {
  const constraintNames = [
    "issue_recovery_actions_active_source_uq",
    "issue_recovery_actions_active_fingerprint_uq",
  ];
  const maxCauseDepth = 5;

  let current: unknown = error;
  for (let depth = 0; depth < maxCauseDepth; depth += 1) {
    if (typeof current !== "object" || current === null) return false;

    const candidate = current as {
      code?: unknown;
      constraint?: unknown;
      constraint_name?: unknown;
      message?: unknown;
      cause?: unknown;
    };

    if (candidate.code === "23505") {
      const namedByField = [candidate.constraint, candidate.constraint_name].some(
        (name) => typeof name === "string" && constraintNames.includes(name),
      );
      const message = candidate.message;
      const namedInMessage =
        typeof message === "string" && constraintNames.some((name) => message.includes(name));
      if (namedByField || namedInMessage) return true;
    }

    // Not a match at this level: look at the wrapped error, if any.
    current = candidate.cause;
  }
  return false;
}
export function issueRecoveryActionService(db: Db) {
const upsertQueues = new Map<string, Promise<void>>();
/**
 * Runs `task` only after every task previously queued under the same
 * `companyId:sourceIssueId` key has finished, so upserts for one source issue
 * that go through this service instance execute one at a time.
 *
 * The queue lives in the `upsertQueues` map held by this service instance, so
 * it does not coordinate with other instances or other processes.
 *
 * @param input - Only `companyId` and `sourceIssueId` are read, to build the queue key.
 * @param task - Work to run once it is this caller's turn.
 * @returns Whatever `task` resolves to; a rejection from `task` propagates to the caller.
 */
async function runExclusiveUpsert<T>(
input: UpsertIssueRecoveryActionInput,
task: () => Promise<T>,
): Promise<T> {
const key = `${input.companyId}:${input.sourceIssueId}`;
// Tail of the queue for this key, or an already-resolved promise when nothing is queued.
const previous = upsertQueues.get(key) ?? Promise.resolve();
let release: () => void = () => {};
// `current` settles only when this caller's task has finished (see `release()` below).
const current = new Promise<void>((resolve) => {
release = resolve;
});
// New tail: settles after the previous tail AND this caller's task are both done.
const next = previous.catch(() => undefined).then(() => current);
// Published synchronously, before the first await, so a later caller queues behind this one.
upsertQueues.set(key, next);
// Wait for our turn; a rejected predecessor is ignored rather than propagated.
await previous.catch(() => undefined);
try {
return await task();
} finally {
// Let the next queued caller proceed whether the task succeeded or threw.
release();
// Drop the entry only if no later caller has replaced the tail in the meantime.
if (upsertQueues.get(key) === next) {
upsertQueues.delete(key);
}
}
}
/**
 * Loads the most recently updated recovery action for a source issue whose
 * status is one of `ACTIVE_RECOVERY_ACTION_STATUSES`.
 *
 * @param companyId - Company the issue belongs to.
 * @param sourceIssueId - Issue whose active recovery action is wanted.
 * @returns The action as a read model, or `null` when no row matches.
 */
async function getActiveForIssue(companyId: string, sourceIssueId: string): Promise<IssueRecoveryAction | null> {
  const [latest] = await db
    .select()
    .from(issueRecoveryActions)
    .where(
      and(
        eq(issueRecoveryActions.companyId, companyId),
        eq(issueRecoveryActions.sourceIssueId, sourceIssueId),
        inArray(issueRecoveryActions.status, [...ACTIVE_RECOVERY_ACTION_STATUSES]),
      ),
    )
    .orderBy(desc(issueRecoveryActions.updatedAt))
    .limit(1);

  if (!latest) {
    return null;
  }
  return toReadModel(latest);
}
/**
 * Loads the active recovery action for each of the given source issues in one query.
 *
 * Rows arrive ordered by `updatedAt` descending and only the first row seen
 * per source issue is kept, so each entry is that issue's most recently
 * updated active action.
 *
 * @param companyId - Company the issues belong to.
 * @param sourceIssueIds - Issues to look up; duplicates are collapsed before querying.
 * @returns Map from source issue id to its active action. Issues without one are absent.
 *   An empty input returns an empty map without querying.
 */
async function listActiveForIssues(companyId: string, sourceIssueIds: string[]) {
  const latestBySourceIssue = new Map<string, IssueRecoveryAction>();
  if (sourceIssueIds.length === 0) return latestBySourceIssue;

  const distinctIssueIds = Array.from(new Set(sourceIssueIds));
  const activeRows = await db
    .select()
    .from(issueRecoveryActions)
    .where(
      and(
        eq(issueRecoveryActions.companyId, companyId),
        inArray(issueRecoveryActions.sourceIssueId, distinctIssueIds),
        inArray(issueRecoveryActions.status, [...ACTIVE_RECOVERY_ACTION_STATUSES]),
      ),
    )
    .orderBy(desc(issueRecoveryActions.updatedAt));

  activeRows.forEach((row) => {
    // Newest rows come first; later (older) rows for the same issue are skipped.
    if (latestBySourceIssue.has(row.sourceIssueId)) return;
    latestBySourceIssue.set(row.sourceIssueId, toReadModel(row));
  });
  return latestBySourceIssue;
}
/**
 * Re-runs the unlocked upsert with an incremented retry counter, or gives up
 * once `MAX_UPSERT_RETRIES` has been reached.
 *
 * @param input - The upsert input to retry.
 * @param retryCount - Number of retries already performed.
 * @param error - Error that triggered the retry, if any; rethrown as-is when retries are exhausted.
 * @returns The result of the retried upsert.
 * @throws `error` when provided and retries are exhausted, otherwise a generic `Error`.
 */
async function retryUpsertSourceScoped(
  input: UpsertIssueRecoveryActionInput,
  retryCount: number,
  error?: unknown,
): Promise<IssueRecoveryAction> {
  const retriesExhausted = retryCount >= MAX_UPSERT_RETRIES;
  if (!retriesExhausted) {
    return upsertSourceScopedUnlocked(input, retryCount + 1);
  }

  // Prefer surfacing the original failure over the generic message.
  if (error) throw error;
  throw new Error(
    `Failed to upsert active recovery action for issue ${input.sourceIssueId} after ${MAX_UPSERT_RETRIES} retries`,
  );
}
/**
 * Creates the active recovery action for a source issue, or refreshes the one
 * that already exists. Does not take the per-issue in-process lock itself.
 *
 * - No active action: a row is inserted with `attemptCount` 1. If the insert
 *   hits one of the active-action unique constraints, the upsert is retried.
 * - Active action found: it is updated in place, `status` is set back to
 *   `"active"`, `attemptCount` is incremented and the resolution fields are
 *   cleared. The update only applies while the row is still active; if it
 *   matches nothing, the upsert is retried.
 *
 * @param input - Desired state of the recovery action.
 * @param retryCount - Retries performed so far; forwarded to the retry helper.
 * @returns The inserted or updated action as a read model.
 * @throws Any insert error that is not a recognised unique-constraint conflict,
 *   or whatever the retry helper throws once retries are exhausted.
 */
async function upsertSourceScopedUnlocked(
  input: UpsertIssueRecoveryActionInput,
  retryCount = 0,
): Promise<IssueRecoveryAction> {
  const existing = await getActiveForIssue(input.companyId, input.sourceIssueId);
  const now = new Date();
  const ownerType = input.ownerType ?? (input.ownerAgentId ? "agent" : "board");

  // Column values written identically by the insert and the update path.
  const sharedValues = {
    recoveryIssueId: input.recoveryIssueId ?? null,
    kind: input.kind,
    status: "active" as const,
    ownerType,
    ownerAgentId: input.ownerAgentId ?? null,
    ownerUserId: input.ownerUserId ?? null,
    cause: input.cause,
    fingerprint: input.fingerprint,
    nextAction: input.nextAction,
    wakePolicy: input.wakePolicy ?? null,
    monitorPolicy: input.monitorPolicy ?? null,
    maxAttempts: input.maxAttempts ?? null,
    timeoutAt: input.timeoutAt ?? null,
    lastAttemptAt: input.lastAttemptAt ?? now,
  };

  if (!existing) {
    try {
      const [created] = await db
        .insert(issueRecoveryActions)
        .values({
          ...sharedValues,
          companyId: input.companyId,
          sourceIssueId: input.sourceIssueId,
          previousOwnerAgentId: input.previousOwnerAgentId ?? null,
          returnOwnerAgentId: input.returnOwnerAgentId ?? null,
          evidence: input.evidence ?? {},
          attemptCount: 1,
        })
        .returning();
      return toReadModel(created!);
    } catch (error) {
      // A concurrent writer created the active row first: go round again and update it.
      if (isUniqueRecoveryActionConflict(error)) {
        return retryUpsertSourceScoped(input, retryCount, error);
      }
      throw error;
    }
  }

  const [updated] = await db
    .update(issueRecoveryActions)
    .set({
      ...sharedValues,
      // Ownership history and evidence are kept from the existing row unless the caller supplies new values.
      previousOwnerAgentId: input.previousOwnerAgentId ?? existing.previousOwnerAgentId,
      returnOwnerAgentId: input.returnOwnerAgentId ?? existing.returnOwnerAgentId,
      evidence: input.evidence ?? existing.evidence,
      attemptCount: existing.attemptCount + 1,
      outcome: null,
      resolutionNote: null,
      resolvedAt: null,
      updatedAt: now,
    })
    .where(
      and(
        eq(issueRecoveryActions.id, existing.id),
        // Guard against the row having left the active set since it was read.
        inArray(issueRecoveryActions.status, [...ACTIVE_RECOVERY_ACTION_STATUSES]),
      ),
    )
    .returning();

  return updated ? toReadModel(updated) : retryUpsertSourceScoped(input, retryCount);
}
/**
 * Public upsert entry point: runs the unlocked upsert under the in-process
 * per-source-issue queue so calls for the same issue do not interleave.
 *
 * @param input - Desired state of the recovery action.
 * @returns The inserted or updated action as a read model.
 */
async function upsertSourceScoped(
  input: UpsertIssueRecoveryActionInput,
): Promise<IssueRecoveryAction> {
  const performUpsert = () => upsertSourceScopedUnlocked(input);
  return runExclusiveUpsert(input, performUpsert);
}
/**
 * Closes the active recovery action of a source issue by writing its terminal
 * status, outcome and resolution note, and stamping `resolvedAt`/`updatedAt`.
 *
 * Only rows whose status is in `ACTIVE_RECOVERY_ACTION_STATUSES` are touched.
 * When `input.actionId` is truthy the update is further limited to that action.
 *
 * @param input - Which action to close and how.
 * @param dbOrTx - Database or transaction handle to run the update on; defaults to the service's `db`.
 * @returns The first updated row as a read model, or `null` when nothing matched.
 */
async function resolveActiveForIssue(
  input: ResolveIssueRecoveryActionInput,
  dbOrTx: DbOrTransaction = db,
): Promise<IssueRecoveryAction | null> {
  const resolvedAt = new Date();
  const actionIdFilter = input.actionId ? [eq(issueRecoveryActions.id, input.actionId)] : [];

  const updatedRows = await dbOrTx
    .update(issueRecoveryActions)
    .set({
      status: input.status,
      outcome: input.outcome,
      resolutionNote: input.resolutionNote ?? null,
      resolvedAt,
      updatedAt: resolvedAt,
    })
    .where(
      and(
        eq(issueRecoveryActions.companyId, input.companyId),
        eq(issueRecoveryActions.sourceIssueId, input.sourceIssueId),
        inArray(issueRecoveryActions.status, [...ACTIVE_RECOVERY_ACTION_STATUSES]),
        ...actionIdFilter,
      ),
    )
    .returning();

  const resolved = updatedRows[0];
  if (!resolved) return null;
  return toReadModel(resolved);
}
return {
getActiveForIssue,
listActiveForIssues,
resolveActiveForIssue,
upsertSourceScoped,
};
}