forked from farhoodlabs/paperclip
0808b388ee
## Thinking Path > - Paperclip is a control plane for autonomous AI companies, where work must end with a clear disposition rather than ambiguous agent liveness. > - Recovery currently detects stalled or missing-next-step issues, but source issue recovery can become split across child recovery issues, blockers, and comments. > - That makes it harder for operators and agents to see who owns recovery and what exact action is needed on the original issue. > - Source-scoped recovery actions give the original issue a first-class active recovery state with owner, evidence, wake policy, and resolution outcome. > - This pull request adds the recovery-action data model, backend reconciliation and resolution APIs, and board UI indicators/actions. > - The benefit is clearer stalled-work recovery without losing source issue context or relying on comments as the liveness path. ## What Changed - Added the `issue_recovery_actions` schema, shared types/constants/validators, and an idempotent `0084_issue_recovery_actions` migration ordered after current `master` migrations. - Updated stranded/missing-disposition recovery to create source-scoped recovery actions, wake the recovery owner on the source issue, and avoid locking the source issue for recovery-action wakes. - Added API support for reading active recovery actions on issue detail/list surfaces and resolving them with restored, blocked, cancelled, or false-positive outcomes. - Require blocked recovery resolutions to have an unresolved first-class blocker, and removed the UI shortcut that could mark recovery blocked without a blocker selection path. - Surfaced recovery indicators/actions in the issue UI, blocker notices, active run panels, issue rows, and Storybook coverage. - Updated docs and focused tests for recovery semantics, ownership, races, stale comments, and UI behavior. 
## Verification - `pnpm exec vitest run server/src/__tests__/issue-recovery-actions.test.ts server/src/__tests__/heartbeat-process-recovery.test.ts ui/src/components/IssueRecoveryActionCard.test.tsx ui/src/components/IssueBlockedNotice.test.tsx ui/src/api/issues.test.ts` — 5 files, 72 tests passed. - `pnpm --filter @paperclipai/shared typecheck` — passed. - `pnpm --filter @paperclipai/db typecheck` — passed, including migration numbering check. - `pnpm --filter @paperclipai/server typecheck` — passed. - `pnpm --filter @paperclipai/ui typecheck` — passed. - Follow-up verification after blocker-resolution guard: `pnpm exec vitest run server/src/__tests__/issue-recovery-actions.test.ts ui/src/components/IssueRecoveryActionCard.test.tsx ui/src/api/issues.test.ts` — 3 files, 27 tests passed. - Follow-up `pnpm --filter @paperclipai/server typecheck` — passed. - Follow-up `pnpm --filter @paperclipai/ui typecheck` — passed. - UI states are available in `ui/storybook/stories/source-issue-recovery.stories.tsx`; screenshot capture helper is `scripts/screenshot-recovery-card.cjs`. ## Risks - Medium: recovery behavior changes from child recovery issue ownership toward source-scoped actions, so operators may see stalled-work state in new places. - Migration risk is mitigated by using the next migration slot after `master` and making the table/constraints/index creation idempotent for anyone who previously applied the old branch-local `0082_dizzy_master_mold` migration. - Existing child recovery issue paths are still guarded for already-created recovery issues, but new source-scoped flows should be watched in CI and Greptile review. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex, GPT-5 coding agent, tool use enabled for shell, Git, GitHub, and local test execution. 
Context window not exposed by the runtime. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --------- Co-authored-by: Paperclip <noreply@paperclip.ing>
296 lines
9.9 KiB
TypeScript
296 lines
9.9 KiB
TypeScript
import { and, desc, eq, inArray } from "drizzle-orm";
|
|
import type { Db } from "@paperclipai/db";
|
|
import { issueRecoveryActions } from "@paperclipai/db";
|
|
import type {
|
|
IssueRecoveryAction,
|
|
IssueRecoveryActionKind,
|
|
IssueRecoveryActionOwnerType,
|
|
IssueRecoveryActionOutcome,
|
|
IssueRecoveryActionStatus,
|
|
} from "@paperclipai/shared";
|
|
|
|
/** Statuses under which a recovery action is still treated as open on its source issue. */
const ACTIVE_RECOVERY_ACTION_STATUSES = [
  "active",
  "escalated",
] as const satisfies readonly IssueRecoveryActionStatus[];

/** How many times an upsert may be re-attempted after its first attempt fails to land. */
const MAX_UPSERT_RETRIES = 3;

/** Row shape produced by selecting from the `issueRecoveryActions` table. */
type IssueRecoveryActionRow = typeof issueRecoveryActions.$inferSelect;

/** Callback signature accepted by `Db["transaction"]`. */
type TransactionCallback = Parameters<Db["transaction"]>[0];

/** Transaction handle passed to a `Db["transaction"]` callback. */
type DbTransaction = Parameters<TransactionCallback>[0];

/** Either the root database handle or a transaction handle. */
type DbOrTransaction = Db | DbTransaction;
|
|
|
|
/**
 * Payload for creating or refreshing the active recovery action of a source issue.
 *
 * `companyId` and `sourceIssueId` together identify which active action (if any)
 * is refreshed; when none exists a new row is inserted.
 */
export type UpsertIssueRecoveryActionInput = {
  /** Company that owns the source issue. */
  companyId: string;
  /** Issue the recovery action is attached to. */
  sourceIssueId: string;
  /** Linked recovery issue; stored as `null` when omitted. */
  recoveryIssueId?: string | null;
  /** Kind of recovery action to record. */
  kind: IssueRecoveryActionKind;
  /** Owner type; when omitted it is `"agent"` if `ownerAgentId` is set, otherwise `"board"`. */
  ownerType?: IssueRecoveryActionOwnerType;
  /** Owning agent; stored as `null` when omitted. */
  ownerAgentId?: string | null;
  /** Owning user; stored as `null` when omitted. */
  ownerUserId?: string | null;
  /** On refresh the stored value is kept when this is omitted or `null`; `null` on insert. */
  previousOwnerAgentId?: string | null;
  /** On refresh the stored value is kept when this is omitted or `null`; `null` on insert. */
  returnOwnerAgentId?: string | null;
  /** Cause text stored on the action. */
  cause: string;
  /** Fingerprint stored on the action. */
  fingerprint: string;
  /** On refresh the stored evidence is kept when omitted; a new row defaults to `{}`. */
  evidence?: Record<string, unknown>;
  /** Next-action text stored on the action. */
  nextAction: string;
  /** Stored as `null` when omitted, including on refresh. */
  wakePolicy?: Record<string, unknown> | null;
  /** Stored as `null` when omitted, including on refresh. */
  monitorPolicy?: Record<string, unknown> | null;
  /** Stored as `null` when omitted, including on refresh. */
  maxAttempts?: number | null;
  /** Stored as `null` when omitted, including on refresh. */
  timeoutAt?: Date | null;
  /** Defaults to the time of the upsert when omitted or `null`. */
  lastAttemptAt?: Date | null;
};
|
|
|
|
/** Payload for closing the active recovery action of a source issue. */
export type ResolveIssueRecoveryActionInput = {
  /** Company that owns the source issue. */
  companyId: string;
  /** Issue whose active recovery action is being closed. */
  sourceIssueId: string;
  /** When set, only the action with this id is eligible to be closed. */
  actionId?: string | null;
  /** Closing status written to the action. */
  status: Extract<IssueRecoveryActionStatus, "resolved" | "cancelled">;
  /** Outcome recorded alongside the closing status. */
  outcome: IssueRecoveryActionOutcome;
  /** Optional note; stored as `null` when omitted. */
  resolutionNote?: string | null;
};
|
|
|
|
/**
 * Maps a database row onto the shared `IssueRecoveryAction` read model.
 *
 * Fields are copied one by one; `kind`, `status`, `ownerType` and `outcome`
 * are narrowed from their column types to the shared union types.
 */
function toReadModel(row: IssueRecoveryActionRow): IssueRecoveryAction {
  const readModel: IssueRecoveryAction = {
    // Identity and linkage.
    id: row.id,
    companyId: row.companyId,
    sourceIssueId: row.sourceIssueId,
    recoveryIssueId: row.recoveryIssueId,
    // Classification and ownership.
    kind: row.kind as IssueRecoveryAction["kind"],
    status: row.status as IssueRecoveryAction["status"],
    ownerType: row.ownerType as IssueRecoveryAction["ownerType"],
    ownerAgentId: row.ownerAgentId,
    ownerUserId: row.ownerUserId,
    previousOwnerAgentId: row.previousOwnerAgentId,
    returnOwnerAgentId: row.returnOwnerAgentId,
    // Diagnosis and plan.
    cause: row.cause,
    fingerprint: row.fingerprint,
    evidence: row.evidence,
    nextAction: row.nextAction,
    wakePolicy: row.wakePolicy,
    monitorPolicy: row.monitorPolicy,
    // Attempt bookkeeping.
    attemptCount: row.attemptCount,
    maxAttempts: row.maxAttempts,
    timeoutAt: row.timeoutAt,
    lastAttemptAt: row.lastAttemptAt,
    // Resolution and timestamps.
    outcome: row.outcome as IssueRecoveryAction["outcome"],
    resolutionNote: row.resolutionNote,
    resolvedAt: row.resolvedAt,
    createdAt: row.createdAt,
    updatedAt: row.updatedAt,
  };
  return readModel;
}
|
|
|
|
/** SQLSTATE code compared against the error's `code` field. */
const UNIQUE_VIOLATION_SQLSTATE = "23505";

/** Constraint names that mark a conflict between active recovery actions. */
const RECOVERY_ACTION_UNIQUE_CONSTRAINTS = [
  "issue_recovery_actions_active_source_uq",
  "issue_recovery_actions_active_fingerprint_uq",
] as const;

/** Upper bound on how many nested `cause` links are inspected (also guards against cycles). */
const MAX_ERROR_CAUSE_DEPTH = 5;

/**
 * Reports whether `error` represents a unique-constraint conflict on one of
 * the active recovery-action constraints.
 *
 * An error matches when its `code` is `"23505"` and either its `constraint`
 * equals one of the known constraint names or its `message` contains one.
 * The error's `cause` chain is inspected as well, so a conflict that a wrapper
 * error carries in `cause` is still recognised.
 *
 * @param error Value caught from a failed query; may be anything.
 * @returns `true` when the error (or one of its causes) is such a conflict.
 */
function isUniqueRecoveryActionConflict(error: unknown): boolean {
  let current: unknown = error;
  for (let depth = 0; depth < MAX_ERROR_CAUSE_DEPTH; depth += 1) {
    if (current == null) return false;
    // Property reads are safe on any non-nullish value; primitives simply yield undefined.
    const candidate = current as {
      code?: unknown;
      constraint?: unknown;
      message?: unknown;
      cause?: unknown;
    };
    if (candidate.code === UNIQUE_VIOLATION_SQLSTATE) {
      const { constraint, message } = candidate;
      const matchesKnownConstraint = RECOVERY_ACTION_UNIQUE_CONSTRAINTS.some(
        (name) => constraint === name || (typeof message === "string" && message.includes(name)),
      );
      if (matchesKnownConstraint) return true;
    }
    current = candidate.cause;
  }
  return false;
}
|
|
|
|
/**
 * Creates the persistence service for source-scoped issue recovery actions.
 *
 * The returned object can read the active recovery action for one or many
 * source issues, upsert the active action of a source issue (refreshing the
 * existing active row or inserting a new one), and close the active action
 * with a status and outcome.
 *
 * @param db Database handle used for every query that is not given an explicit transaction.
 */
export function issueRecoveryActionService(db: Db) {
  // Tail of the pending upsert chain, keyed by `${companyId}:${sourceIssueId}`.
  // The map lives in this closure, so ordering is only enforced among callers
  // that share this service instance.
  const upsertQueues = new Map<string, Promise<void>>();

  /** Builds the predicate that limits a query to rows in an active status. */
  const activeStatusFilter = () =>
    inArray(issueRecoveryActions.status, [...ACTIVE_RECOVERY_ACTION_STATUSES]);

  /**
   * Runs `task` after every previously queued task for the same
   * company/source-issue key has settled, so upserts for one issue do not
   * interleave within this service instance.
   */
  async function runExclusiveUpsert<T>(
    input: UpsertIssueRecoveryActionInput,
    task: () => Promise<T>,
  ): Promise<T> {
    const key = `${input.companyId}:${input.sourceIssueId}`;
    const previous = upsertQueues.get(key) ?? Promise.resolve();
    let release: () => void = () => {};
    const current = new Promise<void>((resolve) => {
      release = resolve;
    });
    // The new tail settles only once the predecessor has settled AND this task released.
    const next = previous.catch(() => undefined).then(() => current);
    upsertQueues.set(key, next);

    await previous.catch(() => undefined);
    try {
      return await task();
    } finally {
      release();
      // Only the last queued task removes the entry; otherwise a later task owns the tail.
      if (upsertQueues.get(key) === next) {
        upsertQueues.delete(key);
      }
    }
  }

  /**
   * Returns the most recently updated active recovery action for the given
   * source issue, or `null` when there is none.
   */
  async function getActiveForIssue(companyId: string, sourceIssueId: string): Promise<IssueRecoveryAction | null> {
    const [row] = await db
      .select()
      .from(issueRecoveryActions)
      .where(
        and(
          eq(issueRecoveryActions.companyId, companyId),
          eq(issueRecoveryActions.sourceIssueId, sourceIssueId),
          activeStatusFilter(),
        ),
      )
      .orderBy(desc(issueRecoveryActions.updatedAt))
      .limit(1);
    return row ? toReadModel(row) : null;
  }

  /**
   * Returns a map from source issue id to its most recently updated active
   * recovery action. Issues without an active action are absent from the map.
   */
  async function listActiveForIssues(
    companyId: string,
    sourceIssueIds: string[],
  ): Promise<Map<string, IssueRecoveryAction>> {
    const result = new Map<string, IssueRecoveryAction>();
    if (sourceIssueIds.length === 0) return result;

    const rows = await db
      .select()
      .from(issueRecoveryActions)
      .where(
        and(
          eq(issueRecoveryActions.companyId, companyId),
          inArray(issueRecoveryActions.sourceIssueId, [...new Set(sourceIssueIds)]),
          activeStatusFilter(),
        ),
      )
      .orderBy(desc(issueRecoveryActions.updatedAt));

    // Rows arrive newest first, so the first row seen per issue wins.
    for (const row of rows) {
      if (!result.has(row.sourceIssueId)) result.set(row.sourceIssueId, toReadModel(row));
    }
    return result;
  }

  /**
   * Re-runs the upsert with an incremented retry counter, or gives up once
   * `MAX_UPSERT_RETRIES` has been reached.
   *
   * @throws The supplied `error` when present, otherwise a generic failure error.
   */
  async function retryUpsertSourceScoped(
    input: UpsertIssueRecoveryActionInput,
    retryCount: number,
    error?: unknown,
  ): Promise<IssueRecoveryAction> {
    if (retryCount >= MAX_UPSERT_RETRIES) {
      if (error) throw error;
      throw new Error(
        `Failed to upsert active recovery action for issue ${input.sourceIssueId} after ${MAX_UPSERT_RETRIES} retries`,
      );
    }
    return upsertSourceScopedUnlocked(input, retryCount + 1);
  }

  /**
   * Refreshes the active recovery action of the source issue when one exists,
   * otherwise inserts a new one. Does not take the per-issue queue itself.
   *
   * The attempt is retried when the row stopped being active between the read
   * and the update, or when the insert hits one of the active-action unique
   * constraints.
   */
  async function upsertSourceScopedUnlocked(
    input: UpsertIssueRecoveryActionInput,
    retryCount = 0,
  ): Promise<IssueRecoveryAction> {
    const existing = await getActiveForIssue(input.companyId, input.sourceIssueId);
    const now = new Date();
    const ownerType = input.ownerType ?? (input.ownerAgentId ? "agent" : "board");

    if (existing) {
      const [updated] = await db
        .update(issueRecoveryActions)
        .set({
          recoveryIssueId: input.recoveryIssueId ?? null,
          kind: input.kind,
          status: "active",
          ownerType,
          ownerAgentId: input.ownerAgentId ?? null,
          ownerUserId: input.ownerUserId ?? null,
          previousOwnerAgentId: input.previousOwnerAgentId ?? existing.previousOwnerAgentId,
          returnOwnerAgentId: input.returnOwnerAgentId ?? existing.returnOwnerAgentId,
          cause: input.cause,
          fingerprint: input.fingerprint,
          evidence: input.evidence ?? existing.evidence,
          nextAction: input.nextAction,
          wakePolicy: input.wakePolicy ?? null,
          monitorPolicy: input.monitorPolicy ?? null,
          attemptCount: existing.attemptCount + 1,
          maxAttempts: input.maxAttempts ?? null,
          timeoutAt: input.timeoutAt ?? null,
          lastAttemptAt: input.lastAttemptAt ?? now,
          // A refreshed action is open again, so any earlier resolution data is cleared.
          outcome: null,
          resolutionNote: null,
          resolvedAt: null,
          updatedAt: now,
        })
        .where(and(eq(issueRecoveryActions.id, existing.id), activeStatusFilter()))
        .returning();

      // No row back means the action left the active statuses after it was read.
      if (!updated) {
        return retryUpsertSourceScoped(input, retryCount);
      }
      return toReadModel(updated);
    }

    try {
      const [created] = await db
        .insert(issueRecoveryActions)
        .values({
          companyId: input.companyId,
          sourceIssueId: input.sourceIssueId,
          recoveryIssueId: input.recoveryIssueId ?? null,
          kind: input.kind,
          status: "active",
          ownerType,
          ownerAgentId: input.ownerAgentId ?? null,
          ownerUserId: input.ownerUserId ?? null,
          previousOwnerAgentId: input.previousOwnerAgentId ?? null,
          returnOwnerAgentId: input.returnOwnerAgentId ?? null,
          cause: input.cause,
          fingerprint: input.fingerprint,
          evidence: input.evidence ?? {},
          nextAction: input.nextAction,
          wakePolicy: input.wakePolicy ?? null,
          monitorPolicy: input.monitorPolicy ?? null,
          attemptCount: 1,
          maxAttempts: input.maxAttempts ?? null,
          timeoutAt: input.timeoutAt ?? null,
          lastAttemptAt: input.lastAttemptAt ?? now,
        })
        .returning();

      // Fail with a clear message instead of dereferencing an undefined row.
      if (!created) {
        throw new Error(`Insert of recovery action for issue ${input.sourceIssueId} returned no row`);
      }
      return toReadModel(created);
    } catch (error) {
      // Anything other than an active-action unique conflict is not retryable here.
      if (!isUniqueRecoveryActionConflict(error)) throw error;
      return retryUpsertSourceScoped(input, retryCount, error);
    }
  }

  /**
   * Upserts the active recovery action of a source issue, queued behind any
   * other upsert for the same company/source issue on this service instance.
   */
  async function upsertSourceScoped(
    input: UpsertIssueRecoveryActionInput,
  ): Promise<IssueRecoveryAction> {
    return runExclusiveUpsert(input, () => upsertSourceScopedUnlocked(input));
  }

  /**
   * Closes the active recovery action of a source issue with the given status
   * and outcome.
   *
   * @param input Identifies the action and carries the resolution data.
   * @param dbOrTx Handle to run the update on; defaults to the service's `db`.
   * @returns The updated action, or `null` when no active action matched.
   */
  async function resolveActiveForIssue(
    input: ResolveIssueRecoveryActionInput,
    dbOrTx: DbOrTransaction = db,
  ): Promise<IssueRecoveryAction | null> {
    const now = new Date();
    const predicates = [
      eq(issueRecoveryActions.companyId, input.companyId),
      eq(issueRecoveryActions.sourceIssueId, input.sourceIssueId),
      activeStatusFilter(),
    ];
    if (input.actionId) {
      predicates.push(eq(issueRecoveryActions.id, input.actionId));
    }

    const [updated] = await dbOrTx
      .update(issueRecoveryActions)
      .set({
        status: input.status,
        outcome: input.outcome,
        resolutionNote: input.resolutionNote ?? null,
        resolvedAt: now,
        updatedAt: now,
      })
      .where(and(...predicates))
      .returning();

    return updated ? toReadModel(updated) : null;
  }

  return {
    getActiveForIssue,
    listActiveForIssues,
    resolveActiveForIssue,
    upsertSourceScoped,
  };
}
|