forked from farhoodlabs/paperclip
09d0678840
## Thinking Path

> - Paperclip orchestrates AI agents through issue checkout, heartbeat runs, routines, and auditable control-plane state
> - The runtime path has to recover from lost local processes, transient adapter failures, blocked dependencies, and routine coalescing without stranding work
> - The existing branch carried several reliability fixes across heartbeat scheduling, issue runtime controls, routine dispatch, and operator-facing run state
> - These changes belong together because they share backend contracts, migrations, and runtime status semantics
> - This pull request groups the control-plane/runtime slice so it can merge independently from board UI polish and adapter sandbox work
> - The benefit is safer heartbeat recovery, clearer runtime controls, and more predictable recurring execution behavior

## What Changed

- Adds bounded heartbeat retry scheduling, scheduled retry state, and Codex transient failure recovery handling.
- Tightens heartbeat process recovery, blocker wake behavior, issue comment wake handling, routine dispatch coalescing, and activity/dashboard bounds.
- Adds runtime-control MCP tools and Paperclip skill docs for issue workspace runtime management.
- Adds migrations `0061_lively_thor_girl.sql` and `0062_routine_run_dispatch_fingerprint.sql`.
- Surfaces retry state in run ledger/agent UI and keeps related shared types synchronized.

## Verification

- `pnpm exec vitest run server/src/__tests__/heartbeat-retry-scheduling.test.ts server/src/__tests__/heartbeat-process-recovery.test.ts server/src/__tests__/routines-service.test.ts`
- `pnpm exec vitest run src/tools.test.ts` from `packages/mcp-server`

## Risks

- Medium risk: this touches heartbeat recovery and routine dispatch, which are central execution paths.
- Migration order matters if split branches land out of order: merge this PR before branches that assume the new runtime/routine fields.
- Runtime retry behavior should be watched in CI and in local operator smoke tests because it changes how transient failures are resumed.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5-based coding agent runtime, shell/git tool use enabled. Exact hosted model build and context window are not exposed in this Paperclip heartbeat environment.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge
163 lines
5.5 KiB
TypeScript
163 lines
5.5 KiB
TypeScript
import { and, eq, gte, sql } from "drizzle-orm";
|
|
import type { Db } from "@paperclipai/db";
|
|
import { agents, approvals, companies, costEvents, heartbeatRuns, issues } from "@paperclipai/db";
|
|
import { notFound } from "../errors.js";
|
|
import { budgetService } from "./budgets.js";
|
|
|
|
// Number of UTC calendar days (ending with today) covered by the dashboard's run-activity series.
const DASHBOARD_RUN_ACTIVITY_DAYS = 14;
|
|
|
|
/**
 * Formats a date as its UTC calendar day, e.g. `2024-03-05`.
 *
 * The key is the first ten characters of the ISO-8601 string, so it is always
 * expressed in UTC regardless of the host time zone.
 *
 * @param date - Instant to format.
 * @returns The `YYYY-MM-DD` UTC day key.
 * @throws RangeError if `date` is an invalid Date (raised by `toISOString`).
 */
function formatUtcDateKey(date: Date): string {
  return date.toISOString().slice(0, 10);
}
|
|
|
|
/**
 * Returns the first instant (00:00:00.000 UTC on day 1) of the UTC month that
 * contains `date`.
 *
 * @param date - Any instant within the month of interest.
 * @returns A new Date at the start of that UTC month; `date` is not mutated.
 */
export function getUtcMonthStart(date: Date): Date {
  return new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), 1));
}
|
|
|
|
/**
 * Lists the UTC day keys for the most recent `days` calendar days, oldest
 * first, ending with the UTC day that contains `now`.
 *
 * @param now - Reference instant; only its UTC year/month/day are used.
 * @param days - Number of days to return. Fractional values are rounded down
 *   and values below one yield an empty array.
 * @returns `YYYY-MM-DD` keys in ascending order; the last entry is today (UTC).
 */
function getRecentUtcDateKeys(now: Date, days: number): string[] {
  const msPerDay = 24 * 60 * 60 * 1000;
  // Whole days only: a fractional count would shift every offset by a partial
  // day, so the window would end before today instead of on it.
  const dayCount = Math.max(0, Math.floor(days));
  const todayUtc = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
  return Array.from({ length: dayCount }, (_, index) => {
    // Index 0 is the oldest day; the final index has offset 0 (today).
    const dayOffset = index - (dayCount - 1);
    return formatUtcDateKey(new Date(todayUtc + dayOffset * msPerDay));
  });
}
|
|
|
|
/**
 * Creates the dashboard read service bound to a database handle.
 *
 * @param db - Database handle used for every query issued by the service; it
 *   is also passed to `budgetService` to obtain the budget overview.
 * @returns An object exposing `summary(companyId)`.
 */
export function dashboardService(db: Db) {
  const budgets = budgetService(db);

  return {
    /**
     * Builds the dashboard summary for one company: agent counts by status,
     * task counts, month-to-date spend against the monthly budget, pending
     * approvals, budget incident counters, and a per-day heartbeat run series
     * covering the last `DASHBOARD_RUN_ACTIVITY_DAYS` UTC days.
     *
     * @param companyId - Identifier of the company to summarise.
     * @returns The aggregated summary object.
     * @throws The error created by `notFound("Company not found")` when no
     *   company row matches `companyId`.
     */
    summary: async (companyId: string) => {
      const companyRows = await db
        .select()
        .from(companies)
        .where(eq(companies.id, companyId));
      const company = companyRows[0] ?? null;

      // Fail before issuing the aggregate queries when the company is unknown.
      if (!company) throw notFound("Company not found");

      const now = new Date();
      const monthStart = getUtcMonthStart(now);
      const runActivityDays = getRecentUtcDateKeys(now, DASHBOARD_RUN_ACTIVITY_DAYS);
      const runActivityStart = new Date(`${runActivityDays[0]}T00:00:00.000Z`);
      // Buckets runs by UTC calendar day so the keys match `runActivityDays`.
      const runActivityDayExpr = sql<string>`to_char(${heartbeatRuns.createdAt} at time zone 'UTC', 'YYYY-MM-DD')`;

      // These reads do not depend on each other, so run them concurrently
      // instead of paying one round trip after another.
      const [
        agentRows,
        taskRows,
        pendingApprovalRows,
        spendRows,
        runActivityRows,
        budgetOverview,
      ] = await Promise.all([
        db
          .select({ status: agents.status, count: sql<number>`count(*)` })
          .from(agents)
          .where(eq(agents.companyId, companyId))
          .groupBy(agents.status),
        db
          .select({ status: issues.status, count: sql<number>`count(*)` })
          .from(issues)
          .where(eq(issues.companyId, companyId))
          .groupBy(issues.status),
        db
          .select({ count: sql<number>`count(*)` })
          .from(approvals)
          .where(and(eq(approvals.companyId, companyId), eq(approvals.status, "pending"))),
        db
          .select({
            monthSpend: sql<number>`coalesce(sum(${costEvents.costCents}), 0)::double precision`,
          })
          .from(costEvents)
          .where(
            and(
              eq(costEvents.companyId, companyId),
              gte(costEvents.occurredAt, monthStart),
            ),
          ),
        db
          .select({
            date: runActivityDayExpr,
            status: heartbeatRuns.status,
            count: sql<number>`count(*)::double precision`,
          })
          .from(heartbeatRuns)
          .where(
            and(
              eq(heartbeatRuns.companyId, companyId),
              gte(heartbeatRuns.createdAt, runActivityStart),
            ),
          )
          .groupBy(runActivityDayExpr, heartbeatRuns.status),
        budgets.overview(companyId),
      ]);

      // Counts are passed through Number() because the driver may return them as strings.
      const pendingApprovals = Number(pendingApprovalRows[0]?.count ?? 0);
      const monthSpendCents = Number(spendRows[0]?.monthSpend ?? 0);

      const agentCounts: Record<string, number> = {
        active: 0,
        running: 0,
        paused: 0,
        error: 0,
      };
      for (const row of agentRows) {
        const count = Number(row.count);
        // "idle" agents are operational — count them as active
        const bucket = row.status === "idle" ? "active" : row.status;
        agentCounts[bucket] = (agentCounts[bucket] ?? 0) + count;
      }

      const taskCounts: Record<string, number> = {
        open: 0,
        inProgress: 0,
        blocked: 0,
        done: 0,
      };
      for (const row of taskRows) {
        const count = Number(row.count);
        if (row.status === "in_progress") taskCounts.inProgress += count;
        if (row.status === "blocked") taskCounts.blocked += count;
        if (row.status === "done") taskCounts.done += count;
        // "open" is every task that is neither done nor cancelled, so it
        // deliberately overlaps with inProgress and blocked.
        if (row.status !== "done" && row.status !== "cancelled") taskCounts.open += count;
      }

      // Pre-seed one zeroed bucket per day so days without runs still appear.
      const runActivity = new Map(
        runActivityDays.map((date) => [
          date,
          { date, succeeded: 0, failed: 0, other: 0, total: 0 },
        ]),
      );
      for (const row of runActivityRows) {
        const bucket = runActivity.get(row.date);
        // Ignore rows whose day key is outside the pre-seeded window.
        if (!bucket) continue;
        const count = Number(row.count);
        if (row.status === "succeeded") bucket.succeeded += count;
        else if (row.status === "failed" || row.status === "timed_out") bucket.failed += count;
        else bucket.other += count;
        bucket.total += count;
      }

      // Report 0% rather than dividing by zero when no monthly budget is set.
      const utilization =
        company.budgetMonthlyCents > 0
          ? (monthSpendCents / company.budgetMonthlyCents) * 100
          : 0;

      return {
        companyId,
        agents: {
          active: agentCounts.active,
          running: agentCounts.running,
          paused: agentCounts.paused,
          error: agentCounts.error,
        },
        tasks: taskCounts,
        costs: {
          monthSpendCents,
          monthBudgetCents: company.budgetMonthlyCents,
          monthUtilizationPercent: Number(utilization.toFixed(2)),
        },
        pendingApprovals,
        budgets: {
          activeIncidents: budgetOverview.activeIncidents.length,
          pendingApprovals: budgetOverview.pendingApprovalCount,
          pausedAgents: budgetOverview.pausedAgentCount,
          pausedProjects: budgetOverview.pausedProjectCount,
        },
        runActivity: Array.from(runActivity.values()),
      };
    },
  };
}
|