Files
paperclip/server/src/routes/health.ts
T
Dotta e89d3f7e11 [codex] Add backup endpoint and dev runtime hardening (#4087)
## Thinking Path

> - Paperclip is a local-first control plane for AI-agent companies.
> - Operators need predictable local dev behavior, recoverable instance
data, and scripts that do not churn the running app.
> - Several accumulated changes improve backup streaming, dev-server
health, static UI caching/logging, diagnostic-file ignores, and instance
isolation.
> - These are operational improvements that can land independently from
product UI work.
> - This pull request groups the dev-infra and backup changes from the
split branch into one standalone branch.
> - The benefit is safer local operation, easier manual backups, less
noisy dev output, and less cross-instance auth leakage.

## What Changed

- Added a manual instance database backup endpoint and route tests.
- Streamed backup/restore handling to avoid materializing large payloads
at once.
- Reduced dev static UI log/cache churn and ignored Node diagnostic
report captures.
- Added guarded dev auto-restart health polling coverage.
- Preserved worktree config during provisioning and scoped auth cookies
by instance.
- Added a Discord daily digest helper script and environment
documentation.
- Hardened adapter-route and startup feedback export tests around the
changed infrastructure.

## Verification

- `pnpm install --frozen-lockfile`
- `pnpm exec vitest run packages/db/src/backup-lib.test.ts
server/src/__tests__/instance-database-backups-routes.test.ts
server/src/__tests__/server-startup-feedback-export.test.ts
server/src/__tests__/adapter-routes.test.ts
server/src/__tests__/dev-runner-paths.test.ts
server/src/__tests__/health-dev-server-token.test.ts
server/src/__tests__/http-log-policy.test.ts
server/src/__tests__/vite-html-renderer.test.ts
server/src/__tests__/workspace-runtime.test.ts
server/src/__tests__/better-auth.test.ts`
- Split integration check: merged after the runtime/governance branch
and before UI branches with no merge conflicts.
- Confirmed this branch does not include `pnpm-lock.yaml`.

## Risks

- Medium risk: touches server startup, backup streaming, auth cookie
naming, dev health checks, and worktree provisioning.
- Backup endpoint behavior depends on existing board/admin access
controls and database backup helpers.
- No database migrations are included.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected. See `CONTRIBUTING.md`
for details.

## Model Used

- OpenAI Codex, GPT-5.4 tool-enabled coding model, agentic
code-editing/runtime with local shell and GitHub CLI access; exact
context window and reasoning mode are not exposed by the Paperclip
harness.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-04-20 06:08:55 -05:00

150 lines
4.9 KiB
TypeScript

import { timingSafeEqual } from "node:crypto";
import { Router } from "express";
import type { Db } from "@paperclipai/db";
import { and, count, eq, gt, inArray, isNull, sql } from "drizzle-orm";
import { heartbeatRuns, instanceUserRoles, invites } from "@paperclipai/db";
import type { DeploymentExposure, DeploymentMode } from "@paperclipai/shared";
import { readPersistedDevServerStatus, toDevServerHealthStatus } from "../dev-server-status.js";
import { logger } from "../middleware/logger.js";
import { instanceSettingsService } from "../services/instance-settings.js";
import { serverVersion } from "../version.js";
/**
 * Decides whether the health endpoint may return its full payload.
 *
 * Every caller receives full details unless the deployment mode is
 * `"authenticated"`; in that mode only `board` and `agent` actors do.
 */
function shouldExposeFullHealthDetails(
  actorType: "none" | "board" | "agent" | null | undefined,
  deploymentMode: DeploymentMode,
) {
  const isRecognizedActor = actorType === "board" || actorType === "agent";
  return deploymentMode !== "authenticated" || isRecognizedActor;
}
/**
 * Checks a caller-supplied token against `PAPERCLIP_DEV_SERVER_STATUS_TOKEN`.
 *
 * Both values are trimmed first. A missing or blank value on either side never
 * matches. The comparison itself uses `timingSafeEqual`.
 */
function hasDevServerStatusToken(providedToken: string | undefined) {
  const configuredToken = (process.env.PAPERCLIP_DEV_SERVER_STATUS_TOKEN ?? "").trim();
  const candidateToken = (providedToken ?? "").trim();
  if (configuredToken === "" || candidateToken === "") {
    return false;
  }

  const configuredBytes = Buffer.from(configuredToken);
  const candidateBytes = Buffer.from(candidateToken);
  // timingSafeEqual throws on buffers of different byte length, so reject those up front.
  return (
    configuredBytes.length === candidateBytes.length &&
    timingSafeEqual(configuredBytes, candidateBytes)
  );
}
/**
 * Builds the Express router that serves the instance health endpoint (`GET /`).
 *
 * How much the response reveals depends on the caller:
 * - Outside `authenticated` deployment mode, or for `board`/`agent` actors, the
 *   full payload (version, exposure, auth readiness, feature flags) is returned.
 * - Any other caller receives a redacted payload that omits the server version
 *   and feature flags. This redaction also applies to the 503 response sent
 *   when the database probe fails.
 * - Dev-server status is attached for full-detail callers and for callers that
 *   present a valid `x-paperclip-dev-server-status-token` header, provided a
 *   persisted dev-server status exists.
 *
 * @param db Database handle. When omitted, the handler answers immediately
 *   without running any query.
 * @param opts Deployment settings echoed in the payload. Defaults to a
 *   `local_trusted`, `private` instance with auth ready and company deletion enabled.
 * @returns A router with a single `GET /` handler.
 */
export function healthRoutes(
  db?: Db,
  opts: {
    deploymentMode: DeploymentMode;
    deploymentExposure: DeploymentExposure;
    authReady: boolean;
    companyDeletionEnabled: boolean;
  } = {
    deploymentMode: "local_trusted",
    deploymentExposure: "private",
    authReady: true,
    companyDeletionEnabled: true,
  },
) {
  const router = Router();

  router.get("/", async (req, res) => {
    const actorType = "actor" in req ? req.actor?.type : null;
    const exposeFullDetails = shouldExposeFullHealthDetails(
      actorType,
      opts.deploymentMode,
    );
    // The status token is only consulted when the caller is not already
    // entitled to full details (short-circuit evaluation).
    const exposeDevServerDetails =
      exposeFullDetails || hasDevServerStatusToken(req.get("x-paperclip-dev-server-status-token"));

    // Without a database handle there is nothing to probe.
    if (!db) {
      res.json(
        exposeFullDetails
          ? { status: "ok", version: serverVersion }
          : { status: "ok", deploymentMode: opts.deploymentMode },
      );
      return;
    }

    try {
      await db.execute(sql`SELECT 1`);
    } catch (error) {
      logger.warn({ err: error }, "Health check database probe failed");
      // Apply the same redaction as the healthy responses: the server version
      // is only disclosed to callers entitled to full details.
      res.status(503).json({
        status: "unhealthy",
        ...(exposeFullDetails ? { version: serverVersion } : {}),
        error: "database_unreachable",
      });
      return;
    }

    let bootstrapStatus: "ready" | "bootstrap_pending" = "ready";
    let bootstrapInviteActive = false;
    if (opts.deploymentMode === "authenticated") {
      // The instance counts as bootstrapped once at least one instance_admin role row exists.
      const roleCount = await db
        .select({ count: count() })
        .from(instanceUserRoles)
        .where(sql`${instanceUserRoles.role} = 'instance_admin'`)
        .then((rows) => Number(rows[0]?.count ?? 0));
      bootstrapStatus = roleCount > 0 ? "ready" : "bootstrap_pending";

      if (bootstrapStatus === "bootstrap_pending") {
        // Report whether a bootstrap_ceo invite exists that is not revoked,
        // not accepted, and not yet expired.
        const now = new Date();
        const inviteCount = await db
          .select({ count: count() })
          .from(invites)
          .where(
            and(
              eq(invites.inviteType, "bootstrap_ceo"),
              isNull(invites.revokedAt),
              isNull(invites.acceptedAt),
              gt(invites.expiresAt, now),
            ),
          )
          .then((rows) => Number(rows[0]?.count ?? 0));
        bootstrapInviteActive = inviteCount > 0;
      }
    }

    const persistedDevServerStatus = readPersistedDevServerStatus();
    let devServer: ReturnType<typeof toDevServerHealthStatus> | undefined;
    // NOTE(review): the `select` type check presumably tolerates partial db
    // stubs that only implement `execute` — confirm against the test suite.
    if (
      exposeDevServerDetails &&
      persistedDevServerStatus &&
      typeof (db as { select?: unknown }).select === "function"
    ) {
      const instanceSettings = instanceSettingsService(db);
      const experimentalSettings = await instanceSettings.getExperimental();
      // Heartbeat runs that are queued or running count as active.
      const activeRunCount = await db
        .select({ count: count() })
        .from(heartbeatRuns)
        .where(inArray(heartbeatRuns.status, ["queued", "running"]))
        .then((rows) => Number(rows[0]?.count ?? 0));
      devServer = toDevServerHealthStatus(persistedDevServerStatus, {
        autoRestartEnabled: experimentalSettings.autoRestartDevServerWhenIdle ?? false,
        activeRunCount,
      });
    }

    // Redacted payload: no version, exposure, auth readiness, or feature flags.
    if (!exposeFullDetails) {
      res.json({
        status: "ok",
        deploymentMode: opts.deploymentMode,
        bootstrapStatus,
        bootstrapInviteActive,
        ...(devServer ? { devServer } : {}),
      });
      return;
    }

    res.json({
      status: "ok",
      version: serverVersion,
      deploymentMode: opts.deploymentMode,
      deploymentExposure: opts.deploymentExposure,
      authReady: opts.authReady,
      bootstrapStatus,
      bootstrapInviteActive,
      features: {
        companyDeletionEnabled: opts.companyDeletionEnabled,
      },
      ...(devServer ? { devServer } : {}),
    });
  });

  return router;
}