From cd606563f640f0067f802b8e552b990cb1c65ce5 Mon Sep 17 00:00:00 2001 From: Dotta <34892728+cryppadotta@users.noreply.github.com> Date: Thu, 30 Apr 2026 12:54:35 -0500 Subject: [PATCH] Expand database backups to non-system schemas (#4859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Thinking Path > - Paperclip is the control plane for autonomous AI companies. > - Reliable backups are part of operating that control plane safely. > - The previous backup path was public-schema oriented and did not clearly cover plugin-owned schemas or migration history. > - Paperclip now has plugin database namespaces and Drizzle migration state that must survive backup/restore. > - This pull request expands logical database backups to non-system schemas and documents the backup boundary. > - The benefit is safer restore behavior for core and plugin-owned database state without implying full filesystem disaster recovery. ## What Changed - Include non-system database schemas in JavaScript and pg_dump backup paths. - Preserve enum, table, sequence, index, constraint, migration, and plugin-schema objects across backup/restore. - Add restore coverage for plugin-owned schemas and Drizzle migration history. - Clarify docs that DB backups are logical database backups, not full instance filesystem backups. ## Verification - `pnpm install --frozen-lockfile` - `pnpm exec vitest run packages/db/src/backup-lib.test.ts` - Result: 1 test file passed, 4 tests passed. - Confirmed this PR does not include `pnpm-lock.yaml` or `.github/workflows/*` changes. ## Risks - Medium: backup generation touches schema discovery and restore ordering, so unusual database objects may need additional coverage later. - No migrations are included. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. 
See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex, GPT-5 coding agent, tool use enabled, medium reasoning effort. Exact hosted context-window details are not exposed in this runtime. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge Note: the only UI file touched in this PR is `ui/src/components/IssueChatThread.tsx` (scroll-settle timeout cleanup), with no markup or styling changes, so screenshots are not applicable. --------- Co-authored-by: Paperclip --- doc/DATABASE.md | 10 +- doc/DEVELOPING.md | 8 +- packages/db/src/backup-lib.test.ts | 130 +++++++++++++++++++++++++- packages/db/src/backup-lib.ts | 95 ++++++++++--------- ui/src/components/IssueChatThread.tsx | 15 ++- 5 files changed, 210 insertions(+), 48 deletions(-) diff --git a/doc/DATABASE.md b/doc/DATABASE.md index ab6db4b7..23abd32d 100644 --- a/doc/DATABASE.md +++ b/doc/DATABASE.md @@ -149,7 +149,15 @@ The plugin runtime tracks plugin-owned database namespaces and migrations in `pl ## Backups -Paperclip supports automatic and manual database backups. See `doc/DEVELOPING.md` for the current `paperclipai db:backup` / `pnpm db:backup` commands and backup retention configuration. +Paperclip supports automatic and manual logical database backups. These dumps include +non-system database schemas such as `public`, the Drizzle migration journal, and +plugin-owned database schemas. See `doc/DEVELOPING.md` for the current +`paperclipai db:backup` / `pnpm db:backup` commands and backup retention +configuration. 
+ +Database backups do not include non-database instance files such as local-disk +uploads, workspace files, or the local encrypted secrets master key. Back those paths +up separately when you need full instance disaster recovery. ## Secret storage diff --git a/doc/DEVELOPING.md b/doc/DEVELOPING.md index f221da39..d95bb04d 100644 --- a/doc/DEVELOPING.md +++ b/doc/DEVELOPING.md @@ -421,7 +421,9 @@ If you set `DATABASE_URL`, the server will use that instead of embedded PostgreS ## Automatic DB Backups -Paperclip can run automatic DB backups on a timer. Defaults: +Paperclip can run automatic logical database backups on a timer. These backups cover +non-system database schemas, including migration history and plugin-owned database +schemas. Defaults: - enabled - every 60 minutes @@ -449,6 +451,10 @@ Environment overrides: - `PAPERCLIP_DB_BACKUP_RETENTION_DAYS=` - `PAPERCLIP_DB_BACKUP_DIR=/absolute/or/~/path` +DB backups are not full instance filesystem backups. For full local disaster +recovery, also back up local storage files and the local encrypted secrets key if +those providers are enabled. + ## Secrets in Dev Agent env vars now support secret references. By default, secret values are stored with local encryption and only secret refs are persisted in agent config. 
diff --git a/packages/db/src/backup-lib.test.ts b/packages/db/src/backup-lib.test.ts index 6497cbe6..8a83a8f4 100644 --- a/packages/db/src/backup-lib.test.ts +++ b/packages/db/src/backup-lib.test.ts @@ -182,7 +182,135 @@ describeEmbeddedPostgres("runDatabaseBackup", () => { ); it( - "restores statements incrementally when backup comments precede the first breakpoint", + "backs up and restores non-public database schemas and migration history", + async () => { + const sourceConnectionString = await createTempDatabase(); + const restoreConnectionString = await createSiblingDatabase( + sourceConnectionString, + "paperclip_full_logical_restore_target", + ); + const backupDir = createTempDir("paperclip-db-full-logical-backup-"); + const sourceSql = postgres(sourceConnectionString, { max: 1, onnotice: () => {} }); + const restoreSql = postgres(restoreConnectionString, { max: 1, onnotice: () => {} }); + + try { + await sourceSql.unsafe(` + CREATE SCHEMA IF NOT EXISTS "drizzle"; + CREATE TABLE IF NOT EXISTS "drizzle"."__drizzle_migrations" ( + "id" serial PRIMARY KEY, + "hash" text NOT NULL, + "created_at" bigint + ); + INSERT INTO "drizzle"."__drizzle_migrations" ("hash", "created_at") + VALUES ('paperclip-migration-history', 1770000000000); + `); + await sourceSql.unsafe(` + CREATE TABLE "public"."backup_parent_records" ( + "id" uuid PRIMARY KEY, + "name" text NOT NULL + ); + INSERT INTO "public"."backup_parent_records" ("id", "name") + VALUES ('11111111-1111-4111-8111-111111111111', 'parent'); + `); + await sourceSql.unsafe(` + CREATE TABLE "public"."plugin_rows" ( + "id" serial PRIMARY KEY, + "note" text NOT NULL + ); + CREATE TABLE "public"."audit_rows" ( + "id" serial PRIMARY KEY, + "secret_note" text + ); + INSERT INTO "public"."plugin_rows" ("note") + VALUES ('public-collision'); + INSERT INTO "public"."audit_rows" ("secret_note") + VALUES ('public-secret'); + `); + await sourceSql.unsafe(` + CREATE SCHEMA "plugin_backup_scope"; + CREATE TYPE 
"plugin_backup_scope"."plugin_status" AS ENUM ('ready', 'done'); + CREATE TABLE "plugin_backup_scope"."plugin_rows" ( + "id" serial PRIMARY KEY, + "parent_id" uuid NOT NULL REFERENCES "public"."backup_parent_records"("id") ON DELETE CASCADE, + "status" "plugin_backup_scope"."plugin_status" NOT NULL, + "note" text NOT NULL + ); + CREATE TABLE "plugin_backup_scope"."audit_rows" ( + "id" serial PRIMARY KEY, + "secret_note" text + ); + CREATE UNIQUE INDEX "plugin_rows_note_uq" ON "plugin_backup_scope"."plugin_rows" ("note"); + INSERT INTO "plugin_backup_scope"."plugin_rows" ("parent_id", "status", "note") + VALUES ('11111111-1111-4111-8111-111111111111', 'ready', 'first'); + INSERT INTO "plugin_backup_scope"."audit_rows" ("secret_note") + VALUES ('plugin-secret'); + `); + + const result = await runDatabaseBackup({ + connectionString: sourceConnectionString, + backupDir, + retention: { dailyDays: 7, weeklyWeeks: 4, monthlyMonths: 1 }, + filenamePrefix: "paperclip-full-logical-test", + backupEngine: "javascript", + excludeTables: ["plugin_rows"], + nullifyColumns: { + audit_rows: ["secret_note"], + }, + }); + + await runDatabaseRestore({ + connectionString: restoreConnectionString, + backupFile: result.backupFile, + }); + + const migrationRows = await restoreSql.unsafe<{ hash: string }[]>(` + SELECT "hash" + FROM "drizzle"."__drizzle_migrations" + WHERE "hash" = 'paperclip-migration-history' + `); + expect(migrationRows).toEqual([{ hash: "paperclip-migration-history" }]); + + const pluginRows = await restoreSql.unsafe<{ note: string; status: string; parent_name: string }[]>(` + SELECT r."note", r."status"::text AS "status", p."name" AS "parent_name" + FROM "plugin_backup_scope"."plugin_rows" r + JOIN "public"."backup_parent_records" p ON p."id" = r."parent_id" + `); + expect(pluginRows).toEqual([{ note: "first", status: "ready", parent_name: "parent" }]); + + const publicCollisionRows = await restoreSql.unsafe<{ count: number }[]>(` + SELECT count(*)::int AS count + FROM 
"public"."plugin_rows" + `); + expect(publicCollisionRows[0]?.count).toBe(0); + + const publicAuditRows = await restoreSql.unsafe<{ secret_note: string | null }[]>(` + SELECT "secret_note" + FROM "public"."audit_rows" + `); + expect(publicAuditRows).toEqual([{ secret_note: null }]); + + const pluginAuditRows = await restoreSql.unsafe<{ secret_note: string | null }[]>(` + SELECT "secret_note" + FROM "plugin_backup_scope"."audit_rows" + `); + expect(pluginAuditRows).toEqual([{ secret_note: "plugin-secret" }]); + + await expect( + restoreSql.unsafe(` + INSERT INTO "plugin_backup_scope"."plugin_rows" ("parent_id", "status", "note") + VALUES ('11111111-1111-4111-8111-111111111111', 'done', 'first') + `), + ).rejects.toThrow(); + } finally { + await sourceSql.end(); + await restoreSql.end(); + } + }, + 60_000, + ); + + it( + "restores legacy public-only backups without migration history", async () => { const restoreConnectionString = await createTempDatabase(); const restoreSql = postgres(restoreConnectionString, { max: 1, onnotice: () => {} }); diff --git a/packages/db/src/backup-lib.ts b/packages/db/src/backup-lib.ts index 2662b6dc..4fb8d4fa 100644 --- a/packages/db/src/backup-lib.ts +++ b/packages/db/src/backup-lib.ts @@ -19,6 +19,11 @@ export type RunDatabaseBackupOptions = { retention: BackupRetentionPolicy; filenamePrefix?: string; connectTimeoutSeconds?: number; + /** + * @deprecated Migration-journal schemas are included with the normal backup + * scope. This option is kept for compatibility and no longer changes backup + * engine selection. 
+ */ includeMigrationJournal?: boolean; excludeTables?: string[]; nullifyColumns?: Record; @@ -61,8 +66,6 @@ type ExtensionDefinition = { schema_name: string; }; -const DRIZZLE_SCHEMA = "drizzle"; -const DRIZZLE_MIGRATIONS_TABLE = "__drizzle_migrations"; const DEFAULT_BACKUP_WRITE_BUFFER_BYTES = 1024 * 1024; const BACKUP_DATA_CURSOR_ROWS = 100; const BACKUP_CLI_STDERR_BYTES = 64 * 1024; @@ -194,16 +197,22 @@ function formatSqlLiteral(value: string): string { function normalizeTableNameSet(values: string[] | undefined): Set { return new Set( (values ?? []) - .map((value) => value.trim()) + .map(normalizeTableSelector) .filter((value) => value.length > 0), ); } +function normalizeTableSelector(value: string): string { + const trimmed = value.trim(); + if (trimmed.length === 0) return ""; + return trimmed.includes(".") ? trimmed : tableKey("public", trimmed); +} + function normalizeNullifyColumnMap(values: Record | undefined): Map> { const out = new Map>(); if (!values) return out; for (const [tableName, columns] of Object.entries(values)) { - const normalizedTable = tableName.trim(); + const normalizedTable = normalizeTableSelector(tableName); if (normalizedTable.length === 0) continue; const normalizedColumns = new Set( columns @@ -229,9 +238,14 @@ function tableKey(schemaName: string, tableName: string): string { return `${schemaName}.${tableName}`; } +function nonSystemSchemaPredicate(identifier: string): string { + return `${identifier} NOT IN ('pg_catalog', 'information_schema') + AND ${identifier} NOT LIKE 'pg_toast%' + AND ${identifier} NOT LIKE 'pg_temp_%'`; +} + function hasBackupTransforms(opts: RunDatabaseBackupOptions): boolean { - return opts.includeMigrationJournal === true || - (opts.excludeTables?.length ?? 0) > 0 || + return (opts.excludeTables?.length ?? 0) > 0 || Object.keys(opts.nullifyColumns ?? 
{}).length > 0; } @@ -285,7 +299,6 @@ async function runPgDumpBackup(opts: { "--if-exists", "--no-owner", "--no-privileges", - "--schema=public", ], { stdio: ["ignore", "pipe", "pipe"], @@ -484,7 +497,6 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise const connectTimeout = Math.max(1, Math.trunc(opts.connectTimeoutSeconds ?? 5)); const backupEngine = opts.backupEngine ?? "auto"; const canUsePgDump = !hasBackupTransforms(opts); - const includeMigrationJournal = opts.includeMigrationJournal === true; const excludedTableNames = normalizeTableNameSet(opts.excludeTables); const nullifiedColumnsByTable = normalizeNullifyColumnMap(opts.nullifyColumns); let sql = postgres(opts.connectionString, { max: 1, connect_timeout: connectTimeout }); @@ -552,31 +564,24 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise SELECT table_schema AS schema_name, table_name AS tablename FROM information_schema.tables WHERE table_type = 'BASE TABLE' - AND ( - table_schema = 'public' - OR (${includeMigrationJournal}::boolean AND table_schema = ${DRIZZLE_SCHEMA} AND table_name = ${DRIZZLE_MIGRATIONS_TABLE}) - ) + AND ${sql.unsafe(nonSystemSchemaPredicate("table_schema"))} ORDER BY table_schema, table_name `; const tables = allTables; const includedTableNames = new Set(tables.map(({ schema_name, tablename }) => tableKey(schema_name, tablename))); + const includedSchemas = new Set(tables.map(({ schema_name }) => schema_name)); // Get all enums - const enums = await sql<{ typname: string; labels: string[] }[]>` - SELECT t.typname, array_agg(e.enumlabel ORDER BY e.enumsortorder) AS labels + const enums = await sql<{ schema_name: string; typname: string; labels: string[] }[]>` + SELECT n.nspname AS schema_name, t.typname, array_agg(e.enumlabel ORDER BY e.enumsortorder) AS labels FROM pg_type t JOIN pg_enum e ON t.oid = e.enumtypid JOIN pg_namespace n ON t.typnamespace = n.oid - WHERE n.nspname = 'public' - GROUP BY t.typname - ORDER 
BY t.typname + WHERE ${sql.unsafe(nonSystemSchemaPredicate("n.nspname"))} + GROUP BY n.nspname, t.typname + ORDER BY n.nspname, t.typname `; - - for (const e of enums) { - const labels = e.labels.map((l) => `'${l.replace(/'/g, "''")}'`).join(", "); - emitStatement(`CREATE TYPE "public"."${e.typname}" AS ENUM (${labels});`); - } - if (enums.length > 0) emit(""); + for (const e of enums) includedSchemas.add(e.schema_name); const allSequences = await sql` SELECT @@ -598,16 +603,14 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise LEFT JOIN pg_class tbl ON tbl.oid = dep.refobjid LEFT JOIN pg_namespace tblns ON tblns.oid = tbl.relnamespace LEFT JOIN pg_attribute attr ON attr.attrelid = tbl.oid AND attr.attnum = dep.refobjsubid - WHERE s.sequence_schema = 'public' - OR (${includeMigrationJournal}::boolean AND s.sequence_schema = ${DRIZZLE_SCHEMA}) + WHERE ${sql.unsafe(nonSystemSchemaPredicate("s.sequence_schema"))} ORDER BY s.sequence_schema, s.sequence_name `; const sequences = allSequences.filter( (seq) => !seq.owner_table || includedTableNames.has(tableKey(seq.owner_schema ?? 
"public", seq.owner_table)), ); - const schemas = new Set(); - for (const table of tables) schemas.add(table.schema_name); + const schemas = new Set(includedSchemas); for (const seq of sequences) schemas.add(seq.sequence_schema); const extraSchemas = [...schemas].filter((schemaName) => schemaName !== "public"); if (extraSchemas.length > 0) { @@ -618,6 +621,12 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise emit(""); } + for (const e of enums) { + const labels = e.labels.map((l) => `'${l.replace(/'/g, "''")}'`).join(", "); + emitStatement(`CREATE TYPE ${quoteQualifiedName(e.schema_name, e.typname)} AS ENUM (${labels});`); + } + if (enums.length > 0) emit(""); + const extensions = await sql` SELECT e.extname AS extension_name, @@ -655,6 +664,7 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise const columns = await sql<{ column_name: string; data_type: string; + udt_schema: string; udt_name: string; is_nullable: string; column_default: string | null; @@ -662,7 +672,7 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise numeric_precision: number | null; numeric_scale: number | null; }[]>` - SELECT column_name, data_type, udt_name, is_nullable, column_default, + SELECT column_name, data_type, udt_schema, udt_name, is_nullable, column_default, character_maximum_length, numeric_precision, numeric_scale FROM information_schema.columns WHERE table_schema = ${schema_name} AND table_name = ${tablename} @@ -676,9 +686,12 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise for (const col of columns) { let typeStr: string; if (col.data_type === "USER-DEFINED") { - typeStr = `"${col.udt_name}"`; + typeStr = quoteQualifiedName(col.udt_schema, col.udt_name); } else if (col.data_type === "ARRAY") { - typeStr = `${col.udt_name.replace(/^_/, "")}[]`; + const elementType = col.udt_name.replace(/^_/, ""); + typeStr = col.udt_schema === "pg_catalog" + ? 
`${elementType}[]` + : `${quoteQualifiedName(col.udt_schema, elementType)}[]`; } else if (col.data_type === "character varying") { typeStr = col.character_maximum_length ? `varchar(${col.character_maximum_length})` @@ -761,10 +774,8 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise JOIN pg_namespace tgtn ON tgtn.oid = tgt.relnamespace JOIN pg_attribute sa ON sa.attrelid = src.oid AND sa.attnum = ANY(c.conkey) JOIN pg_attribute ta ON ta.attrelid = tgt.oid AND ta.attnum = ANY(c.confkey) - WHERE c.contype = 'f' AND ( - srcn.nspname = 'public' - OR (${includeMigrationJournal}::boolean AND srcn.nspname = ${DRIZZLE_SCHEMA}) - ) + WHERE c.contype = 'f' + AND ${sql.unsafe(nonSystemSchemaPredicate("srcn.nspname"))} GROUP BY c.conname, srcn.nspname, src.relname, tgtn.nspname, tgt.relname, c.confupdtype, c.confdeltype ORDER BY srcn.nspname, src.relname, c.conname `; @@ -800,10 +811,8 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise JOIN pg_class t ON t.oid = c.conrelid JOIN pg_namespace n ON n.oid = t.relnamespace JOIN pg_attribute a ON a.attrelid = t.oid AND a.attnum = ANY(c.conkey) - WHERE c.contype = 'u' AND ( - n.nspname = 'public' - OR (${includeMigrationJournal}::boolean AND n.nspname = ${DRIZZLE_SCHEMA}) - ) + WHERE c.contype = 'u' + AND ${sql.unsafe(nonSystemSchemaPredicate("n.nspname"))} GROUP BY c.conname, n.nspname, t.relname ORDER BY n.nspname, t.relname, c.conname `; @@ -822,10 +831,7 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise const allIndexes = await sql<{ schema_name: string; tablename: string; indexdef: string }[]>` SELECT schemaname AS schema_name, tablename, indexdef FROM pg_indexes - WHERE ( - schemaname = 'public' - OR (${includeMigrationJournal}::boolean AND schemaname = ${DRIZZLE_SCHEMA}) - ) + WHERE ${sql.unsafe(nonSystemSchemaPredicate("schemaname"))} AND indexname NOT IN ( SELECT conname FROM pg_constraint c JOIN pg_namespace n ON n.oid = 
c.connamespace @@ -845,9 +851,10 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise // Dump data for each table for (const { schema_name, tablename } of tables) { + const currentTableKey = tableKey(schema_name, tablename); const qualifiedTableName = quoteQualifiedName(schema_name, tablename); const count = await sql.unsafe<{ n: number }[]>(`SELECT count(*)::int AS n FROM ${qualifiedTableName}`); - if (excludedTableNames.has(tablename) || (count[0]?.n ?? 0) === 0) continue; + if (excludedTableNames.has(currentTableKey) || (count[0]?.n ?? 0) === 0) continue; // Get column info for this table const cols = await sql<{ column_name: string; data_type: string }[]>` @@ -860,7 +867,7 @@ export async function runDatabaseBackup(opts: RunDatabaseBackupOptions): Promise emit(`-- Data for: ${schema_name}.${tablename} (${count[0]!.n} rows)`); - const nullifiedColumns = nullifiedColumnsByTable.get(tablename) ?? new Set(); + const nullifiedColumns = nullifiedColumnsByTable.get(currentTableKey) ?? 
new Set(); if (backupEngine !== "javascript" && nullifiedColumns.size === 0) { emit(`COPY ${qualifiedTableName} (${colNames}) FROM stdin;`); await writer.writeRaw("\n"); diff --git a/ui/src/components/IssueChatThread.tsx b/ui/src/components/IssueChatThread.tsx index 6a89c073..80d9cfdc 100644 --- a/ui/src/components/IssueChatThread.tsx +++ b/ui/src/components/IssueChatThread.tsx @@ -3101,6 +3101,7 @@ export function IssueChatThread({ const lastUserMessageIdRef = useRef(null); const spacerBaselineAnchorRef = useRef(null); const spacerInitialReserveRef = useRef(0); + const latestSettleTimeoutsRef = useRef([]); const [bottomSpacerHeight, setBottomSpacerHeight] = useState(0); const displayLiveRuns = useMemo(() => { const deduped = new Map(); @@ -3141,6 +3142,15 @@ export function IssueChatThread({ } return ids; }, [displayLiveRuns]); + const clearLatestSettleTimeouts = useCallback(() => { + for (const timeout of latestSettleTimeoutsRef.current) { + window.clearTimeout(timeout); + } + latestSettleTimeoutsRef.current = []; + }, []); + + useEffect(() => clearLatestSettleTimeouts, [clearLatestSettleTimeouts]); + const { transcriptByRun, hasOutputForRun } = useLiveRunTranscripts({ runs: enableLiveTranscriptPolling ? transcriptRuns : [], companyId, @@ -3383,9 +3393,11 @@ export function IssueChatThread({ if (typeof window === "undefined") return; + clearLatestSettleTimeouts(); const settleDelays = [380, 760, 1140]; settleDelays.forEach((delay) => { - window.setTimeout(() => { + const timeout = window.setTimeout(() => { + if (typeof document === "undefined") return; const el = document.getElementById(latestCommentAnchor); if (el) { el.scrollIntoView({ behavior: "smooth", block: "end" }); @@ -3399,6 +3411,7 @@ export function IssueChatThread({ behavior: "auto", }); }, delay); + latestSettleTimeoutsRef.current.push(timeout); }); }