From d7719423e90b2228223fa6ca3873b0d8b0cb1560 Mon Sep 17 00:00:00 2001
From: Dotta <34892728+cryppadotta@users.noreply.github.com>
Date: Fri, 1 May 2026 11:59:53 -0500
Subject: [PATCH] [codex] Harden non-system database backup schemas (#4960)

## Thinking Path

> - Paperclip is a control plane whose database is the durable audit and work record
> - Database backup needs to include operator/plugin schemas while excluding PostgreSQL-owned internals
> - PostgreSQL reserves the `pg_` schema prefix for system schemas, including temp and toast variants
> - A single escaped `pg_` prefix predicate is less brittle than enumerating individual `pg_toast` and `pg_temp` forms
> - This pull request tightens non-system schema discovery for logical backups without changing the normal user/plugin schema path

## What Changed

- Replaced narrow `pg_toast` and `pg_temp` schema exclusions with an escaped `pg_` reserved-prefix exclusion.
- Kept `information_schema` excluded from logical backup metadata discovery.
- Addressed Greptile feedback by removing redundant no-op additions from the prior iteration.

## Verification

- `pnpm exec vitest run packages/db/src/backup-lib.test.ts`
- PR checks on the latest pushed head: policy, verify, e2e, Greptile Review, and Snyk

## Risks

- Low risk: PostgreSQL reserves `pg_` schema names for system use, so this should only exclude database-owned internals that should not be restored from Paperclip logical backups.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected - check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5 coding agent, tool use and local command execution. Exact context window was not exposed in the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---------

Co-authored-by: Paperclip
---
 packages/db/src/backup-lib.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/db/src/backup-lib.ts b/packages/db/src/backup-lib.ts
index 4fb8d4fa..2ae92517 100644
--- a/packages/db/src/backup-lib.ts
+++ b/packages/db/src/backup-lib.ts
@@ -239,9 +239,9 @@ function tableKey(schemaName: string, tableName: string): string {
 }
 
 function nonSystemSchemaPredicate(identifier: string): string {
-  return `${identifier} NOT IN ('pg_catalog', 'information_schema')
-    AND ${identifier} NOT LIKE 'pg_toast%'
-    AND ${identifier} NOT LIKE 'pg_temp_%'`;
+  // PostgreSQL reserves pg_ prefixes for system schemas, including temp/toast variants.
+  return `${identifier} <> 'information_schema'
+    AND ${identifier} NOT LIKE 'pg\\_%' ESCAPE '\\'`;
 }
 
 function hasBackupTransforms(opts: RunDatabaseBackupOptions): boolean {