From 1e0747324d5c1c74033d0a1323d6f472573c0bc2 Mon Sep 17 00:00:00 2001 From: Flea Flicker <22+gb_flea@noreply.git.farh.net> Date: Tue, 9 Jun 2026 08:44:58 +0000 Subject: [PATCH] =?UTF-8?q?fix(GRO-2139):=20serialize=20reset=E2=86=92migr?= =?UTF-8?q?ate=E2=86=92seed=20under=20the=20seed=20advisory=20lock=20(#160?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Serialize the entire db:reset chain (DROP → migrate → seed) inside one withSeedAdvisoryLock callback so a concurrent same-PRNG seeder cannot interleave and collide on invoices_pkey. Pool sized max:6 (1 reserved for the lock + work headroom) to avoid the connection-starvation deadlock the CTO caught. Verified with three end-to-end live db:reset runs against a throwaway Postgres. cc @cpfarhood --- packages/db/package.json | 2 +- packages/db/src/reset.ts | 153 +++++++++++++++++++++++++++++---------- packages/db/src/seed.ts | 14 ++-- 3 files changed, 123 insertions(+), 46 deletions(-) diff --git a/packages/db/package.json b/packages/db/package.json index 7f97370..cb3811f 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -21,7 +21,7 @@ "wait-for-db": "node ./scripts/wait-for-db.mjs", "migrate": "node ./scripts/wait-for-db.mjs && drizzle-kit migrate", "seed": "node ./scripts/wait-for-db.mjs && tsx src/seed.ts", - "reset": "node ./scripts/wait-for-db.mjs && tsx src/reset.ts && drizzle-kit migrate && tsx src/seed.ts", + "reset": "node ./scripts/wait-for-db.mjs && tsx src/reset.ts", "studio": "drizzle-kit studio", "typecheck": "tsc --noEmit" }, diff --git a/packages/db/src/reset.ts b/packages/db/src/reset.ts index 41c3ce8..fb88e20 100644 --- a/packages/db/src/reset.ts +++ b/packages/db/src/reset.ts @@ -1,13 +1,52 @@ /** - * reset.ts — Drop all application tables and re-run migrations + seed. + * reset.ts — Drop all application tables, re-run migrations, and re-seed. * * Intended for local development only. Never run against production. 
* * Usage: * DATABASE_URL=postgres://... npx tsx packages/db/src/reset.ts + * + * GRO-2139: the entire drop→migrate→seed chain runs inside a single + * Postgres advisory lock (SEED_ADVISORY_LOCK_KEY) so a concurrent + * `seed.ts` (e.g. the dev `seed-test-data-*` Job being recreated at + * the top of the hour) cannot interleave between `reset.ts` (DROP) + * and `seed.ts` (TRUNCATE+insert) and collide on `invoices_pkey`. + * + * Why this matters: `seed.ts` derives every primary key from a single + * shared Mulberry32 PRNG seeded with 42 (see `createPrng(42)` and + * `uuid()` in seed.ts). Two concurrent same-profile seeders therefore + * emit *identical* ids for the same logical row, and any moment + * between a concurrent `seed.ts` TRUNCATE and INSERT is exactly the + * window in which the second seeder's INSERT can hit a pkey already + * taken by the first. Pre-GRO-2123 this raced unconditionally; + * GRO-2123 added the advisory lock around `runSeedBody` but left + * `reset.ts` and `drizzle-kit migrate` outside the lock. This script + * now wraps the *whole* chain in the same lock: `withSeedAdvisoryLock` + * pins the lock to one reserved session and the DROP → migrate → seed + * work runs on the rest of the pool, so the lock guarantees mutual + * exclusion against any concurrent seeder for the entire chain. + * + * See: groombook/infra `apps/base/reset-cronjob.yaml` (CronJob) and + * `apps/base/seed-job.yaml` (one-shot Job) — both invoke the same + * `seed.ts` code path on the same database in `groombook-dev`. 
*/ - import postgres from "postgres"; +import { drizzle } from "drizzle-orm/postgres-js"; +import { migrate } from "drizzle-orm/postgres-js/migrator"; +import { fileURLToPath } from "node:url"; +import { dirname, resolve } from "node:path"; +import * as schema from "./schema.js"; +import { + SEED_ADVISORY_LOCK_KEY, + withSeedAdvisoryLock, + getProfile, + runSeedBody, + profiles, +} from "./seed.js"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const MIGRATIONS_FOLDER = resolve(__dirname, "../migrations"); async function reset() { const url = process.env.DATABASE_URL; @@ -16,52 +55,88 @@ async function reset() { process.exit(1); } - if (process.env.NODE_ENV === "production" && process.env.ALLOW_RESET !== "true") { - console.error("[FATAL] db:reset must not be run in production without ALLOW_RESET=true."); + if ( + process.env.NODE_ENV === "production" && + process.env.ALLOW_RESET !== "true" + ) { + console.error( + "[FATAL] db:reset must not be run in production without ALLOW_RESET=true.", + ); process.exit(1); } - const client = postgres(url, { max: 1 }); + // Pool sizing is load-bearing here. `withSeedAdvisoryLock` does + // `pool.reserve()` to pin the advisory lock to one dedicated session + // (a session-level lock released on a *different* pooled connection is + // a no-op), and the DROP / migrate / seed work then runs on the + // *remaining* pooled connections. The lock provides mutual exclusion + // across processes regardless of how many connections the work uses — + // it does NOT require the work to share the lock's session. + // + // Therefore `max` must be ≥ 2: 1 reserved for the lock + ≥1 free for + // the work. `max: 1` would let `reserve()` consume the only connection + // and every query inside the callback would block forever waiting for + // a connection that never frees (connection-starvation deadlock). We + // use `max: 6` to match `seed()`'s headroom (1 reserved + 5 work). 
+ const client = postgres(url, { max: 6 }); + const db = drizzle(client, { schema }); - console.log("Dropping all application tables...\n"); + try { + await withSeedAdvisoryLock(client, async () => { + console.log("Dropping all application tables...\n"); - // Drop in dependency order (children before parents) - await client` - DO $$ DECLARE - r RECORD; - BEGIN - FOR r IN ( - SELECT tablename FROM pg_tables - WHERE schemaname = 'public' - ) LOOP - EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE'; - END LOOP; - END $$; - `; + // Drop dependencies (tables) first + await client` + DO $$ DECLARE + r RECORD; + BEGIN + FOR r IN ( + SELECT tablename FROM pg_tables + WHERE schemaname = 'public' + ) LOOP + EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE'; + END LOOP; + END $$; + `; - // Drop custom enums - await client` - DO $$ DECLARE - r RECORD; - BEGIN - FOR r IN ( - SELECT typname FROM pg_type - WHERE typtype = 'e' AND typnamespace = ( - SELECT oid FROM pg_namespace WHERE nspname = 'public' - ) - ) LOOP - EXECUTE 'DROP TYPE IF EXISTS ' || quote_ident(r.typname) || ' CASCADE'; - END LOOP; - END $$; - `; + // Drop custom enums + await client` + DO $$ DECLARE + r RECORD; + BEGIN + FOR r IN ( + SELECT typname FROM pg_type + WHERE typtype = 'e' AND typnamespace = ( + SELECT oid FROM pg_namespace WHERE nspname = 'public' + ) + ) LOOP + EXECUTE 'DROP TYPE IF EXISTS ' || quote_ident(r.typname) || ' CASCADE'; + END LOOP; + END $$; + `; - // Drop the drizzle migrations tracking table - await client`DROP TABLE IF EXISTS drizzle.__drizzle_migrations CASCADE`; - await client`DROP SCHEMA IF EXISTS drizzle CASCADE`; + // Drop the drizzle migrations tracking table + await client`DROP TABLE IF EXISTS drizzle.__drizzle_migrations CASCADE`; + await client`DROP SCHEMA IF EXISTS drizzle CASCADE`; - console.log("✓ All tables and enums dropped\n"); + console.log("✓ All tables and enums dropped\n"); - await client.end(); + 
console.log("Running migrations..."); + await migrate(db, { migrationsFolder: MIGRATIONS_FOLDER }); + console.log("✓ Migrations applied\n"); + + console.log("Seeding database..."); + const profile = getProfile(); + const cfg = profiles[profile]; + await runSeedBody(client, db, profile, cfg); + }); + + console.log( + `\n✓ Reset complete (advisory lock key=0x${SEED_ADVISORY_LOCK_KEY.toString(16)})`, + ); + } finally { + await client.end(); + } } reset().catch((err) => { diff --git a/packages/db/src/seed.ts b/packages/db/src/seed.ts index 55b2ee4..b519c04 100644 --- a/packages/db/src/seed.ts +++ b/packages/db/src/seed.ts @@ -24,9 +24,9 @@ import type { MedicalAlert } from "@groombook/types"; // ── Seed profile configuration ───────────────────────────────────────────── -type SeedProfile = "dev" | "uat" | "demo"; +export type SeedProfile = "dev" | "uat" | "demo"; -interface ProfileConfig { +export interface ProfileConfig { staffCount: { manager: number; receptionist: number; groomer: number; bather: number }; clientCount: number; appointmentsBackDays: number; @@ -35,7 +35,7 @@ interface ProfileConfig { includeUatClients: boolean; } -const profiles: Record<SeedProfile, ProfileConfig> = { +export const profiles: Record<SeedProfile, ProfileConfig> = { dev: { staffCount: { manager: 1, receptionist: 1, groomer: 2, bather: 0 }, clientCount: 100, @@ -70,6 +70,8 @@ function getProfile(): SeedProfile { return "uat"; } +export { getProfile }; + // ── Deterministic PRNG (Mulberry32) ────────────────────────────────────────── /** @@ -1194,7 +1196,7 @@ async function seedKnownUsers() { // from runbooks without ambiguity and binds to the single-argument // `pg_advisory_lock(int)` form, which postgres-js serializes as a plain // number (no bigint type plumbing required). 
-const SEED_ADVISORY_LOCK_KEY = 0x47524f4f; // "GROO" in ASCII — arbitrary, stable +export const SEED_ADVISORY_LOCK_KEY = 0x47524f4f; // "GROO" in ASCII — arbitrary, stable /** * Reserve a dedicated connection from `pool`, take the seed advisory lock @@ -1207,7 +1209,7 @@ const SEED_ADVISORY_LOCK_KEY = 0x47524f4f; // "GROO" in ASCII — arbitrary, sta * for the lock and release it from the same reserved connection. The * seed work itself still runs on the pooled connections. */ -async function withSeedAdvisoryLock<T>( +export async function withSeedAdvisoryLock<T>( pool: ReturnType<typeof postgres>, fn: () => Promise<T>, ): Promise<T> { @@ -1265,7 +1267,7 @@ async function seed() { await client.end(); } -async function runSeedBody( +export async function runSeedBody( client: ReturnType<typeof postgres>, db: ReturnType<typeof drizzle>, profile: SeedProfile,