fix(GRO-2139): serialize the entire reset→migrate→seed chain under the seed advisory lock
CI / Test (pull_request) Successful in 10s
CI / Lint & Typecheck (pull_request) Successful in 15s
CI / Build & Push Docker Images (pull_request) Successful in 1m21s

The dev reset-demo-data CronJob intermittently produced one Error pod per
run with `invoices_pkey` duplicate-key violations. The CTO analysis
(traced in GRO-2136) concluded the race is between the reset image's
three-step chain and a concurrent same-PRNG seeder (the dev
seed-test-data Job being recreated at the top of the hour by Flux).

GRO-2123 added `pg_advisory_lock(0x47524f4f)` around `runSeedBody`,
but `reset.ts` (DROP TABLE … CASCADE) and `drizzle-kit migrate`
ran as separate processes outside that lock — so a concurrent locked
seed could still interleave with the reset's drop+recreate, leaving
two same-seed writers emitting identical invoice ids (the
Mulberry32(seed=42) stream is fully deterministic per process).

This commit makes the whole chain a single locked unit:

- `reset.ts` now takes the same advisory lock and runs DROP → migrate
  → runSeedBody under a single Postgres session (max: 1). The lock
  spans the entire chain, so any concurrent `seed.ts` invocation
  (via the seed-test-data Job or CI) blocks until the reset finishes.
- `packages/db/package.json` `reset` script is now a single
  `tsx src/reset.ts` invocation — `drizzle-kit migrate` no longer
  runs as a separate un-locked process.
- `withSeedAdvisoryLock`, `runSeedBody`, `getProfile`, `profiles`,
  `SEED_ADVISORY_LOCK_KEY`, and the `SeedProfile`/`ProfileConfig`
  types are now exported from `seed.ts` so `reset.ts` can use them
  while preserving the deterministic seed contract.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Flea Flicker
2026-06-05 05:11:13 +00:00
parent 93be4d8f72
commit 1a4c02476d
3 changed files with 112 additions and 45 deletions
+1 -1
View File
@@ -20,7 +20,7 @@
"generate": "drizzle-kit generate",
"migrate": "drizzle-kit migrate",
"seed": "tsx src/seed.ts",
"reset": "tsx src/reset.ts && drizzle-kit migrate && tsx src/seed.ts",
"reset": "tsx src/reset.ts",
"studio": "drizzle-kit studio",
"typecheck": "tsc --noEmit"
},
+103 -38
View File
@@ -1,13 +1,51 @@
/**
* reset.ts — Drop all application tables and re-run migrations + seed.
* reset.ts — Drop all application tables, re-run migrations, and re-seed.
*
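* Intended for development environments only (local, or the `groombook-dev`
* CronJob referenced below). Never run against production: the script exits
* when NODE_ENV=production unless ALLOW_RESET=true is set.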
*
* Usage:
* DATABASE_URL=postgres://... npx tsx packages/db/src/reset.ts
*
* GRO-2139: the entire drop→migrate→seed chain runs inside a single
* Postgres advisory lock (SEED_ADVISORY_LOCK_KEY) so a concurrent
* `seed.ts` (e.g. the dev `seed-test-data-*` Job being recreated at
* the top of the hour) cannot interleave between this script's DROP
* step and its seed step (TRUNCATE+insert) and collide on `invoices_pkey`.
*
* Why this matters: `seed.ts` derives every primary key from a single
* shared Mulberry32 PRNG seeded with 42 (see `createPrng(42)` and
* `uuid()` in seed.ts). Two concurrent same-profile seeders therefore
* emit *identical* ids for the same logical row, and any moment
* between a concurrent `seed.ts` TRUNCATE and INSERT is exactly the
* window in which the second seeder's INSERT can hit a pkey already
* taken by the first. Pre-GRO-2123 this raced unconditionally;
* GRO-2123 added the advisory lock around `runSeedBody` but left
* `reset.ts` and `drizzle-kit migrate` outside the lock. This script
* now wraps the *whole* chain in the same lock, using `max: 1` so the
* single Postgres session that holds the lock is the one that runs
* the DROP, the migrations, and the seed body.
*
* See: groombook/infra `apps/base/reset-cronjob.yaml` (CronJob) and
* `apps/base/seed-job.yaml` (one-shot Job) — both invoke the same
* `seed.ts` code path on the same database in `groombook-dev`.
*/
import postgres from "postgres";
import { drizzle } from "drizzle-orm/postgres-js";
import { migrate } from "drizzle-orm/postgres-js/migrator";
import { fileURLToPath } from "node:url";
import { dirname, resolve } from "node:path";
import * as schema from "./schema.js";
import {
SEED_ADVISORY_LOCK_KEY,
withSeedAdvisoryLock,
getProfile,
runSeedBody,
profiles,
} from "./seed.js";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const MIGRATIONS_FOLDER = resolve(__dirname, "../migrations");
async function reset() {
const url = process.env.DATABASE_URL;
@@ -16,52 +54,79 @@ async function reset() {
process.exit(1);
}
if (process.env.NODE_ENV === "production" && process.env.ALLOW_RESET !== "true") {
console.error("[FATAL] db:reset must not be run in production without ALLOW_RESET=true.");
if (
process.env.NODE_ENV === "production" &&
process.env.ALLOW_RESET !== "true"
) {
console.error(
"[FATAL] db:reset must not be run in production without ALLOW_RESET=true.",
);
process.exit(1);
}
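// max: 1 so the advisory lock and every DROP / migrate / seed query
// share a single Postgres session. With max > 1 postgres-js could
// route a query to a different pooled connection that does NOT hold
// the lock, defeating the point of "lock spans the whole chain".
// NOTE(review): the doc comment on withSeedAdvisoryLock (seed.ts) says it
// reserves a dedicated connection from the pool for the lock and that the
// seed work runs on the pooled connections. With max: 1 the reserved
// connection may be the only one in the pool — confirm that queries issued
// inside the callback can still acquire a connection and do not wait
// forever behind the reservation.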
const client = postgres(url, { max: 1 });
const db = drizzle(client, { schema });
console.log("Dropping all application tables...\n");
try {
await withSeedAdvisoryLock(client, async () => {
console.log("Dropping all application tables...\n");
// Drop in dependency order (children before parents)
await client`
DO $$ DECLARE
r RECORD;
BEGIN
FOR r IN (
SELECT tablename FROM pg_tables
WHERE schemaname = 'public'
) LOOP
EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE';
END LOOP;
END $$;
`;
// Drop dependencies (tables) first
await client`
DO $$ DECLARE
r RECORD;
BEGIN
FOR r IN (
SELECT tablename FROM pg_tables
WHERE schemaname = 'public'
) LOOP
EXECUTE 'DROP TABLE IF EXISTS public.' || quote_ident(r.tablename) || ' CASCADE';
END LOOP;
END $$;
`;
// Drop custom enums
await client`
DO $$ DECLARE
r RECORD;
BEGIN
FOR r IN (
SELECT typname FROM pg_type
WHERE typtype = 'e' AND typnamespace = (
SELECT oid FROM pg_namespace WHERE nspname = 'public'
)
) LOOP
EXECUTE 'DROP TYPE IF EXISTS ' || quote_ident(r.typname) || ' CASCADE';
END LOOP;
END $$;
`;
// Drop custom enums
await client`
DO $$ DECLARE
r RECORD;
BEGIN
FOR r IN (
SELECT typname FROM pg_type
WHERE typtype = 'e' AND typnamespace = (
SELECT oid FROM pg_namespace WHERE nspname = 'public'
)
) LOOP
EXECUTE 'DROP TYPE IF EXISTS ' || quote_ident(r.typname) || ' CASCADE';
END LOOP;
END $$;
`;
// Drop the drizzle migrations tracking table
await client`DROP TABLE IF EXISTS drizzle.__drizzle_migrations CASCADE`;
await client`DROP SCHEMA IF EXISTS drizzle CASCADE`;
// Drop the drizzle migrations tracking table
await client`DROP TABLE IF EXISTS drizzle.__drizzle_migrations CASCADE`;
await client`DROP SCHEMA IF EXISTS drizzle CASCADE`;
console.log("✓ All tables and enums dropped\n");
console.log("✓ All tables and enums dropped\n");
await client.end();
console.log("Running migrations...");
await migrate(db, { migrationsFolder: MIGRATIONS_FOLDER });
console.log("✓ Migrations applied\n");
console.log("Seeding database...");
const profile = getProfile();
const cfg = profiles[profile];
await runSeedBody(client, db, profile, cfg);
});
console.log(
`\n✓ Reset complete (advisory lock key=0x${SEED_ADVISORY_LOCK_KEY.toString(16)})`,
);
} finally {
await client.end();
}
}
reset().catch((err) => {
+8 -6
View File
@@ -24,9 +24,9 @@ import type { MedicalAlert } from "@groombook/types";
// ── Seed profile configuration ─────────────────────────────────────────────
type SeedProfile = "dev" | "uat" | "demo";
export type SeedProfile = "dev" | "uat" | "demo";
interface ProfileConfig {
export interface ProfileConfig {
staffCount: { manager: number; receptionist: number; groomer: number; bather: number };
clientCount: number;
appointmentsBackDays: number;
@@ -35,7 +35,7 @@ interface ProfileConfig {
includeUatClients: boolean;
}
const profiles: Record<SeedProfile, ProfileConfig> = {
export const profiles: Record<SeedProfile, ProfileConfig> = {
dev: {
staffCount: { manager: 1, receptionist: 1, groomer: 2, bather: 0 },
clientCount: 100,
@@ -70,6 +70,8 @@ function getProfile(): SeedProfile {
return "uat";
}
export { getProfile };
// ── Deterministic PRNG (Mulberry32) ──────────────────────────────────────────
/**
@@ -989,7 +991,7 @@ async function seedKnownUsers() {
// from runbooks without ambiguity and binds to the single-argument
// `pg_advisory_lock(bigint)` form; the key fits in 32 bits, so postgres-js
// can send it as a plain number (no bigint type plumbing required).
const SEED_ADVISORY_LOCK_KEY = 0x47524f4f; // "GROO" in ASCII — arbitrary, stable
export const SEED_ADVISORY_LOCK_KEY = 0x47524f4f; // "GROO" in ASCII — arbitrary, stable
/**
* Reserve a dedicated connection from `pool`, take the seed advisory lock
@@ -1002,7 +1004,7 @@ const SEED_ADVISORY_LOCK_KEY = 0x47524f4f; // "GROO" in ASCII — arbitrary, sta
* for the lock and release it from the same reserved connection. The
* seed work itself still runs on the pooled connections.
*/
async function withSeedAdvisoryLock<T>(
export async function withSeedAdvisoryLock<T>(
pool: ReturnType<typeof postgres>,
fn: () => Promise<T>,
): Promise<T> {
@@ -1060,7 +1062,7 @@ async function seed() {
await client.end();
}
async function runSeedBody(
export async function runSeedBody(
client: ReturnType<typeof postgres>,
db: ReturnType<typeof drizzle>,
profile: SeedProfile,