diff --git a/packages/db/package.json b/packages/db/package.json
index 4cdd0d9..97a593e 100644
--- a/packages/db/package.json
+++ b/packages/db/package.json
@@ -18,7 +18,10 @@
   "scripts": {
     "build": "tsc --project .",
     "generate": "drizzle-kit generate",
+    "premigrate": "node ./scripts/wait-for-db.mjs",
     "migrate": "drizzle-kit migrate",
+    "preseed": "node ./scripts/wait-for-db.mjs",
+    "prereset": "node ./scripts/wait-for-db.mjs",
     "seed": "tsx src/seed.ts",
     "reset": "tsx src/reset.ts && drizzle-kit migrate && tsx src/seed.ts",
     "studio": "drizzle-kit studio",
diff --git a/packages/db/scripts/wait-for-db.mjs b/packages/db/scripts/wait-for-db.mjs
new file mode 100644
index 0000000..04d9d9e
--- /dev/null
+++ b/packages/db/scripts/wait-for-db.mjs
@@ -0,0 +1,155 @@
+#!/usr/bin/env node
+// wait-for-db.mjs
+//
+// GRO-2163: wait for / retry DNS resolution of the database hostname derived
+// from DATABASE_URL before invoking `drizzle-kit migrate`. The first attempt
+// of a fresh migrate-schema pod occasionally hits a transient CoreDNS miss
+// (EAI_AGAIN) on `groombook-postgres-rw..svc`; with backoffLimit: 2 the
+// retry pod usually wins, but three unlucky attempts in a row trips
+// BackoffLimitExceeded. Resolving once here, with backoff, removes the dice
+// roll at the source so the first attempt reliably succeeds.
+//
+// Mirrors the belt-and-braces pattern used in GRO-1985 (no Corepack
+// download fallback): we don't try to outsmart CoreDNS, we just don't ask
+// drizzle-kit to do the very first DNS lookup of a freshly-scheduled pod.
+//
+// Configuration (env):
+//   WAIT_FOR_DB_MAX_ATTEMPTS   default 12 (~45s of backoff at the defaults)
+//   WAIT_FOR_DB_BASE_DELAY_MS  default 500
+//   WAIT_FOR_DB_MAX_DELAY_MS   default 5000
+//   WAIT_FOR_DB_SKIP           default unset; set to "1" to skip (debug only)
+// Unset, empty, or invalid numeric values fall back to the defaults above.
+//
+// On success: exit 0. On exhaustion: exit 1 so the Job's backoff is
+// preserved (we don't want to silently mask a real outage by giving up
+// after ~45s and letting drizzle-kit fail with a less-actionable error).
+
+import { setTimeout as delay } from "node:timers/promises";
+import dns from "node:dns/promises";
+
+/**
+ * Read a whole-number setting from the environment.
+ *
+ * A bad value must not silently disable the wait: with plain `Number()` an
+ * empty or non-numeric WAIT_FOR_DB_MAX_ATTEMPTS becomes 0 or NaN, the retry
+ * loop never runs, and the script exits 0 without resolving anything.
+ *
+ * @param {string} name - Environment variable to read.
+ * @param {number} fallback - Value used when the variable is unset or invalid.
+ * @param {number} min - Smallest accepted value.
+ * @returns {number} The parsed integer, or `fallback`.
+ */
+function readIntEnv(name, fallback, min) {
+  const raw = process.env[name];
+  if (raw === undefined || raw.trim() === "") {
+    return fallback;
+  }
+  const value = Number(raw);
+  if (!Number.isInteger(value) || value < min) {
+    console.warn(`[wait-for-db] ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
+    return fallback;
+  }
+  return value;
+}
+
+const MAX_ATTEMPTS = readIntEnv("WAIT_FOR_DB_MAX_ATTEMPTS", 12, 1);
+const BASE_DELAY_MS = readIntEnv("WAIT_FOR_DB_BASE_DELAY_MS", 500, 0);
+const MAX_DELAY_MS = readIntEnv("WAIT_FOR_DB_MAX_DELAY_MS", 5000, 0);
+
+/**
+ * Extract the hostname to resolve from a database connection URL.
+ *
+ * @param {string} databaseUrl - Value of DATABASE_URL.
+ * @returns {string | null} Hostname, or null if the URL is unparseable or has no host.
+ */
+function parseHost(databaseUrl) {
+  try {
+    const { hostname } = new URL(databaseUrl);
+    // URL#hostname keeps the brackets around IPv6 literals ("[::1]"), which
+    // dns.lookup() would treat as an unresolvable name and retry until exhaustion.
+    const isBracketed = hostname.startsWith("[") && hostname.endsWith("]");
+    const host = isBracketed ? hostname.slice(1, -1) : hostname;
+    return host || null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Resolve `host` once via dns.lookup().
+ *
+ * @param {string} host - Hostname or IP literal.
+ * @returns {Promise<{ address: string, ms: number }>} Resolved address and elapsed time.
+ */
+async function resolveOnce(host) {
+  const start = Date.now();
+  const result = await dns.lookup(host);
+  return { address: result.address, ms: Date.now() - start };
+}
+
+/**
+ * Wait until the DATABASE_URL host resolves, retrying transient DNS failures
+ * with capped exponential backoff. Exits the process with 1 on a
+ * non-retryable error or once MAX_ATTEMPTS is exhausted.
+ */
+async function main() {
+  if (process.env.WAIT_FOR_DB_SKIP === "1") {
+    console.log("[wait-for-db] WAIT_FOR_DB_SKIP=1, skipping");
+    return;
+  }
+  const databaseUrl = process.env.DATABASE_URL;
+  if (!databaseUrl) {
+    // Don't gate the migrate on a misconfigured env — let drizzle-kit fail
+    // loudly with its own clear error.
+    console.warn("[wait-for-db] DATABASE_URL not set; skipping");
+    return;
+  }
+  const host = parseHost(databaseUrl);
+  if (!host) {
+    console.warn("[wait-for-db] could not parse hostname from DATABASE_URL; skipping");
+    return;
+  }
+  console.log(
+    `[wait-for-db] host=${host} max_attempts=${MAX_ATTEMPTS} ` +
+      `base_delay_ms=${BASE_DELAY_MS} max_delay_ms=${MAX_DELAY_MS}`,
+  );
+
+  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+    try {
+      const { address, ms } = await resolveOnce(host);
+      console.log(`[wait-for-db] ok attempt=${attempt} host=${host} -> ${address} (${ms}ms)`);
+      return;
+    } catch (err) {
+      const code = err?.code ?? "UNKNOWN";
+      const transient = code === "EAI_AGAIN" || code === "ENOTFOUND" || code === "EAI_NODATA";
+      if (!transient) {
+        // Any other failure will not be cured by waiting: report it and fail
+        // fast rather than spinning through the remaining attempts.
+        console.error(
+          `[wait-for-db] non-transient DNS error attempt=${attempt} code=${code}: ${err?.message ?? err}`,
+        );
+        process.exit(1);
+      }
+      if (attempt === MAX_ATTEMPTS) {
+        console.error(
+          `[wait-for-db] exhausted attempts=${MAX_ATTEMPTS} host=${host} last_code=${code}; exiting 1`,
+        );
+        process.exit(1);
+      }
+      // Exponential backoff plus jitter, capped at MAX_DELAY_MS.
+      const backoff = Math.min(
+        MAX_DELAY_MS,
+        BASE_DELAY_MS * 2 ** (attempt - 1) + Math.floor(Math.random() * BASE_DELAY_MS),
+      );
+      console.log(
+        `[wait-for-db] transient attempt=${attempt} code=${code} retry_in_ms=${backoff}`,
+      );
+      await delay(backoff);
+    }
+  }
+}
+
+main().catch((err) => {
+  console.error(`[wait-for-db] fatal: ${err?.message ?? err}`);
+  process.exit(1);
+});