fix(db): wait for/retry DB DNS resolution before drizzle-kit migrate (GRO-2163) #161
@@ -18,9 +18,10 @@
|
||||
"scripts": {
|
||||
"build": "tsc --project .",
|
||||
"generate": "drizzle-kit generate",
|
||||
"migrate": "drizzle-kit migrate",
|
||||
"seed": "tsx src/seed.ts",
|
||||
"reset": "tsx src/reset.ts && drizzle-kit migrate && tsx src/seed.ts",
|
||||
"wait-for-db": "node ./scripts/wait-for-db.mjs",
|
||||
"migrate": "node ./scripts/wait-for-db.mjs && drizzle-kit migrate",
|
||||
"seed": "node ./scripts/wait-for-db.mjs && tsx src/seed.ts",
|
||||
"reset": "node ./scripts/wait-for-db.mjs && tsx src/reset.ts && drizzle-kit migrate && tsx src/seed.ts",
|
||||
"studio": "drizzle-kit studio",
|
||||
"typecheck": "tsc --noEmit"
|
||||
},
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env node
|
||||
// wait-for-db.mjs
|
||||
//
|
||||
// GRO-2163: wait for / retry DNS resolution of the database hostname derived
|
||||
// from DATABASE_URL before invoking `drizzle-kit migrate`. The first attempt
|
||||
// of a fresh migrate-schema pod occasionally hits a transient CoreDNS miss
|
||||
// (EAI_AGAIN) on `groombook-postgres-rw.<ns>.svc`; with backoffLimit: 2 the
|
||||
// retry pod usually wins, but three unlucky attempts in a row trips
|
||||
// BackoffLimitExceeded. Resolving once here, with backoff, removes the dice
|
||||
// roll at the source so the first attempt reliably succeeds.
|
||||
//
|
||||
// Mirrors the belt-and-braces pattern used in GRO-1985 (no Corepack
|
||||
// download fallback): we don't try to outsmart CoreDNS, we just don't ask
|
||||
// drizzle-kit to do the very first DNS lookup of a freshly-scheduled pod.
|
||||
//
|
||||
// Configuration (env):
|
||||
// WAIT_FOR_DB_MAX_ATTEMPTS default 12 (~43s of total backoff sleep at the default delays, plus lookup time)
|
||||
// WAIT_FOR_DB_BASE_DELAY_MS default 500
|
||||
// WAIT_FOR_DB_MAX_DELAY_MS default 5000
|
||||
// WAIT_FOR_DB_SKIP default unset; set to "1" to skip (debug only)
|
||||
//
|
||||
// On success: exit 0. On exhaustion: exit 1 so the Job's backoff is
|
||||
// preserved (we don't want to silently mask a real outage by giving up
|
||||
// once the retry budget is spent and letting drizzle-kit fail with a less-actionable error).
|
||||
|
||||
import { setTimeout as delay } from "node:timers/promises";
|
||||
import dns from "node:dns/promises";
|
||||
|
||||
/**
 * Read a numeric tuning knob from the environment.
 *
 * `Number(process.env.X ?? fallback)` alone is not enough: an empty string
 * becomes 0 and a typo becomes NaN, and either value for the attempt count
 * makes the retry loop run zero times, so the script would report success
 * without ever performing a lookup. Anything unset, blank, non-finite, or
 * below `min` therefore falls back to the documented default.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Default used when the variable is unusable.
 * @param {number} min - Smallest accepted value (inclusive).
 * @returns {number} The parsed value, or `fallback`.
 */
function readEnvNumber(name, fallback, min) {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") {
    return fallback;
  }
  const parsed = Number(raw);
  if (!Number.isFinite(parsed) || parsed < min) {
    console.warn(`[wait-for-db] ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
    return fallback;
  }
  return parsed;
}

// Attempts must be a whole number >= 1; the delays only need to be >= 0.
const MAX_ATTEMPTS = Math.trunc(readEnvNumber("WAIT_FOR_DB_MAX_ATTEMPTS", 12, 1));
const BASE_DELAY_MS = readEnvNumber("WAIT_FOR_DB_BASE_DELAY_MS", 500, 0);
const MAX_DELAY_MS = readEnvNumber("WAIT_FOR_DB_MAX_DELAY_MS", 5000, 0);
|
||||
|
||||
/**
 * Extract the hostname to resolve from a database connection URL.
 *
 * @param {string} databaseUrl - Connection string, e.g. `postgres://user:pw@host:5432/db`.
 * @returns {string | null} The hostname, or null when the URL cannot be
 *   parsed or carries no host (e.g. `postgres:///db`).
 */
function parseHost(databaseUrl) {
  let hostname;
  try {
    hostname = new URL(databaseUrl).hostname;
  } catch {
    return null;
  }
  // URL keeps the square brackets around IPv6 literals ("[::1]"), but the
  // resolver expects the bare address; passing the bracketed form would make
  // every lookup fail and block the migration.
  if (hostname.startsWith("[") && hostname.endsWith("]")) {
    hostname = hostname.slice(1, -1);
  }
  return hostname || null;
}
|
||||
|
||||
/**
 * Perform a single DNS lookup for `host` and time it.
 *
 * @param {string} host - Hostname to resolve.
 * @returns {Promise<{ address: string, ms: number }>} The resolved address
 *   and the elapsed wall-clock time in milliseconds. Rejects with whatever
 *   error `dns.lookup` rejects with.
 */
async function resolveOnce(host) {
  const startedAt = Date.now();
  const { address } = await dns.lookup(host);
  const ms = Date.now() - startedAt;
  return { address, ms };
}
|
||||
|
||||
/**
 * Entry point: wait until the hostname in DATABASE_URL resolves, retrying
 * transient DNS failures with capped exponential backoff plus jitter.
 *
 * Returns normally (so the next command in the `&&` chain runs) when the
 * lookup succeeds, when WAIT_FOR_DB_SKIP is "1", or when DATABASE_URL is
 * unset or has no parseable hostname. Calls `process.exit(1)` on a
 * non-transient DNS error or once every attempt has failed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (process.env.WAIT_FOR_DB_SKIP === "1") {
    console.log("[wait-for-db] WAIT_FOR_DB_SKIP=1, skipping");
    return;
  }

  const databaseUrl = process.env.DATABASE_URL;
  if (!databaseUrl) {
    // Don't gate the migrate on a misconfigured env — let drizzle-kit fail
    // loudly with its own clear error.
    console.warn("[wait-for-db] DATABASE_URL not set; skipping");
    return;
  }

  const host = parseHost(databaseUrl);
  if (!host) {
    console.warn(`[wait-for-db] could not parse hostname from DATABASE_URL; skipping`);
    return;
  }

  // Always make at least one real lookup. A zero, negative, or NaN attempt
  // count would otherwise skip the loop entirely, and a fractional one would
  // never hit the "last attempt" check — both ended in a silent exit 0.
  const maxAttempts = Math.max(1, Math.floor(MAX_ATTEMPTS) || 1);

  console.log(
    `[wait-for-db] host=${host} max_attempts=${maxAttempts} ` +
      `base_delay_ms=${BASE_DELAY_MS} max_delay_ms=${MAX_DELAY_MS}`,
  );

  let lastCode = "UNKNOWN";
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const { address, ms } = await resolveOnce(host);
      console.log(`[wait-for-db] ok attempt=${attempt} host=${host} -> ${address} (${ms}ms)`);
      return;
    } catch (err) {
      const code = err?.code ?? "UNKNOWN";
      const transient = code === "EAI_AGAIN" || code === "ENOTFOUND" || code === "EAI_NODATA";
      if (!transient) {
        // Hard error (e.g. invalid hostname): retrying cannot help, so fail
        // immediately. Exiting non-zero stops the `&&` chain, which means
        // drizzle-kit does not run for this attempt.
        console.error(`[wait-for-db] non-transient DNS error attempt=${attempt} code=${code}: ${err?.message}`);
        process.exit(1);
      }
      lastCode = code;
      if (attempt === maxAttempts) {
        break;
      }
      // Exponential backoff with up to one base-delay of jitter, capped.
      const backoff = Math.min(
        MAX_DELAY_MS,
        BASE_DELAY_MS * 2 ** (attempt - 1) + Math.floor(Math.random() * BASE_DELAY_MS),
      );
      console.log(
        `[wait-for-db] transient attempt=${attempt} code=${code} retry_in_ms=${backoff}`,
      );
      await delay(backoff);
    }
  }

  // Reaching this point means no attempt succeeded; never report success.
  console.error(
    `[wait-for-db] exhausted attempts=${maxAttempts} host=${host} last_code=${lastCode}; exiting 1`,
  );
  process.exit(1);
}
|
||||
|
||||
// Run the wait loop; any rejection that escapes main() is reported and
// turned into a non-zero exit so the `&&` chain in package.json stops.
main().catch((error) => {
  const reason = error?.message ?? error;
  console.error(`[wait-for-db] fatal: ${reason}`);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user