diff --git a/src/server/execute.ts b/src/server/execute.ts index 6770c5a..b5a4b91 100644 --- a/src/server/execute.ts +++ b/src/server/execute.ts @@ -456,18 +456,54 @@ export async function execute(ctx: AdapterExecutionContext): Promise { - const silenceSec = Math.round((Date.now() - lastLogAt) / 1000); - void onLog("stdout", `[paperclip] keepalive — job ${jobName} running (${silenceSec}s since last output)\n`); + // Fire-and-forget the async work so the interval callback itself stays + // synchronous; ticks are not awaited, so slow API calls may overlap. + void (async () => { + if (keepaliveJobTerminal) return; - // Refresh updatedAt every ~4 minutes (16 ticks × 15s) to stay - // well within the 5-minute reaper staleness window. - keepaliveTick++; - if (ctx.onSpawn && keepaliveTick % 16 === 0) { - void ctx.onSpawn({ pid: -1, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => {}); - } + // Verify the Job is still alive before announcing or refreshing. + try { + const job = await batchApi.readNamespacedJob({ name: jobName, namespace }); + const terminal = job.status?.conditions?.some( + (c) => (c.type === "Complete" || c.type === "Failed") && c.status === "True", + ); + if (terminal) { + keepaliveJobTerminal = true; + return; + } + } catch { + // Job may have been deleted out from under us, or the API call + // transiently failed. Either way, do not refresh updatedAt. + // NOTE(review): the flag set below makes every later tick return early, so a transient failure is never re-checked — confirm intended. + keepaliveJobTerminal = true; + return; + } + + const silenceSec = Math.round((Date.now() - lastLogAt) / 1000); + void onLog("stdout", `[paperclip] keepalive — job ${jobName} running (${silenceSec}s since last output)\n`); + + // Refresh updatedAt every ~4 minutes (16 ticks × 15s) to stay + // well within the 5-minute reaper staleness window. 
+ keepaliveTick++; + if (ctx.onSpawn && keepaliveTick % 16 === 0) { + void ctx.onSpawn({ pid: -1, processGroupId: null, startedAt: new Date().toISOString() }).catch(() => {}); + } + })(); }, KEEPALIVE_INTERVAL_MS); const wrappedOnLog: typeof onLog = async (stream, chunk) => { lastLogAt = Date.now(); @@ -486,6 +522,15 @@ export async function execute(ctx: AdapterExecutionContext): Promise