forked from farhoodlabs/paperclip
[codex] Add runtime lifecycle recovery and live issue visibility (#4419)
This commit is contained in:
@@ -0,0 +1,48 @@
|
||||
import { logger } from "../middleware/logger.js";
|
||||
|
||||
const AGENT_START_LOCK_STALE_MS = 30_000;
|
||||
const startLocksByAgent = new Map<string, { promise: Promise<void>; startedAtMs: number }>();
|
||||
|
||||
async function waitForAgentStartLock(agentId: string, lock: { promise: Promise<void>; startedAtMs: number }) {
|
||||
const elapsedMs = Date.now() - lock.startedAtMs;
|
||||
const remainingMs = AGENT_START_LOCK_STALE_MS - elapsedMs;
|
||||
if (remainingMs <= 0) {
|
||||
logger.warn({ agentId, staleMs: elapsedMs }, "agent start lock stale; continuing queued-run start");
|
||||
return;
|
||||
}
|
||||
|
||||
let timedOut = false;
|
||||
let timeout: ReturnType<typeof setTimeout> | null = null;
|
||||
await Promise.race([
|
||||
lock.promise,
|
||||
new Promise<void>((resolve) => {
|
||||
timeout = setTimeout(() => {
|
||||
timedOut = true;
|
||||
resolve();
|
||||
}, remainingMs);
|
||||
}),
|
||||
]);
|
||||
if (timeout) clearTimeout(timeout);
|
||||
|
||||
if (timedOut) {
|
||||
logger.warn({ agentId, staleMs: AGENT_START_LOCK_STALE_MS }, "agent start lock timed out; continuing queued-run start");
|
||||
}
|
||||
}
|
||||
|
||||
export async function withAgentStartLock<T>(agentId: string, fn: () => Promise<T>) {
|
||||
const previous = startLocksByAgent.get(agentId);
|
||||
const waitForPrevious = previous ? waitForAgentStartLock(agentId, previous) : Promise.resolve();
|
||||
const run = waitForPrevious.then(fn);
|
||||
const marker = run.then(
|
||||
() => undefined,
|
||||
() => undefined,
|
||||
);
|
||||
startLocksByAgent.set(agentId, { promise: marker, startedAtMs: Date.now() });
|
||||
try {
|
||||
return await run;
|
||||
} finally {
|
||||
if (startLocksByAgent.get(agentId)?.promise === marker) {
|
||||
startLocksByAgent.delete(agentId);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user