fix: return k8s_job_deleted_externally error code when job deleted mid-run (FAR-31)

When a K8s Job is deleted externally (kubectl delete job or TTL before
terminal condition observed) and stdout has no result event, the adapter
now returns errorCode "k8s_job_deleted_externally" with the message
"K8s Job was deleted externally before Claude could complete" instead of
the misleading "Claude exited with code -1".

Tracks a jobDeletedExternally flag in execute(), set on both jobGone paths,
and checks it first in the !parsed branch — ahead of the llm_api_error
(empty LLM response) check and the buildPartialRunError fallback.
Only applies when exitCode is null (pod gone alongside the job).

Adds regression test: FAR-31 scenario where job 404s mid-run with partial
stdout and missing pod produces the new error code.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
2026-04-24 19:58:46 +00:00
parent 986f2fc7fa
commit 602afa9b84
2 changed files with 69 additions and 0 deletions
+44
View File
@@ -914,6 +914,50 @@ describe("execute: happy path", () => {
expect(result.sessionId).toBe("sess_test123");
});
it("returns k8s_job_deleted_externally when job 404s mid-run and stdout has no result event (FAR-31)", async () => {
  // FAR-31 regression: the Job is removed (e.g. `kubectl delete job`) while
  // Claude is still running. Only partial output reaches the log stream, the
  // Job lookup 404s, and the pod has vanished too, so no exit code can be
  // recovered. The adapter has to report a descriptive error rather than the
  // misleading "Claude exited with code -1".

  // The only thing ever written to stdout is the init event — there is no
  // result event because the run was cut short.
  const initEvent = {
    type: "system",
    subtype: "init",
    model: "claude-sonnet-4-6",
    session_id: "sess_x",
  };
  const initOnlyStdout = `${JSON.stringify(initEvent)}\n`;
  mockLogFn.mockImplementation(
    async (_namespace: string, _podName: string, _container: string, sink: Writable) => {
      sink.write(initOnlyStdout);
    },
  );

  // Every Job read fails with a 404, i.e. the Job no longer exists.
  const jobNotFound = Object.assign(new Error("Not Found"), { response: { statusCode: 404 } });
  mockBatchReadJob.mockRejectedValue(jobNotFound);

  // First pod listing still shows a Running pod; every later listing is empty,
  // which is what leaves the exit code as null.
  const runningPod = {
    metadata: { name: "pod-abc" },
    status: { phase: "Running", containerStatuses: [], initContainerStatuses: [] },
  };
  mockCoreListPods.mockReset();
  mockCoreListPods.mockResolvedValueOnce({ items: [runningPod] });
  mockCoreListPods.mockResolvedValue({ items: [] });

  // Start the run, move the fake clock forward, then collect the outcome.
  const pendingRun = execute(makeCtx());
  await vi.advanceTimersByTimeAsync(3_100);
  const outcome = await pendingRun;

  expect(outcome.errorCode).toBe("k8s_job_deleted_externally");
  expect(outcome.errorMessage).toBe("K8s Job was deleted externally before Claude could complete");
  expect(outcome.exitCode).toBeNull();
});
it("reconnects log stream and logs status when job completion takes > 3s", async () => {
// Make waitForJobCompletion take 4s so the 3s stream reconnect fires first.
// timeoutSec=4, graceSec=0 → completionTimeoutMs=4000.
+25
View File
@@ -975,6 +975,9 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
// Set when we return a mismatch error so the finally block knows not to
// delete a job that is still alive and the UI is waiting on.
let skipCleanup = false;
// Set when the job disappeared (404) or grace-timer fired before we saw a
// terminal condition — used to emit a clearer error when stdout parsing fails.
let jobDeletedExternally = false;
const activeJobRef: ActiveJobRef = {
namespace,
@@ -1231,6 +1234,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
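// condition. If this was TTL cleanup, the container had already exited (TTL
// only fires after completion) and log streaming has the full output; if it
// was an external deletion mid-run, the captured output may be partial.
// Either way, continue to stdout parsing rather than returning an error
// here — the !parsed branch reports k8s_job_deleted_externally when no
// result event was captured and the exit code is null.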
jobDeletedExternally = true;
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
}
} else {
@@ -1247,6 +1251,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
} else if (actualState.jobGone) {
// Job was deleted before we could confirm terminal state — same as the
// fulfilled+jobGone case above: proceed with captured output.
jobDeletedExternally = true;
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
} else if (!actualState.succeeded) {
// Job still not terminal — the completion error was likely transient.
@@ -1314,6 +1319,26 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
}
if (!parsed) {
if (jobDeletedExternally && exitCode === null) {
return {
exitCode,
signal: null,
timedOut: false,
errorMessage: "K8s Job was deleted externally before Claude could complete",
errorCode: "k8s_job_deleted_externally",
resultJson: { stdout },
};
}
if (parsedStream.llmApiEmptyResponse) {
return {
exitCode,
signal: null,
timedOut: false,
errorMessage: "LLM API returned an empty response (stop_reason: null, output_tokens: 0) — the upstream model API may be degraded or misconfigured",
errorCode: "llm_api_error",
resultJson: { stdout },
};
}
return {
exitCode,
signal: null,