Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d9928030d6 | |||
| 76fc6fcdfc | |||
| 3169f49f23 | |||
| e0b35d230f | |||
| 4e2c36319d | |||
| 8474f78fe1 | |||
| 88896eddcf | |||
| a2874c0426 | |||
| 818aa0f1d6 | |||
| 55fd3021fb | |||
| 83b58f9207 | |||
| 602afa9b84 | |||
| 986f2fc7fa |
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "paperclip-adapter-claude-k8s",
|
||||
"version": "0.1.42",
|
||||
"version": "0.1.48",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "paperclip-adapter-claude-k8s",
|
||||
"version": "0.1.42",
|
||||
"version": "0.1.48",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@kubernetes/client-node": "^1.0.0",
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "paperclip-adapter-claude-k8s",
|
||||
"version": "0.1.43",
|
||||
"version": "0.1.48",
|
||||
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
+359
-7
@@ -60,7 +60,7 @@ vi.mock("@paperclipai/adapter-utils/server-utils", async (importOriginal) => {
|
||||
});
|
||||
});
|
||||
|
||||
const { isK8s404, buildPartialRunError, classifyOrphan, describePodTerminatedError, streamPodLogsOnce, shouldAbortForCancellation, execute } = await import("./execute.js");
|
||||
const { isK8s404, buildPartialRunError, classifyOrphan, describePodTerminatedError, describeTruncationCause, streamPodLogsOnce, shouldAbortForCancellation, execute } = await import("./execute.js");
|
||||
|
||||
function makeJob(opts: {
|
||||
runId?: string;
|
||||
@@ -150,10 +150,10 @@ describe("buildPartialRunError", () => {
|
||||
expect(buildPartialRunError(null, "", "")).toBe("Claude exited with code -1");
|
||||
});
|
||||
|
||||
it("skips system/init events and returns generic message when only init captured", () => {
|
||||
it("returns init-only message when stdout is init-only with non-zero exit code (FAR-101)", () => {
|
||||
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine);
|
||||
expect(msg).toBe(
|
||||
"Claude started but did not produce a result (model: claude-sonnet-4-6) — check API credentials, model support, and adapter config",
|
||||
"Claude exited immediately after init (model: claude-sonnet-4-6) (exit code 1) — the model may be unsupported or the session may have been rejected before producing output",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -170,15 +170,15 @@ describe("buildPartialRunError", () => {
|
||||
expect(msg).toBe("Claude exited with code 1: Error: no API key configured");
|
||||
});
|
||||
|
||||
it("skips result events (structured protocol artefact — not surfaced verbatim)", () => {
|
||||
it("returns init-only message when stdout has init + result event but no plain content (structured artefact, not surfaced verbatim)", () => {
|
||||
// In production, buildPartialRunError is only called when parseClaudeStreamJson
|
||||
// returns null (no result event). If somehow a result event appears here, the
|
||||
// raw JSON blob must not be shown — the "did not produce a result" message is
|
||||
// cleaner and avoids leaking protocol internals to the UI.
|
||||
// raw JSON blob must not be shown — the init-only message is cleaner and avoids
|
||||
// leaking protocol internals to the UI.
|
||||
const resultLike = JSON.stringify({ type: "result", subtype: "error", result: "rate limit" });
|
||||
const stdout = [initLine, resultLike].join("\n");
|
||||
const msg = buildPartialRunError(2, "claude-sonnet-4-6", stdout);
|
||||
expect(msg).toContain("did not produce a result");
|
||||
expect(msg).toContain("Claude exited immediately after init");
|
||||
expect(msg).toContain("claude-sonnet-4-6");
|
||||
expect(msg).not.toMatch(/\{.*type.*result/);
|
||||
});
|
||||
@@ -245,6 +245,44 @@ describe("buildPartialRunError", () => {
|
||||
const msg = buildPartialRunError(1, "model-x", stdout);
|
||||
expect(msg).toBe("Claude exited with code 1: real error line");
|
||||
});
|
||||
|
||||
it("appends pod terminated reason/message when state is provided (FAR-100)", () => {
|
||||
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine, {
|
||||
exitCode: 1,
|
||||
reason: "Error",
|
||||
message: "model not supported",
|
||||
signal: null,
|
||||
});
|
||||
expect(msg).toContain("Claude exited immediately after init");
|
||||
expect(msg).toContain("claude-sonnet-4-6");
|
||||
expect(msg).toContain("[pod: reason=Error, message=model not supported]");
|
||||
});
|
||||
|
||||
it("flags exit 137 as OOMKilled in pod cause", () => {
|
||||
const msg = buildPartialRunError(137, "claude-sonnet-4-6", initLine, {
|
||||
exitCode: 137,
|
||||
reason: "OOMKilled",
|
||||
message: null,
|
||||
signal: null,
|
||||
});
|
||||
expect(msg).toContain("[pod: reason=OOMKilled, SIGKILL (commonly OOMKilled)]");
|
||||
});
|
||||
|
||||
it("appends pod cause to content-line message", () => {
|
||||
const stdout = [initLine, "Error: bad request"].join("\n");
|
||||
const msg = buildPartialRunError(1, "claude-sonnet-4-6", stdout, {
|
||||
exitCode: 1,
|
||||
reason: "Error",
|
||||
message: null,
|
||||
signal: null,
|
||||
});
|
||||
expect(msg).toBe("Claude exited with code 1: Error: bad request [pod: reason=Error]");
|
||||
});
|
||||
|
||||
it("does not append anything when podState is null (back-compat)", () => {
|
||||
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine, null);
|
||||
expect(msg).not.toContain("[pod:");
|
||||
});
|
||||
});
|
||||
|
||||
describe("classifyOrphan", () => {
|
||||
@@ -362,6 +400,33 @@ describe("describePodTerminatedError", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("describeTruncationCause", () => {
|
||||
it("annotates exit code 137 as SIGKILL/OOM", () => {
|
||||
const msg = describeTruncationCause({ exitCode: 137, reason: "OOMKilled", message: "Memory cgroup out of memory", signal: null });
|
||||
expect(msg).toContain("exit code 137");
|
||||
expect(msg).toContain("SIGKILL");
|
||||
expect(msg).toContain("OOMKilled");
|
||||
expect(msg).toContain("Memory cgroup out of memory");
|
||||
});
|
||||
|
||||
it("annotates exit code 143 as SIGTERM", () => {
|
||||
const msg = describeTruncationCause({ exitCode: 143, reason: null, message: null, signal: null });
|
||||
expect(msg).toContain("exit code 143");
|
||||
expect(msg).toContain("SIGTERM");
|
||||
});
|
||||
|
||||
it("falls back to 'pod state unavailable' when state is null", () => {
|
||||
const msg = describeTruncationCause(null);
|
||||
expect(msg).toContain("pod state unavailable");
|
||||
});
|
||||
|
||||
it("emits 'no exit code' when exitCode is null but state exists", () => {
|
||||
const msg = describeTruncationCause({ exitCode: null, reason: "Error", message: null, signal: null });
|
||||
expect(msg).toContain("no exit code");
|
||||
expect(msg).toContain("reason=Error");
|
||||
});
|
||||
});
|
||||
|
||||
describe("execute: all-invalid agent.id (N4)", () => {
|
||||
it("returns hard error without creating a Job when agent.id sanitizes to null", async () => {
|
||||
const logs: string[] = [];
|
||||
@@ -577,6 +642,28 @@ describe("execute: concurrency guard", () => {
|
||||
expect(result.errorMessage).toContain("still running for this agent");
|
||||
});
|
||||
|
||||
it("ignores terminating jobs (deletionTimestamp set) and proceeds past the concurrency guard", async () => {
|
||||
// A job being force-deleted has deletionTimestamp set but no Complete/Failed condition.
|
||||
// The guard must treat it as terminal so subsequent runs are not blocked.
|
||||
const terminating: k8s.V1Job = {
|
||||
metadata: {
|
||||
name: "terminating-job",
|
||||
namespace: "paperclip",
|
||||
labels: { "paperclip.io/agent-id": "agent-abc", "paperclip.io/adapter-type": "claude_k8s" },
|
||||
deletionTimestamp: new Date(),
|
||||
},
|
||||
status: { conditions: [] },
|
||||
};
|
||||
mockBatchListJobs.mockResolvedValue({ items: [terminating] });
|
||||
// Guard passes → next failure is job creation (no further mocks set up)
|
||||
mockBatchCreateJob.mockRejectedValue(new Error("quota exceeded"));
|
||||
mockPrepareBundle.mockResolvedValue(makeBundle());
|
||||
const result = await execute(makeCtx());
|
||||
// Must NOT be a concurrency error — the guard let us through
|
||||
expect(result.errorCode).not.toBe("k8s_concurrent_run_blocked");
|
||||
expect(result.errorCode).toBe("k8s_job_create_failed");
|
||||
});
|
||||
|
||||
it("reattaches to a matching orphan and returns k8s_pod_reattach_failed when pod is missing", async () => {
|
||||
// Orphan with matching taskId and sessionId → reattach classification → reattachTarget is set
|
||||
const orphan = makeJob({
|
||||
@@ -892,6 +979,127 @@ describe("execute: happy path", () => {
|
||||
expect(result.sessionId).toBe("sess_test123");
|
||||
});
|
||||
|
||||
it("returns k8s_job_deleted_externally when job 404s mid-run and stdout has no result event (FAR-31)", async () => {
|
||||
// Reproduces the observed scenario: kubectl delete job while Claude is mid-run.
|
||||
// The log stream captures only partial output (no result event), and the pod
|
||||
// is also gone so getPodExitCode returns null. The adapter must emit a
|
||||
// descriptive error instead of the misleading "Claude exited with code -1".
|
||||
|
||||
// Log stream writes only the init line — no result event (mid-run deletion)
|
||||
const partialOutput = JSON.stringify({
|
||||
type: "system",
|
||||
subtype: "init",
|
||||
model: "claude-sonnet-4-6",
|
||||
session_id: "sess_x",
|
||||
}) + "\n";
|
||||
mockLogFn.mockImplementation(
|
||||
async (_ns: string, _pod: string, _ctr: string, writable: Writable) => {
|
||||
writable.write(partialOutput);
|
||||
},
|
||||
);
|
||||
|
||||
// Job is gone (404) — matches the kubectl-delete-job-mid-run scenario
|
||||
mockBatchReadJob.mockRejectedValue(
|
||||
Object.assign(new Error("Not Found"), { response: { statusCode: 404 } }),
|
||||
);
|
||||
|
||||
// Pod is also gone — getPodExitCode returns null (no pod found)
|
||||
mockCoreListPods.mockReset();
|
||||
mockCoreListPods
|
||||
.mockResolvedValueOnce({
|
||||
items: [{
|
||||
metadata: { name: "pod-abc" },
|
||||
status: { phase: "Running", containerStatuses: [], initContainerStatuses: [] },
|
||||
}],
|
||||
})
|
||||
.mockResolvedValue({ items: [] }); // pod gone → exitCode null
|
||||
|
||||
const executePromise = execute(makeCtx());
|
||||
await vi.advanceTimersByTimeAsync(3_100);
|
||||
const result = await executePromise;
|
||||
|
||||
expect(result.errorCode).toBe("k8s_job_deleted_externally");
|
||||
expect(result.errorMessage).toBe("K8s Job was deleted externally before Claude could complete");
|
||||
expect(result.exitCode).toBeNull();
|
||||
});
|
||||
|
||||
it("returns llm_api_error when assistant event has stop_reason:null and output_tokens:0 (FAR-30)", async () => {
|
||||
// Reproduces the MiniMax degradation pattern: init event + assistant event with
|
||||
// stop_reason:null and output_tokens:0, no result event, Claude exits -1.
|
||||
const emptyResponseOutput = [
|
||||
JSON.stringify({ type: "system", subtype: "init", model: "MiniMax-M2.7", session_id: "sess_mm" }),
|
||||
JSON.stringify({
|
||||
type: "assistant",
|
||||
session_id: "sess_mm",
|
||||
message: {
|
||||
id: "msg_empty",
|
||||
stop_reason: null,
|
||||
usage: { input_tokens: 500, output_tokens: 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0 },
|
||||
content: [],
|
||||
},
|
||||
}),
|
||||
].join("\n") + "\n";
|
||||
|
||||
mockLogFn.mockImplementation(
|
||||
async (_ns: string, _pod: string, _ctr: string, writable: Writable) => {
|
||||
writable.write(emptyResponseOutput);
|
||||
},
|
||||
);
|
||||
// getPodExitCode: exit code -1 (as reported in the issue)
|
||||
mockCoreListPods.mockResolvedValue({
|
||||
items: [{ metadata: { name: "pod-abc" }, status: { containerStatuses: [{ name: "claude", state: { terminated: { exitCode: -1 } } }] } }],
|
||||
});
|
||||
|
||||
const executePromise = execute(makeCtx());
|
||||
await vi.advanceTimersByTimeAsync(3_100);
|
||||
const result = await executePromise;
|
||||
|
||||
expect(result.errorCode).toBe("llm_api_error");
|
||||
expect(result.errorMessage).toContain("stop_reason: null");
|
||||
expect(result.errorMessage).toContain("output_tokens: 0");
|
||||
});
|
||||
|
||||
it("returns claude_truncated when assistant produced content but no result event arrived (FAR-95)", async () => {
|
||||
const truncatedOutput = [
|
||||
JSON.stringify({ type: "system", subtype: "init", model: "claude-opus-4-7", session_id: "sess_trunc" }),
|
||||
JSON.stringify({
|
||||
type: "assistant",
|
||||
session_id: "sess_trunc",
|
||||
message: {
|
||||
id: "msg_trunc",
|
||||
stop_reason: null,
|
||||
usage: { input_tokens: 1, output_tokens: 35, cache_creation_input_tokens: 523, cache_read_input_tokens: 46295 },
|
||||
content: [{ type: "tool_use", id: "tool_1", name: "Bash", input: { command: "echo hi" } }],
|
||||
},
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "user",
|
||||
message: { role: "user", content: [{ tool_use_id: "tool_1", type: "tool_result", content: "hi", is_error: false }] },
|
||||
}),
|
||||
].join("\n") + "\n";
|
||||
|
||||
mockLogFn.mockImplementation(
|
||||
async (_ns: string, _pod: string, _ctr: string, writable: Writable) => {
|
||||
writable.write(truncatedOutput);
|
||||
},
|
||||
);
|
||||
mockCoreListPods.mockResolvedValue({
|
||||
items: [{ metadata: { name: "pod-abc" }, status: { containerStatuses: [{ name: "claude", state: { terminated: { exitCode: 137, reason: "OOMKilled", message: "Memory cgroup out of memory" } } }] } }],
|
||||
});
|
||||
|
||||
const executePromise = execute(makeCtx());
|
||||
await vi.advanceTimersByTimeAsync(3_100);
|
||||
const result = await executePromise;
|
||||
|
||||
expect(result.errorCode).toBe("claude_truncated");
|
||||
expect(result.errorMessage).toContain("truncated mid-stream");
|
||||
expect(result.errorMessage).toContain("claude-opus-4-7");
|
||||
expect(result.errorMessage).toContain("exit code 137");
|
||||
expect(result.errorMessage).toContain("SIGKILL");
|
||||
expect(result.errorMessage).toContain("OOMKilled");
|
||||
expect(result.errorMessage).toContain("Memory cgroup out of memory");
|
||||
});
|
||||
|
||||
it("reconnects log stream and logs status when job completion takes > 3s", async () => {
|
||||
// Make waitForJobCompletion take 4s so the 3s stream reconnect fires first.
|
||||
// timeoutSec=4, graceSec=0 → completionTimeoutMs=4000.
|
||||
@@ -1065,6 +1273,62 @@ describe("execute: happy path", () => {
|
||||
|
||||
expect(result.exitCode).toBe(0);
|
||||
});
|
||||
|
||||
it("logs bundled skill names and count (FAR-36 diagnostic)", async () => {
|
||||
const skills = [
|
||||
{ key: "safety--abc123", runtimeName: "safety--abc123", desired: true, managed: true, required: true, state: "configured" as const },
|
||||
{ key: "sdlc--def456", runtimeName: "sdlc--def456", desired: true, managed: true, required: true, state: "configured" as const },
|
||||
];
|
||||
mockReadSkillEntries.mockResolvedValue(skills);
|
||||
|
||||
const logs: Array<{ stream: string; msg: string }> = [];
|
||||
const onLog = vi.fn().mockImplementation(async (stream: string, msg: string) => { logs.push({ stream, msg }); });
|
||||
|
||||
const executePromise = execute(makeCtx({ onLog } as Partial<AdapterExecutionContext>));
|
||||
await vi.advanceTimersByTimeAsync(3_100);
|
||||
await executePromise;
|
||||
|
||||
const skillLine = logs.find((l) => l.msg.includes("Skills bundled"));
|
||||
expect(skillLine).toBeDefined();
|
||||
expect(skillLine?.stream).toBe("stdout");
|
||||
expect(skillLine?.msg).toContain("(2):");
|
||||
expect(skillLine?.msg).toContain("safety--abc123");
|
||||
expect(skillLine?.msg).toContain("sdlc--def456");
|
||||
});
|
||||
|
||||
it("logs Skills bundled (0): none when no skills are configured (FAR-36 diagnostic)", async () => {
|
||||
mockReadSkillEntries.mockResolvedValue([]);
|
||||
|
||||
const logs: Array<{ stream: string; msg: string }> = [];
|
||||
const onLog = vi.fn().mockImplementation(async (stream: string, msg: string) => { logs.push({ stream, msg }); });
|
||||
|
||||
const executePromise = execute(makeCtx({ onLog } as Partial<AdapterExecutionContext>));
|
||||
await vi.advanceTimersByTimeAsync(3_100);
|
||||
await executePromise;
|
||||
|
||||
const skillLine = logs.find((l) => l.msg.includes("Skills bundled"));
|
||||
expect(skillLine).toBeDefined();
|
||||
expect(skillLine?.msg).toContain("(0): none");
|
||||
});
|
||||
|
||||
it("includes skill count in onMeta commandNotes (FAR-36 diagnostic)", async () => {
|
||||
const skills = [
|
||||
{ key: "safety--abc123", runtimeName: "safety--abc123", desired: true, managed: true, required: true, state: "configured" as const },
|
||||
];
|
||||
mockReadSkillEntries.mockResolvedValue(skills);
|
||||
|
||||
const onMeta = vi.fn().mockResolvedValue(undefined);
|
||||
const executePromise = execute(makeCtx({ onMeta } as Partial<AdapterExecutionContext>));
|
||||
await vi.advanceTimersByTimeAsync(3_100);
|
||||
await executePromise;
|
||||
|
||||
expect(onMeta).toHaveBeenCalled();
|
||||
const notes: string[] = onMeta.mock.calls[0][0].commandNotes;
|
||||
const skillsNote = notes.find((n: string) => n.startsWith("Skills"));
|
||||
expect(skillsNote).toBeDefined();
|
||||
expect(skillsNote).toContain("(1):");
|
||||
expect(skillsNote).toContain("safety--abc123");
|
||||
});
|
||||
});
|
||||
|
||||
// ─── execute: waitForPod edge cases ──────────────────────────────────────────
|
||||
@@ -1535,3 +1799,91 @@ describe("execute: SIGTERM handler best-effort cleanup", () => {
|
||||
// so we do not need to settle executePromise.
|
||||
});
|
||||
});
|
||||
|
||||
// ─── execute: per-agent creation mutex (FAR-29 TOCTOU fix) ───────────────────
|
||||
//
|
||||
// Verifies that two concurrent execute() calls for the same agent cannot both
|
||||
// enter the listNamespacedJob → createNamespacedJob sequence simultaneously.
|
||||
// Without the per-agent mutex, both would pass the concurrency guard before
|
||||
// either job appears in the other's list query.
|
||||
|
||||
describe("execute: per-agent creation mutex prevents TOCTOU race", () => {
|
||||
beforeEach(() => {
|
||||
vi.resetAllMocks();
|
||||
mockReadSkillEntries.mockResolvedValue([]);
|
||||
mockGetSelfPodInfo.mockResolvedValue(makeSelfPodResult());
|
||||
mockPrepareBundle.mockResolvedValue(makeBundle());
|
||||
// Make job creation fail so the guard+create phase exits quickly and
|
||||
// releases the mutex without needing to mock the full streaming path.
|
||||
mockBatchCreateJob.mockRejectedValue(new Error("mock: create not configured"));
|
||||
mockBatchDeleteJob.mockResolvedValue({});
|
||||
mockCoreDeleteSecret.mockResolvedValue({});
|
||||
});
|
||||
|
||||
it("serializes guard phases for the same agent: call-2 waits until call-1 exits guard+create", async () => {
|
||||
const listCalls: string[] = [];
|
||||
let resolveFirstList!: (v: { items: [] }) => void;
|
||||
|
||||
mockBatchListJobs
|
||||
.mockImplementationOnce(() => {
|
||||
listCalls.push("call-1");
|
||||
return new Promise<{ items: [] }>((resolve) => { resolveFirstList = resolve; });
|
||||
})
|
||||
.mockImplementation(() => {
|
||||
listCalls.push("call-2");
|
||||
return Promise.resolve({ items: [] });
|
||||
});
|
||||
|
||||
const p1 = execute(makeCtx({ runId: "run-1" }));
|
||||
const p2 = execute(makeCtx({ runId: "run-2" }));
|
||||
|
||||
// Drain microtasks: call-1 should be suspended in listNamespacedJob while
|
||||
// call-2 waits behind the per-agent mutex, not yet calling list.
|
||||
for (let i = 0; i < 20; i++) await Promise.resolve();
|
||||
expect(listCalls).toEqual(["call-1"]);
|
||||
|
||||
// Let call-1's guard resolve (no running jobs). It will proceed to job
|
||||
// creation, fail (mock rejects), and release the mutex in finally.
|
||||
resolveFirstList({ items: [] });
|
||||
await Promise.allSettled([p1, p2]);
|
||||
|
||||
// call-2 must have listed, and only AFTER call-1's guard resolved.
|
||||
// The exact order: call-1 listed → call-1 list resolved → call-2 listed.
|
||||
expect(listCalls).toEqual(["call-1", "call-2"]);
|
||||
});
|
||||
|
||||
it("does not serialize guard phases for different agents", async () => {
|
||||
const listCalls: string[] = [];
|
||||
let resolveAgentAList!: (v: { items: [] }) => void;
|
||||
|
||||
// Agent A's list is artificially slow. Agent B (different id) should
|
||||
// proceed immediately without waiting — the mutex is keyed by agent id.
|
||||
mockBatchListJobs
|
||||
.mockImplementationOnce(() => {
|
||||
listCalls.push("A");
|
||||
return new Promise<{ items: [] }>((resolve) => { resolveAgentAList = resolve; });
|
||||
})
|
||||
.mockImplementation(() => {
|
||||
listCalls.push("B");
|
||||
return Promise.resolve({ items: [] });
|
||||
});
|
||||
|
||||
const ctxA = makeCtx({ runId: "run-A" });
|
||||
const ctxB = makeCtx({
|
||||
runId: "run-B",
|
||||
agent: { id: "agent-other", companyId: "co1", name: "Other Agent", adapterType: "claude_k8s", adapterConfig: {} },
|
||||
} as Partial<AdapterExecutionContext>);
|
||||
|
||||
const pA = execute(ctxA);
|
||||
const pB = execute(ctxB);
|
||||
|
||||
// Drain microtasks — B should have called list even though A is still
|
||||
// suspended, because they use separate mutex slots.
|
||||
for (let i = 0; i < 20; i++) await Promise.resolve();
|
||||
expect(listCalls).toContain("B");
|
||||
|
||||
// Let A complete so the promises settle cleanly.
|
||||
resolveAgentAList({ items: [] });
|
||||
await Promise.allSettled([pA, pB]);
|
||||
});
|
||||
});
|
||||
|
||||
+210
-38
@@ -48,6 +48,10 @@ interface ActiveJobRef {
|
||||
kubeconfigPath?: string;
|
||||
}
|
||||
const activeJobs = new Set<ActiveJobRef>();
|
||||
// Per-agent serialization lock: prevents the TOCTOU race (FAR-29) where two
|
||||
// concurrent execute() calls for the same agent both pass the list-then-create
|
||||
// guard and create K8s Jobs simultaneously on the shared PVC.
|
||||
const agentCreationMutex = new Map<string, Promise<void>>();
|
||||
let sigtermHandlerRegistered = false;
|
||||
|
||||
function ensureSigtermHandler(): void {
|
||||
@@ -106,24 +110,12 @@ export function shouldAbortForCancellation(runStatus: string | undefined): boole
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the error message when Claude's stdout contains no result event.
|
||||
* Skips system/init event lines so the UI doesn't display the raw init JSON.
|
||||
* Exported for unit tests.
|
||||
* Returns the first non-JSON/plain-text line in stdout, treating JSON objects
|
||||
* with a "type" field as protocol artefacts and skipping them.
|
||||
* Used by buildPartialRunError to detect init-only runs.
|
||||
*/
|
||||
export function buildPartialRunError(
|
||||
exitCode: number | null,
|
||||
model: string,
|
||||
stdout: string,
|
||||
): string {
|
||||
if (exitCode === 0) return "Failed to parse Claude JSON output";
|
||||
|
||||
// Walk stdout lines and skip every structured streaming event (any JSON
|
||||
// object that carries a non-empty "type" field: system, assistant, user,
|
||||
// rate_limit_event, result, …). All of these are protocol artefacts and
|
||||
// produce confusing raw-JSON blobs when surfaced verbatim as an error
|
||||
// message. Only plain-text lines (non-JSON, or JSON without a type field)
|
||||
// are treated as human-readable content worth including in the error.
|
||||
const firstContentLine = stdout.split(/\r?\n/)
|
||||
function firstContentLine(stdout: string): string {
|
||||
return stdout.split(/\r?\n/)
|
||||
.map((l) => l.trim())
|
||||
.find((l) => {
|
||||
if (!l) return false;
|
||||
@@ -138,19 +130,82 @@ export function buildPartialRunError(
|
||||
}
|
||||
return true;
|
||||
}) ?? "";
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true when stdout contains only init/system/assistant events from the
|
||||
* given model with no human-readable content lines. Used to detect init-only
|
||||
* non-zero-exit runs that should be classified as claude_init_failed rather than
|
||||
* the generic "Claude exited with code N" message.
|
||||
*/
|
||||
function isInitOnlyRun(model: string, stdout: string): boolean {
|
||||
if (!stdout.trim() || !model) return false;
|
||||
const content = firstContentLine(stdout);
|
||||
if (content) return false;
|
||||
// Check that at least the init event for this model was seen
|
||||
const hasModelInit = stdout.includes(`"model":"${model}"`) || stdout.includes(`"model":"${model.replace(/-/g, "_")}"`);
|
||||
return hasModelInit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append the pod's terminated-state detail (reason/message/signal) to a
|
||||
* partial-run error message when available. Exit code is already in the
|
||||
* caller-supplied message, so we only append fields that add new signal —
|
||||
* specifically reason (e.g. OOMKilled, Error, ContainerCannotRun), message
|
||||
* (kubelet diagnostic text), and signal. Saves the operator a kubectl trip.
|
||||
*/
|
||||
function appendPodCause(message: string, state: PodTerminatedState | null): string {
|
||||
if (!state) return message;
|
||||
const parts: string[] = [];
|
||||
if (state.reason) parts.push(`reason=${state.reason}`);
|
||||
if (state.message) parts.push(`message=${state.message}`);
|
||||
if (state.signal !== null) parts.push(`signal=${state.signal}`);
|
||||
if (state.exitCode === 137) parts.push("SIGKILL (commonly OOMKilled)");
|
||||
if (parts.length === 0) return message;
|
||||
return `${message} [pod: ${parts.join(", ")}]`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the error message when Claude's stdout contains no result event.
|
||||
* Skips system/init event lines so the UI doesn't display the raw init JSON.
|
||||
* When `podState` is provided, appends the K8s container terminated reason/
|
||||
* message so failures self-explain without requiring `kubectl`.
|
||||
* Exported for unit tests.
|
||||
*/
|
||||
export function buildPartialRunError(
|
||||
exitCode: number | null,
|
||||
model: string,
|
||||
stdout: string,
|
||||
podState: PodTerminatedState | null = null,
|
||||
): string {
|
||||
if (exitCode === 0) return "Failed to parse Claude JSON output";
|
||||
|
||||
// If the stream contained only structured events with no plain-text output,
|
||||
// surface the model name so the operator can diagnose missing credentials
|
||||
// or unsupported/misconfigured model.
|
||||
const initOnlyOutput = stdout.trim() !== "" && model !== "" && !firstContentLine;
|
||||
if (initOnlyOutput) {
|
||||
const modelHint = model ? ` (model: ${model})` : "";
|
||||
return `Claude started but did not produce a result${modelHint} — check API credentials, model support, and adapter config`;
|
||||
const contentLine = firstContentLine(stdout);
|
||||
if (contentLine) {
|
||||
return appendPodCause(`Claude exited with code ${exitCode ?? -1}: ${contentLine}`, podState);
|
||||
}
|
||||
|
||||
return firstContentLine
|
||||
? `Claude exited with code ${exitCode ?? -1}: ${firstContentLine}`
|
||||
: `Claude exited with code ${exitCode ?? -1}`;
|
||||
if (isInitOnlyRun(model, stdout) && (exitCode ?? 0) !== 0) {
|
||||
const modelHint = model ? ` (model: ${model})` : "";
|
||||
return appendPodCause(
|
||||
`Claude exited immediately after init${modelHint} (exit code ${exitCode ?? -1}) — the model may be unsupported or the session may have been rejected before producing output`,
|
||||
podState,
|
||||
);
|
||||
}
|
||||
|
||||
const initOnlyOutput = stdout.trim() !== "" && model !== "";
|
||||
if (initOnlyOutput) {
|
||||
const modelHint = model ? ` (model: ${model})` : "";
|
||||
return appendPodCause(
|
||||
`Claude started but did not produce a result${modelHint} — check API credentials, model support, and adapter config`,
|
||||
podState,
|
||||
);
|
||||
}
|
||||
|
||||
return appendPodCause(`Claude exited with code ${exitCode ?? -1}`, podState);
|
||||
}
|
||||
|
||||
export type OrphanClassification =
|
||||
@@ -570,6 +625,27 @@ async function waitForJobCompletion(
|
||||
* Get the exit code from the Job's pod.
|
||||
*/
|
||||
async function getPodExitCode(namespace: string, jobName: string, kubeconfigPath?: string): Promise<number | null> {
|
||||
const state = await getPodTerminatedState(namespace, jobName, kubeconfigPath);
|
||||
return state?.exitCode ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the claude container's terminated state (exit code, reason, message,
|
||||
* signal) from the Job's pod. Returns null if the pod or container is gone.
|
||||
* Used by the no-result error path to explain *why* a run was truncated.
|
||||
*/
|
||||
export interface PodTerminatedState {
|
||||
exitCode: number | null;
|
||||
reason: string | null;
|
||||
message: string | null;
|
||||
signal: number | null;
|
||||
}
|
||||
|
||||
async function getPodTerminatedState(
|
||||
namespace: string,
|
||||
jobName: string,
|
||||
kubeconfigPath?: string,
|
||||
): Promise<PodTerminatedState | null> {
|
||||
const coreApi = getCoreApi(kubeconfigPath);
|
||||
const podList = await coreApi.listNamespacedPod({
|
||||
namespace,
|
||||
@@ -579,7 +655,40 @@ async function getPodExitCode(namespace: string, jobName: string, kubeconfigPath
|
||||
if (!pod) return null;
|
||||
|
||||
const containerStatus = pod.status?.containerStatuses?.find((s) => s.name === "claude");
|
||||
return containerStatus?.state?.terminated?.exitCode ?? null;
|
||||
const terminated = containerStatus?.state?.terminated;
|
||||
if (!terminated) return null;
|
||||
return {
|
||||
exitCode: terminated.exitCode ?? null,
|
||||
reason: terminated.reason ?? null,
|
||||
message: (terminated.message ?? "").trim() || null,
|
||||
signal: terminated.signal ?? null,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Format a human-readable explanation for a truncated run, including the
|
||||
* pod's claude-container terminated state when available. Exit code 137
|
||||
* is annotated as SIGKILL/OOM since that is the most common cause.
|
||||
* Exported for unit tests.
|
||||
*/
|
||||
export function describeTruncationCause(
|
||||
state: PodTerminatedState | null,
|
||||
): string {
|
||||
if (!state) {
|
||||
return "pod state unavailable — likely deleted before exit could be read";
|
||||
}
|
||||
const parts: string[] = [];
|
||||
if (state.exitCode !== null) {
|
||||
parts.push(`exit code ${state.exitCode}`);
|
||||
if (state.exitCode === 137) parts.push("SIGKILL (commonly OOMKilled)");
|
||||
else if (state.exitCode === 143) parts.push("SIGTERM");
|
||||
} else {
|
||||
parts.push("no exit code");
|
||||
}
|
||||
if (state.signal !== null) parts.push(`signal ${state.signal}`);
|
||||
if (state.reason) parts.push(`reason=${state.reason}`);
|
||||
if (state.message) parts.push(`message=${state.message}`);
|
||||
return parts.join(", ");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -633,17 +742,40 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
errorCode: "k8s_agent_id_invalid",
|
||||
};
|
||||
}
|
||||
// FAR-29: serialize guard+create per agent within this process to prevent the
|
||||
// TOCTOU race where two concurrent execute() calls both pass the list-then-create
|
||||
// guard and create K8s Jobs simultaneously on the shared PVC.
|
||||
const _prevCreation = agentCreationMutex.get(agentId) ?? Promise.resolve();
|
||||
let _releaseMutex: () => void = () => {};
|
||||
const _mutexSlot = new Promise<void>((resolve) => { _releaseMutex = resolve; });
|
||||
// Chain: next caller for this agent waits on _mutexSlot, which resolves in finally.
|
||||
agentCreationMutex.set(agentId, _prevCreation.then(() => _mutexSlot, () => _mutexSlot));
|
||||
// Wait for any prior execute() call to finish its guard+create phase.
|
||||
await _prevCreation.catch(() => {});
|
||||
|
||||
// Hoist declarations used in both the guard+create phase and the log-streaming
|
||||
// section so the mutex try/finally can be added without a large re-indent.
|
||||
let reattachTarget: { jobName: string; namespace: string; priorRunId: string; image: string } | null = null;
|
||||
// eslint-disable-next-line prefer-const
|
||||
let jobName!: string;
|
||||
// eslint-disable-next-line prefer-const
|
||||
let namespace!: string;
|
||||
let promptSecret: { name: string; namespace: string; data: Record<string, string> } | null = null;
|
||||
// runtimeSessionParams and currentSessionIdRaw are also used after the
|
||||
// try block (in the result-parsing section) so hoist them here.
|
||||
const runtimeSessionParams = parseObject(runtime.sessionParams);
|
||||
const currentSessionIdRaw = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
|
||||
const coreApi = getCoreApi(kubeconfigPath);
|
||||
const batchApi = getBatchApi(kubeconfigPath);
|
||||
|
||||
try {
|
||||
const selfPod = await getSelfPodInfo(kubeconfigPath);
|
||||
const guardNamespace = asString(config.namespace, "") || selfPod.namespace;
|
||||
const reattachOrphanedJobs = asBoolean(config.reattachOrphanedJobs, true);
|
||||
const runtimeSessionParams = parseObject(runtime.sessionParams);
|
||||
const currentSessionIdRaw = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
|
||||
const currentSessionLabel = currentSessionIdRaw ? sanitizeLabelValue(currentSessionIdRaw) : null;
|
||||
const currentTaskIdRaw = asString(ctx.context.taskId, "") || asString(ctx.context.issueId, "");
|
||||
const currentTaskLabel = currentTaskIdRaw ? sanitizeLabelValue(currentTaskIdRaw) : null;
|
||||
let reattachTarget: { jobName: string; namespace: string; priorRunId: string; image: string } | null = null;
|
||||
try {
|
||||
const batchApi = getBatchApi(kubeconfigPath);
|
||||
const existing = await batchApi.listNamespacedJob({
|
||||
namespace: guardNamespace,
|
||||
labelSelector: `paperclip.io/agent-id=${sanitizedAgentId},paperclip.io/adapter-type=claude_k8s`,
|
||||
@@ -763,19 +895,14 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
};
|
||||
}
|
||||
|
||||
const coreApi = getCoreApi(kubeconfigPath);
|
||||
const batchApi = getBatchApi(kubeconfigPath);
|
||||
|
||||
let jobName: string;
|
||||
let namespace: string;
|
||||
let promptSecret: { name: string; namespace: string; data: Record<string, string> } | null = null;
|
||||
|
||||
// Prepare the prompt bundle (skills + instructions) on the server filesystem.
|
||||
// The K8s Job pod mounts the same PVC at /paperclip, so bundle paths written
|
||||
// here are accessible inside the pod at the identical absolute path.
|
||||
const skillEntries = await readPaperclipRuntimeSkillEntries(config, import.meta.dirname ?? __dirname);
|
||||
const desiredSkillNames = new Set(resolvePaperclipDesiredSkillNames(config, skillEntries));
|
||||
const desiredSkills = skillEntries.filter((e) => desiredSkillNames.has(e.key));
|
||||
const skillSummary = desiredSkills.length > 0 ? desiredSkills.map((s) => s.runtimeName ?? s.key).join(", ") : "none";
|
||||
await onLog("stdout", `[paperclip] Skills bundled (${desiredSkills.length}): ${skillSummary}\n`);
|
||||
const instructionsFilePath = asString(config.instructionsFilePath, "").trim();
|
||||
const instructionsFileDir = instructionsFilePath ? `${path.dirname(instructionsFilePath)}/` : "";
|
||||
let instructionsContents: string | null = null;
|
||||
@@ -872,6 +999,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
`Image: ${job.spec?.template.spec?.containers[0]?.image ?? "unknown"}`,
|
||||
`Namespace: ${namespace}`,
|
||||
`Timeout: ${timeoutSec}s`,
|
||||
`Skills (${desiredSkills.length}): ${skillSummary}`,
|
||||
],
|
||||
prompt,
|
||||
...(promptMetrics ? { promptMetrics } : {}),
|
||||
@@ -967,14 +1095,23 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
|
||||
await onLog("stdout", `[paperclip] Created K8s Job: ${jobName} in namespace ${namespace} (deadline: ${timeoutSec > 0 ? `${timeoutSec}s` : "none"})\n`);
|
||||
}
|
||||
} finally {
|
||||
// Release the per-agent creation mutex so the next queued execute() call
|
||||
// can proceed with its guard+create phase (FAR-29).
|
||||
_releaseMutex();
|
||||
}
|
||||
|
||||
let stdout = "";
|
||||
let exitCode: number | null = null;
|
||||
let podTerminatedState: PodTerminatedState | null = null;
|
||||
let jobTimedOut = false;
|
||||
let keepaliveTimer: ReturnType<typeof setInterval> | null = null;
|
||||
// Set when we return a mismatch error so the finally block knows not to
|
||||
// delete a job that is still alive and the UI is waiting on.
|
||||
let skipCleanup = false;
|
||||
// Set when the job disappeared (404) or grace-timer fired before we saw a
|
||||
// terminal condition — used to emit a clearer error when stdout parsing fails.
|
||||
let jobDeletedExternally = false;
|
||||
|
||||
const activeJobRef: ActiveJobRef = {
|
||||
namespace,
|
||||
@@ -1231,6 +1368,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
// condition. The container must have exited first (TTL only fires after
|
||||
// completion), so log streaming has captured the full output — continue
|
||||
// to stdout parsing rather than returning an error.
|
||||
jobDeletedExternally = true;
|
||||
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
||||
}
|
||||
} else {
|
||||
@@ -1247,6 +1385,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
} else if (actualState.jobGone) {
|
||||
// Job was deleted before we could confirm terminal state — same as the
|
||||
// fulfilled+jobGone case above: proceed with captured output.
|
||||
jobDeletedExternally = true;
|
||||
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
||||
} else if (!actualState.succeeded) {
|
||||
// Job still not terminal — the completion error was likely transient.
|
||||
@@ -1264,7 +1403,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
}
|
||||
}
|
||||
|
||||
exitCode = await getPodExitCode(namespace, jobName, kubeconfigPath);
|
||||
podTerminatedState = await getPodTerminatedState(namespace, jobName, kubeconfigPath);
|
||||
exitCode = podTerminatedState?.exitCode ?? null;
|
||||
} finally {
|
||||
if (keepaliveTimer) clearInterval(keepaliveTimer);
|
||||
activeJobs.delete(activeJobRef);
|
||||
@@ -1314,11 +1454,43 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
||||
}
|
||||
|
||||
if (!parsed) {
|
||||
if (jobDeletedExternally && exitCode === null) {
|
||||
return {
|
||||
exitCode,
|
||||
signal: null,
|
||||
timedOut: false,
|
||||
errorMessage: "K8s Job was deleted externally before Claude could complete",
|
||||
errorCode: "k8s_job_deleted_externally",
|
||||
resultJson: { stdout },
|
||||
};
|
||||
}
|
||||
if (parsedStream.llmApiEmptyResponse) {
|
||||
return {
|
||||
exitCode,
|
||||
signal: null,
|
||||
timedOut: false,
|
||||
errorMessage: "LLM API returned an empty response (stop_reason: null, output_tokens: 0) — the upstream model API may be degraded or misconfigured",
|
||||
errorCode: "llm_api_error",
|
||||
resultJson: { stdout },
|
||||
};
|
||||
}
|
||||
if (parsedStream.truncatedMidStream) {
|
||||
const cause = describeTruncationCause(podTerminatedState);
|
||||
const modelHint = parsedStream.model ? ` (model: ${parsedStream.model})` : "";
|
||||
return {
|
||||
exitCode,
|
||||
signal: null,
|
||||
timedOut: false,
|
||||
errorMessage: `Claude run was truncated mid-stream${modelHint} — assistant produced content but no result event arrived; ${cause}`,
|
||||
errorCode: "claude_truncated",
|
||||
resultJson: { stdout },
|
||||
};
|
||||
}
|
||||
return {
|
||||
exitCode,
|
||||
signal: null,
|
||||
timedOut: false,
|
||||
errorMessage: buildPartialRunError(exitCode, parsedStream.model, stdout),
|
||||
errorMessage: buildPartialRunError(exitCode, parsedStream.model, stdout, podTerminatedState),
|
||||
resultJson: { stdout },
|
||||
};
|
||||
}
|
||||
|
||||
@@ -154,6 +154,131 @@ more raw output`;
|
||||
// Should not be "Hello world\n\nHello world"
|
||||
expect(result.summary.split("Hello world").length).toBe(2);
|
||||
});
|
||||
|
||||
it("sets llmApiEmptyResponse=true when stop_reason:null and usage.output_tokens:0", () => {
|
||||
const initLine = JSON.stringify({ type: "system", subtype: "init", model: "MiniMax-M2.7", session_id: "sess_1" });
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
session_id: "sess_1",
|
||||
message: {
|
||||
id: "msg_abc",
|
||||
stop_reason: null,
|
||||
usage: { input_tokens: 100, output_tokens: 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0 },
|
||||
content: [],
|
||||
},
|
||||
});
|
||||
const result = parseClaudeStreamJson([initLine, assistantEvent].join("\n"));
|
||||
expect(result.llmApiEmptyResponse).toBe(true);
|
||||
expect(result.resultJson).toBeNull();
|
||||
});
|
||||
|
||||
it("sets llmApiEmptyResponse=true when stop_reason:null and message-level output_tokens:0", () => {
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
message: { stop_reason: null, output_tokens: 0, content: [] },
|
||||
});
|
||||
const result = parseClaudeStreamJson(assistantEvent);
|
||||
expect(result.llmApiEmptyResponse).toBe(true);
|
||||
});
|
||||
|
||||
it("does not set llmApiEmptyResponse when stop_reason is non-null", () => {
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
message: {
|
||||
stop_reason: "end_turn",
|
||||
usage: { output_tokens: 0 },
|
||||
content: [],
|
||||
},
|
||||
});
|
||||
const result = parseClaudeStreamJson(assistantEvent);
|
||||
expect(result.llmApiEmptyResponse).toBe(false);
|
||||
});
|
||||
|
||||
it("does not set llmApiEmptyResponse when output_tokens > 0", () => {
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
message: {
|
||||
stop_reason: null,
|
||||
usage: { output_tokens: 5 },
|
||||
content: [{ type: "text", text: "hello" }],
|
||||
},
|
||||
});
|
||||
const result = parseClaudeStreamJson(assistantEvent);
|
||||
expect(result.llmApiEmptyResponse).toBe(false);
|
||||
});
|
||||
|
||||
it("clears llmApiEmptyResponse when a result event follows the empty assistant event", () => {
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
message: { stop_reason: null, usage: { output_tokens: 0 }, content: [] },
|
||||
});
|
||||
const resultEvent = JSON.stringify({
|
||||
type: "result",
|
||||
result: "Done",
|
||||
subtype: "stop",
|
||||
total_cost_usd: 0.001,
|
||||
usage: { input_tokens: 10, output_tokens: 5, cache_read_input_tokens: 0 },
|
||||
});
|
||||
const result = parseClaudeStreamJson([assistantEvent, resultEvent].join("\n"));
|
||||
expect(result.llmApiEmptyResponse).toBe(false);
|
||||
expect(result.resultJson).not.toBeNull();
|
||||
});
|
||||
|
||||
it("sets truncatedMidStream=true when assistant event with output_tokens>0 has no result (FAR-95)", () => {
|
||||
const initLine = JSON.stringify({ type: "system", subtype: "init", model: "claude-opus-4-7", session_id: "sess_1" });
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
session_id: "sess_1",
|
||||
message: {
|
||||
id: "msg_abc",
|
||||
stop_reason: null,
|
||||
usage: { input_tokens: 1, output_tokens: 35, cache_creation_input_tokens: 523, cache_read_input_tokens: 46295 },
|
||||
content: [{ type: "tool_use", id: "tool_1", name: "Bash", input: { command: "echo hi" } }],
|
||||
},
|
||||
});
|
||||
const result = parseClaudeStreamJson([initLine, assistantEvent].join("\n"));
|
||||
expect(result.truncatedMidStream).toBe(true);
|
||||
expect(result.llmApiEmptyResponse).toBe(false);
|
||||
expect(result.resultJson).toBeNull();
|
||||
});
|
||||
|
||||
it("clears truncatedMidStream when a result event follows assistant content", () => {
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
message: { stop_reason: null, usage: { output_tokens: 35 }, content: [] },
|
||||
});
|
||||
const resultEvent = JSON.stringify({
|
||||
type: "result",
|
||||
result: "Done",
|
||||
subtype: "stop",
|
||||
total_cost_usd: 0.001,
|
||||
usage: { input_tokens: 10, output_tokens: 5, cache_read_input_tokens: 0 },
|
||||
});
|
||||
const result = parseClaudeStreamJson([assistantEvent, resultEvent].join("\n"));
|
||||
expect(result.truncatedMidStream).toBe(false);
|
||||
expect(result.resultJson).not.toBeNull();
|
||||
});
|
||||
|
||||
it("does not set truncatedMidStream when assistant has output_tokens=0", () => {
|
||||
const assistantEvent = JSON.stringify({
|
||||
type: "assistant",
|
||||
message: { stop_reason: null, usage: { output_tokens: 0 }, content: [] },
|
||||
});
|
||||
const result = parseClaudeStreamJson(assistantEvent);
|
||||
expect(result.truncatedMidStream).toBe(false);
|
||||
});
|
||||
|
||||
it("sets llmApiEmptyResponse=false for normal result", () => {
|
||||
const resultEvent = JSON.stringify({
|
||||
type: "result",
|
||||
result: "Done",
|
||||
subtype: "stop",
|
||||
total_cost_usd: 0.005,
|
||||
usage: { input_tokens: 100, output_tokens: 200, cache_read_input_tokens: 50 },
|
||||
});
|
||||
const result = parseClaudeStreamJson(resultEvent);
|
||||
expect(result.llmApiEmptyResponse).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("extractClaudeLoginUrl", () => {
|
||||
|
||||
@@ -15,6 +15,14 @@ export function parseClaudeStreamJson(stdout: string) {
|
||||
// at the line level; this guard only needs to protect against the same
|
||||
// message block being parsed twice.
|
||||
const seenBlocks = new Set<string>();
|
||||
// Set when we see stop_reason:null + output_tokens:0 on an assistant event
|
||||
// with no subsequent result event — indicates the upstream LLM API returned
|
||||
// an empty/malformed response (e.g. MiniMax degraded performance).
|
||||
let llmApiEmptyResponse = false;
|
||||
// Set when an assistant event with output_tokens > 0 was seen but no result
|
||||
// event arrived — indicates the run was truncated mid-stream (pod terminated,
|
||||
// OOMKill, or claude CLI crash after producing content).
|
||||
let assistantContentSeen = false;
|
||||
|
||||
for (const rawLine of stdout.split(/\r?\n/)) {
|
||||
const line = rawLine.trim();
|
||||
@@ -34,6 +42,21 @@ export function parseClaudeStreamJson(stdout: string) {
|
||||
const message = parseObject(event.message);
|
||||
const messageId = asString(message.id, "");
|
||||
const content = Array.isArray(message.content) ? message.content : [];
|
||||
|
||||
// Detect empty LLM API response: stop_reason:null with zero output tokens.
|
||||
// output_tokens may appear directly on message or nested under message.usage.
|
||||
const stopReason = message.stop_reason;
|
||||
const usageObj = parseObject(message.usage as Record<string, unknown>);
|
||||
const outputTokens = typeof message.output_tokens === "number"
|
||||
? message.output_tokens
|
||||
: asNumber(usageObj.output_tokens, -1);
|
||||
if (stopReason === null && outputTokens === 0) {
|
||||
llmApiEmptyResponse = true;
|
||||
}
|
||||
if (outputTokens > 0) {
|
||||
assistantContentSeen = true;
|
||||
}
|
||||
|
||||
for (let i = 0; i < content.length; i++) {
|
||||
const entry = content[i];
|
||||
if (typeof entry !== "object" || entry === null || Array.isArray(entry)) continue;
|
||||
@@ -55,6 +78,8 @@ export function parseClaudeStreamJson(stdout: string) {
|
||||
|
||||
if (type === "result") {
|
||||
finalResult = event;
|
||||
llmApiEmptyResponse = false; // result event means Claude completed normally
|
||||
assistantContentSeen = false; // result event means stream was not truncated
|
||||
sessionId = asString(event.session_id, sessionId ?? "") || sessionId;
|
||||
}
|
||||
}
|
||||
@@ -67,6 +92,8 @@ export function parseClaudeStreamJson(stdout: string) {
|
||||
usage: null as UsageSummary | null,
|
||||
summary: assistantTexts.join("\n\n").trim(),
|
||||
resultJson: null as Record<string, unknown> | null,
|
||||
llmApiEmptyResponse,
|
||||
truncatedMidStream: assistantContentSeen,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -87,6 +114,8 @@ export function parseClaudeStreamJson(stdout: string) {
|
||||
usage,
|
||||
summary,
|
||||
resultJson: finalResult,
|
||||
llmApiEmptyResponse: false,
|
||||
truncatedMidStream: false,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user