Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3169f49f23 | |||
| e0b35d230f | |||
| 4e2c36319d | |||
| 8474f78fe1 |
Generated
+2
-2
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.45",
|
"version": "0.1.47",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.45",
|
"version": "0.1.47",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@kubernetes/client-node": "^1.0.0",
|
"@kubernetes/client-node": "^1.0.0",
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.45",
|
"version": "0.1.47",
|
||||||
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"repository": {
|
"repository": {
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ vi.mock("@paperclipai/adapter-utils/server-utils", async (importOriginal) => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
const { isK8s404, buildPartialRunError, classifyOrphan, describePodTerminatedError, streamPodLogsOnce, shouldAbortForCancellation, execute } = await import("./execute.js");
|
const { isK8s404, buildPartialRunError, classifyOrphan, describePodTerminatedError, describeTruncationCause, streamPodLogsOnce, shouldAbortForCancellation, execute } = await import("./execute.js");
|
||||||
|
|
||||||
function makeJob(opts: {
|
function makeJob(opts: {
|
||||||
runId?: string;
|
runId?: string;
|
||||||
@@ -150,10 +150,10 @@ describe("buildPartialRunError", () => {
|
|||||||
expect(buildPartialRunError(null, "", "")).toBe("Claude exited with code -1");
|
expect(buildPartialRunError(null, "", "")).toBe("Claude exited with code -1");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("skips system/init events and returns generic message when only init captured", () => {
|
it("returns init-only message when stdout is init-only with non-zero exit code (FAR-101)", () => {
|
||||||
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine);
|
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine);
|
||||||
expect(msg).toBe(
|
expect(msg).toBe(
|
||||||
"Claude started but did not produce a result (model: claude-sonnet-4-6) — check API credentials, model support, and adapter config",
|
"Claude exited immediately after init (model: claude-sonnet-4-6) (exit code 1) — the model may be unsupported or the session may have been rejected before producing output",
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -170,15 +170,15 @@ describe("buildPartialRunError", () => {
|
|||||||
expect(msg).toBe("Claude exited with code 1: Error: no API key configured");
|
expect(msg).toBe("Claude exited with code 1: Error: no API key configured");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("skips result events (structured protocol artefact — not surfaced verbatim)", () => {
|
it("returns init-only message when stdout has init + result event but no plain content (structured artefact, not surfaced verbatim)", () => {
|
||||||
// In production, buildPartialRunError is only called when parseClaudeStreamJson
|
// In production, buildPartialRunError is only called when parseClaudeStreamJson
|
||||||
// returns null (no result event). If somehow a result event appears here, the
|
// returns null (no result event). If somehow a result event appears here, the
|
||||||
// raw JSON blob must not be shown — the "did not produce a result" message is
|
// raw JSON blob must not be shown — the init-only message is cleaner and avoids
|
||||||
// cleaner and avoids leaking protocol internals to the UI.
|
// leaking protocol internals to the UI.
|
||||||
const resultLike = JSON.stringify({ type: "result", subtype: "error", result: "rate limit" });
|
const resultLike = JSON.stringify({ type: "result", subtype: "error", result: "rate limit" });
|
||||||
const stdout = [initLine, resultLike].join("\n");
|
const stdout = [initLine, resultLike].join("\n");
|
||||||
const msg = buildPartialRunError(2, "claude-sonnet-4-6", stdout);
|
const msg = buildPartialRunError(2, "claude-sonnet-4-6", stdout);
|
||||||
expect(msg).toContain("did not produce a result");
|
expect(msg).toContain("Claude exited immediately after init");
|
||||||
expect(msg).toContain("claude-sonnet-4-6");
|
expect(msg).toContain("claude-sonnet-4-6");
|
||||||
expect(msg).not.toMatch(/\{.*type.*result/);
|
expect(msg).not.toMatch(/\{.*type.*result/);
|
||||||
});
|
});
|
||||||
@@ -362,6 +362,33 @@ describe("describePodTerminatedError", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("describeTruncationCause", () => {
|
||||||
|
it("annotates exit code 137 as SIGKILL/OOM", () => {
|
||||||
|
const msg = describeTruncationCause({ exitCode: 137, reason: "OOMKilled", message: "Memory cgroup out of memory", signal: null });
|
||||||
|
expect(msg).toContain("exit code 137");
|
||||||
|
expect(msg).toContain("SIGKILL");
|
||||||
|
expect(msg).toContain("OOMKilled");
|
||||||
|
expect(msg).toContain("Memory cgroup out of memory");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("annotates exit code 143 as SIGTERM", () => {
|
||||||
|
const msg = describeTruncationCause({ exitCode: 143, reason: null, message: null, signal: null });
|
||||||
|
expect(msg).toContain("exit code 143");
|
||||||
|
expect(msg).toContain("SIGTERM");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("falls back to 'pod state unavailable' when state is null", () => {
|
||||||
|
const msg = describeTruncationCause(null);
|
||||||
|
expect(msg).toContain("pod state unavailable");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("emits 'no exit code' when exitCode is null but state exists", () => {
|
||||||
|
const msg = describeTruncationCause({ exitCode: null, reason: "Error", message: null, signal: null });
|
||||||
|
expect(msg).toContain("no exit code");
|
||||||
|
expect(msg).toContain("reason=Error");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe("execute: all-invalid agent.id (N4)", () => {
|
describe("execute: all-invalid agent.id (N4)", () => {
|
||||||
it("returns hard error without creating a Job when agent.id sanitizes to null", async () => {
|
it("returns hard error without creating a Job when agent.id sanitizes to null", async () => {
|
||||||
const logs: string[] = [];
|
const logs: string[] = [];
|
||||||
@@ -1019,7 +1046,7 @@ describe("execute: happy path", () => {
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
mockCoreListPods.mockResolvedValue({
|
mockCoreListPods.mockResolvedValue({
|
||||||
items: [{ metadata: { name: "pod-abc" }, status: { containerStatuses: [{ name: "claude", state: { terminated: { exitCode: 137 } } }] } }],
|
items: [{ metadata: { name: "pod-abc" }, status: { containerStatuses: [{ name: "claude", state: { terminated: { exitCode: 137, reason: "OOMKilled", message: "Memory cgroup out of memory" } } }] } }],
|
||||||
});
|
});
|
||||||
|
|
||||||
const executePromise = execute(makeCtx());
|
const executePromise = execute(makeCtx());
|
||||||
@@ -1030,6 +1057,9 @@ describe("execute: happy path", () => {
|
|||||||
expect(result.errorMessage).toContain("truncated mid-stream");
|
expect(result.errorMessage).toContain("truncated mid-stream");
|
||||||
expect(result.errorMessage).toContain("claude-opus-4-7");
|
expect(result.errorMessage).toContain("claude-opus-4-7");
|
||||||
expect(result.errorMessage).toContain("exit code 137");
|
expect(result.errorMessage).toContain("exit code 137");
|
||||||
|
expect(result.errorMessage).toContain("SIGKILL");
|
||||||
|
expect(result.errorMessage).toContain("OOMKilled");
|
||||||
|
expect(result.errorMessage).toContain("Memory cgroup out of memory");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("reconnects log stream and logs status when job completion takes > 3s", async () => {
|
it("reconnects log stream and logs status when job completion takes > 3s", async () => {
|
||||||
|
|||||||
+105
-25
@@ -110,24 +110,12 @@ export function shouldAbortForCancellation(runStatus: string | undefined): boole
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the error message when Claude's stdout contains no result event.
|
* Returns the first non-JSON/plain-text line in stdout, treating JSON objects
|
||||||
* Skips system/init event lines so the UI doesn't display the raw init JSON.
|
* with a "type" field as protocol artefacts and skipping them.
|
||||||
* Exported for unit tests.
|
* Used by buildPartialRunError to detect init-only runs.
|
||||||
*/
|
*/
|
||||||
export function buildPartialRunError(
|
function firstContentLine(stdout: string): string {
|
||||||
exitCode: number | null,
|
return stdout.split(/\r?\n/)
|
||||||
model: string,
|
|
||||||
stdout: string,
|
|
||||||
): string {
|
|
||||||
if (exitCode === 0) return "Failed to parse Claude JSON output";
|
|
||||||
|
|
||||||
// Walk stdout lines and skip every structured streaming event (any JSON
|
|
||||||
// object that carries a non-empty "type" field: system, assistant, user,
|
|
||||||
// rate_limit_event, result, …). All of these are protocol artefacts and
|
|
||||||
// produce confusing raw-JSON blobs when surfaced verbatim as an error
|
|
||||||
// message. Only plain-text lines (non-JSON, or JSON without a type field)
|
|
||||||
// are treated as human-readable content worth including in the error.
|
|
||||||
const firstContentLine = stdout.split(/\r?\n/)
|
|
||||||
.map((l) => l.trim())
|
.map((l) => l.trim())
|
||||||
.find((l) => {
|
.find((l) => {
|
||||||
if (!l) return false;
|
if (!l) return false;
|
||||||
@@ -142,19 +130,55 @@ export function buildPartialRunError(
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}) ?? "";
|
}) ?? "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true when stdout contains only init/system/assistant events from the
|
||||||
|
* given model with no human-readable content lines. Used to detect init-only
|
||||||
|
* non-zero-exit runs that should be classified as claude_init_failed rather than
|
||||||
|
* the generic "Claude exited with code N" message.
|
||||||
|
*/
|
||||||
|
function isInitOnlyRun(model: string, stdout: string): boolean {
|
||||||
|
if (!stdout.trim() || !model) return false;
|
||||||
|
const content = firstContentLine(stdout);
|
||||||
|
if (content) return false;
|
||||||
|
// Check that at least the init event for this model was seen
|
||||||
|
const hasModelInit = stdout.includes(`"model":"${model}"`) || stdout.includes(`"model":"${model.replace(/-/g, "_")}"`);
|
||||||
|
return hasModelInit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the error message when Claude's stdout contains no result event.
|
||||||
|
* Skips system/init event lines so the UI doesn't display the raw init JSON.
|
||||||
|
* Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export function buildPartialRunError(
|
||||||
|
exitCode: number | null,
|
||||||
|
model: string,
|
||||||
|
stdout: string,
|
||||||
|
): string {
|
||||||
|
if (exitCode === 0) return "Failed to parse Claude JSON output";
|
||||||
|
|
||||||
// If the stream contained only structured events with no plain-text output,
|
// If the stream contained only structured events with no plain-text output,
|
||||||
// surface the model name so the operator can diagnose missing credentials
|
// surface the model name so the operator can diagnose missing credentials
|
||||||
// or unsupported/misconfigured model.
|
// or unsupported/misconfigured model.
|
||||||
const initOnlyOutput = stdout.trim() !== "" && model !== "" && !firstContentLine;
|
const contentLine = firstContentLine(stdout);
|
||||||
|
if (contentLine) {
|
||||||
|
return `Claude exited with code ${exitCode ?? -1}: ${contentLine}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isInitOnlyRun(model, stdout) && (exitCode ?? 0) !== 0) {
|
||||||
|
const modelHint = model ? ` (model: ${model})` : "";
|
||||||
|
return `Claude exited immediately after init${modelHint} (exit code ${exitCode ?? -1}) — the model may be unsupported or the session may have been rejected before producing output`;
|
||||||
|
}
|
||||||
|
|
||||||
|
const initOnlyOutput = stdout.trim() !== "" && model !== "";
|
||||||
if (initOnlyOutput) {
|
if (initOnlyOutput) {
|
||||||
const modelHint = model ? ` (model: ${model})` : "";
|
const modelHint = model ? ` (model: ${model})` : "";
|
||||||
return `Claude started but did not produce a result${modelHint} — check API credentials, model support, and adapter config`;
|
return `Claude started but did not produce a result${modelHint} — check API credentials, model support, and adapter config`;
|
||||||
}
|
}
|
||||||
|
|
||||||
return firstContentLine
|
return `Claude exited with code ${exitCode ?? -1}`;
|
||||||
? `Claude exited with code ${exitCode ?? -1}: ${firstContentLine}`
|
|
||||||
: `Claude exited with code ${exitCode ?? -1}`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export type OrphanClassification =
|
export type OrphanClassification =
|
||||||
@@ -574,6 +598,27 @@ async function waitForJobCompletion(
|
|||||||
* Get the exit code from the Job's pod.
|
* Get the exit code from the Job's pod.
|
||||||
*/
|
*/
|
||||||
async function getPodExitCode(namespace: string, jobName: string, kubeconfigPath?: string): Promise<number | null> {
|
async function getPodExitCode(namespace: string, jobName: string, kubeconfigPath?: string): Promise<number | null> {
|
||||||
|
const state = await getPodTerminatedState(namespace, jobName, kubeconfigPath);
|
||||||
|
return state?.exitCode ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the claude container's terminated state (exit code, reason, message,
|
||||||
|
* signal) from the Job's pod. Returns null if the pod or container is gone.
|
||||||
|
* Used by the no-result error path to explain *why* a run was truncated.
|
||||||
|
*/
|
||||||
|
export interface PodTerminatedState {
|
||||||
|
exitCode: number | null;
|
||||||
|
reason: string | null;
|
||||||
|
message: string | null;
|
||||||
|
signal: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function getPodTerminatedState(
|
||||||
|
namespace: string,
|
||||||
|
jobName: string,
|
||||||
|
kubeconfigPath?: string,
|
||||||
|
): Promise<PodTerminatedState | null> {
|
||||||
const coreApi = getCoreApi(kubeconfigPath);
|
const coreApi = getCoreApi(kubeconfigPath);
|
||||||
const podList = await coreApi.listNamespacedPod({
|
const podList = await coreApi.listNamespacedPod({
|
||||||
namespace,
|
namespace,
|
||||||
@@ -583,7 +628,40 @@ async function getPodExitCode(namespace: string, jobName: string, kubeconfigPath
|
|||||||
if (!pod) return null;
|
if (!pod) return null;
|
||||||
|
|
||||||
const containerStatus = pod.status?.containerStatuses?.find((s) => s.name === "claude");
|
const containerStatus = pod.status?.containerStatuses?.find((s) => s.name === "claude");
|
||||||
return containerStatus?.state?.terminated?.exitCode ?? null;
|
const terminated = containerStatus?.state?.terminated;
|
||||||
|
if (!terminated) return null;
|
||||||
|
return {
|
||||||
|
exitCode: terminated.exitCode ?? null,
|
||||||
|
reason: terminated.reason ?? null,
|
||||||
|
message: (terminated.message ?? "").trim() || null,
|
||||||
|
signal: terminated.signal ?? null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format a human-readable explanation for a truncated run, including the
|
||||||
|
* pod's claude-container terminated state when available. Exit code 137
|
||||||
|
* is annotated as SIGKILL/OOM since that is the most common cause.
|
||||||
|
* Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export function describeTruncationCause(
|
||||||
|
state: PodTerminatedState | null,
|
||||||
|
): string {
|
||||||
|
if (!state) {
|
||||||
|
return "pod state unavailable — likely deleted before exit could be read";
|
||||||
|
}
|
||||||
|
const parts: string[] = [];
|
||||||
|
if (state.exitCode !== null) {
|
||||||
|
parts.push(`exit code ${state.exitCode}`);
|
||||||
|
if (state.exitCode === 137) parts.push("SIGKILL (commonly OOMKilled)");
|
||||||
|
else if (state.exitCode === 143) parts.push("SIGTERM");
|
||||||
|
} else {
|
||||||
|
parts.push("no exit code");
|
||||||
|
}
|
||||||
|
if (state.signal !== null) parts.push(`signal ${state.signal}`);
|
||||||
|
if (state.reason) parts.push(`reason=${state.reason}`);
|
||||||
|
if (state.message) parts.push(`message=${state.message}`);
|
||||||
|
return parts.join(", ");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -998,6 +1076,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
|
|
||||||
let stdout = "";
|
let stdout = "";
|
||||||
let exitCode: number | null = null;
|
let exitCode: number | null = null;
|
||||||
|
let podTerminatedState: PodTerminatedState | null = null;
|
||||||
let jobTimedOut = false;
|
let jobTimedOut = false;
|
||||||
let keepaliveTimer: ReturnType<typeof setInterval> | null = null;
|
let keepaliveTimer: ReturnType<typeof setInterval> | null = null;
|
||||||
// Set when we return a mismatch error so the finally block knows not to
|
// Set when we return a mismatch error so the finally block knows not to
|
||||||
@@ -1297,7 +1376,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
exitCode = await getPodExitCode(namespace, jobName, kubeconfigPath);
|
podTerminatedState = await getPodTerminatedState(namespace, jobName, kubeconfigPath);
|
||||||
|
exitCode = podTerminatedState?.exitCode ?? null;
|
||||||
} finally {
|
} finally {
|
||||||
if (keepaliveTimer) clearInterval(keepaliveTimer);
|
if (keepaliveTimer) clearInterval(keepaliveTimer);
|
||||||
activeJobs.delete(activeJobRef);
|
activeJobs.delete(activeJobRef);
|
||||||
@@ -1368,13 +1448,13 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (parsedStream.truncatedMidStream) {
|
if (parsedStream.truncatedMidStream) {
|
||||||
const exitHint = exitCode === null ? "no exit code" : `exit code ${exitCode}`;
|
const cause = describeTruncationCause(podTerminatedState);
|
||||||
const modelHint = parsedStream.model ? ` (model: ${parsedStream.model})` : "";
|
const modelHint = parsedStream.model ? ` (model: ${parsedStream.model})` : "";
|
||||||
return {
|
return {
|
||||||
exitCode,
|
exitCode,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: `Claude run was truncated mid-stream${modelHint} — assistant produced content but no result event arrived (${exitHint}); pod may have been terminated, OOMKilled, or the CLI crashed`,
|
errorMessage: `Claude run was truncated mid-stream${modelHint} — assistant produced content but no result event arrived; ${cause}`,
|
||||||
errorCode: "claude_truncated",
|
errorCode: "claude_truncated",
|
||||||
resultJson: { stdout },
|
resultJson: { stdout },
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user