Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1d894f104f | |||
| fc3866924a | |||
| 368254d75d | |||
| 34756f8215 | |||
| 07ef106c66 | |||
| fd7dce7239 | |||
| b1878c684e | |||
| 83e105393c | |||
| 49288fa5c7 | |||
| dae9e18659 | |||
| 6923597b31 | |||
| d184a1732b | |||
| be84428226 | |||
| d9928030d6 | |||
| 76fc6fcdfc | |||
| 3169f49f23 | |||
| e0b35d230f | |||
| 4e2c36319d | |||
| 8474f78fe1 |
Generated
+2
-2
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.45",
|
"version": "0.1.54",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.45",
|
"version": "0.1.54",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@kubernetes/client-node": "^1.0.0",
|
"@kubernetes/client-node": "^1.0.0",
|
||||||
|
|||||||
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "paperclip-adapter-claude-k8s",
|
"name": "paperclip-adapter-claude-k8s",
|
||||||
"version": "0.1.45",
|
"version": "0.1.55",
|
||||||
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
"description": "Paperclip adapter plugin that runs Claude Code agents as Kubernetes Jobs",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"repository": {
|
"repository": {
|
||||||
|
|||||||
+2
-1
@@ -1,7 +1,8 @@
|
|||||||
export const type = "claude_k8s";
|
export const type = "claude_k8s";
|
||||||
export const label = "Claude (Kubernetes)";
|
export const label = "Claude (Kubernetes)";
|
||||||
|
|
||||||
export const models: undefined = undefined;
|
import { DIRECT_MODELS, BEDROCK_MODELS, isBedrockEnv } from "./server/models.js";
|
||||||
|
export const models = isBedrockEnv() ? BEDROCK_MODELS : DIRECT_MODELS;
|
||||||
|
|
||||||
export const agentConfigurationDoc = `# claude_k8s agent configuration
|
export const agentConfigurationDoc = `# claude_k8s agent configuration
|
||||||
|
|
||||||
|
|||||||
+148
-21
@@ -60,7 +60,7 @@ vi.mock("@paperclipai/adapter-utils/server-utils", async (importOriginal) => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
const { isK8s404, buildPartialRunError, classifyOrphan, describePodTerminatedError, streamPodLogsOnce, shouldAbortForCancellation, execute } = await import("./execute.js");
|
const { isK8s404, buildPartialRunError, classifyOrphan, describePodTerminatedError, describeTruncationCause, streamPodLogsOnce, shouldAbortForCancellation, execute } = await import("./execute.js");
|
||||||
|
|
||||||
function makeJob(opts: {
|
function makeJob(opts: {
|
||||||
runId?: string;
|
runId?: string;
|
||||||
@@ -150,10 +150,10 @@ describe("buildPartialRunError", () => {
|
|||||||
expect(buildPartialRunError(null, "", "")).toBe("Claude exited with code -1");
|
expect(buildPartialRunError(null, "", "")).toBe("Claude exited with code -1");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("skips system/init events and returns generic message when only init captured", () => {
|
it("returns init-only message when stdout is init-only with non-zero exit code (FAR-101)", () => {
|
||||||
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine);
|
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine);
|
||||||
expect(msg).toBe(
|
expect(msg).toBe(
|
||||||
"Claude started but did not produce a result (model: claude-sonnet-4-6) — check API credentials, model support, and adapter config",
|
"Claude exited immediately after init (model: claude-sonnet-4-6) (exit code 1) — the model may be unsupported or the session may have been rejected before producing output",
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -170,15 +170,15 @@ describe("buildPartialRunError", () => {
|
|||||||
expect(msg).toBe("Claude exited with code 1: Error: no API key configured");
|
expect(msg).toBe("Claude exited with code 1: Error: no API key configured");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("skips result events (structured protocol artefact — not surfaced verbatim)", () => {
|
it("returns init-only message when stdout has init + result event but no plain content (structured artefact, not surfaced verbatim)", () => {
|
||||||
// In production, buildPartialRunError is only called when parseClaudeStreamJson
|
// In production, buildPartialRunError is only called when parseClaudeStreamJson
|
||||||
// returns null (no result event). If somehow a result event appears here, the
|
// returns null (no result event). If somehow a result event appears here, the
|
||||||
// raw JSON blob must not be shown — the "did not produce a result" message is
|
// raw JSON blob must not be shown — the init-only message is cleaner and avoids
|
||||||
// cleaner and avoids leaking protocol internals to the UI.
|
// leaking protocol internals to the UI.
|
||||||
const resultLike = JSON.stringify({ type: "result", subtype: "error", result: "rate limit" });
|
const resultLike = JSON.stringify({ type: "result", subtype: "error", result: "rate limit" });
|
||||||
const stdout = [initLine, resultLike].join("\n");
|
const stdout = [initLine, resultLike].join("\n");
|
||||||
const msg = buildPartialRunError(2, "claude-sonnet-4-6", stdout);
|
const msg = buildPartialRunError(2, "claude-sonnet-4-6", stdout);
|
||||||
expect(msg).toContain("did not produce a result");
|
expect(msg).toContain("Claude exited immediately after init");
|
||||||
expect(msg).toContain("claude-sonnet-4-6");
|
expect(msg).toContain("claude-sonnet-4-6");
|
||||||
expect(msg).not.toMatch(/\{.*type.*result/);
|
expect(msg).not.toMatch(/\{.*type.*result/);
|
||||||
});
|
});
|
||||||
@@ -245,6 +245,44 @@ describe("buildPartialRunError", () => {
|
|||||||
const msg = buildPartialRunError(1, "model-x", stdout);
|
const msg = buildPartialRunError(1, "model-x", stdout);
|
||||||
expect(msg).toBe("Claude exited with code 1: real error line");
|
expect(msg).toBe("Claude exited with code 1: real error line");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("appends pod terminated reason/message when state is provided (FAR-100)", () => {
|
||||||
|
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine, {
|
||||||
|
exitCode: 1,
|
||||||
|
reason: "Error",
|
||||||
|
message: "model not supported",
|
||||||
|
signal: null,
|
||||||
|
});
|
||||||
|
expect(msg).toContain("Claude exited immediately after init");
|
||||||
|
expect(msg).toContain("claude-sonnet-4-6");
|
||||||
|
expect(msg).toContain("[pod: reason=Error, message=model not supported]");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("flags exit 137 as OOMKilled in pod cause", () => {
|
||||||
|
const msg = buildPartialRunError(137, "claude-sonnet-4-6", initLine, {
|
||||||
|
exitCode: 137,
|
||||||
|
reason: "OOMKilled",
|
||||||
|
message: null,
|
||||||
|
signal: null,
|
||||||
|
});
|
||||||
|
expect(msg).toContain("[pod: reason=OOMKilled, SIGKILL (commonly OOMKilled)]");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("appends pod cause to content-line message", () => {
|
||||||
|
const stdout = [initLine, "Error: bad request"].join("\n");
|
||||||
|
const msg = buildPartialRunError(1, "claude-sonnet-4-6", stdout, {
|
||||||
|
exitCode: 1,
|
||||||
|
reason: "Error",
|
||||||
|
message: null,
|
||||||
|
signal: null,
|
||||||
|
});
|
||||||
|
expect(msg).toBe("Claude exited with code 1: Error: bad request [pod: reason=Error]");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not append anything when podState is null (back-compat)", () => {
|
||||||
|
const msg = buildPartialRunError(1, "claude-sonnet-4-6", initLine, null);
|
||||||
|
expect(msg).not.toContain("[pod:");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("classifyOrphan", () => {
|
describe("classifyOrphan", () => {
|
||||||
@@ -362,6 +400,33 @@ describe("describePodTerminatedError", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("describeTruncationCause", () => {
|
||||||
|
it("annotates exit code 137 as SIGKILL/OOM", () => {
|
||||||
|
const msg = describeTruncationCause({ exitCode: 137, reason: "OOMKilled", message: "Memory cgroup out of memory", signal: null });
|
||||||
|
expect(msg).toContain("exit code 137");
|
||||||
|
expect(msg).toContain("SIGKILL");
|
||||||
|
expect(msg).toContain("OOMKilled");
|
||||||
|
expect(msg).toContain("Memory cgroup out of memory");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("annotates exit code 143 as SIGTERM", () => {
|
||||||
|
const msg = describeTruncationCause({ exitCode: 143, reason: null, message: null, signal: null });
|
||||||
|
expect(msg).toContain("exit code 143");
|
||||||
|
expect(msg).toContain("SIGTERM");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("falls back to 'pod state unavailable' when state is null", () => {
|
||||||
|
const msg = describeTruncationCause(null);
|
||||||
|
expect(msg).toContain("pod state unavailable");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("emits 'no exit code' when exitCode is null but state exists", () => {
|
||||||
|
const msg = describeTruncationCause({ exitCode: null, reason: "Error", message: null, signal: null });
|
||||||
|
expect(msg).toContain("no exit code");
|
||||||
|
expect(msg).toContain("reason=Error");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe("execute: all-invalid agent.id (N4)", () => {
|
describe("execute: all-invalid agent.id (N4)", () => {
|
||||||
it("returns hard error without creating a Job when agent.id sanitizes to null", async () => {
|
it("returns hard error without creating a Job when agent.id sanitizes to null", async () => {
|
||||||
const logs: string[] = [];
|
const logs: string[] = [];
|
||||||
@@ -954,7 +1019,8 @@ describe("execute: happy path", () => {
|
|||||||
const result = await executePromise;
|
const result = await executePromise;
|
||||||
|
|
||||||
expect(result.errorCode).toBe("k8s_job_deleted_externally");
|
expect(result.errorCode).toBe("k8s_job_deleted_externally");
|
||||||
expect(result.errorMessage).toBe("K8s Job was deleted externally before Claude could complete");
|
expect(result.errorMessage).toMatch(/^K8s Job was deleted externally before Claude could complete \[/);
|
||||||
|
expect(result.errorMessage).toContain("detected_via=");
|
||||||
expect(result.exitCode).toBeNull();
|
expect(result.exitCode).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1019,7 +1085,7 @@ describe("execute: happy path", () => {
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
mockCoreListPods.mockResolvedValue({
|
mockCoreListPods.mockResolvedValue({
|
||||||
items: [{ metadata: { name: "pod-abc" }, status: { containerStatuses: [{ name: "claude", state: { terminated: { exitCode: 137 } } }] } }],
|
items: [{ metadata: { name: "pod-abc" }, status: { containerStatuses: [{ name: "claude", state: { terminated: { exitCode: 137, reason: "OOMKilled", message: "Memory cgroup out of memory" } } }] } }],
|
||||||
});
|
});
|
||||||
|
|
||||||
const executePromise = execute(makeCtx());
|
const executePromise = execute(makeCtx());
|
||||||
@@ -1030,6 +1096,9 @@ describe("execute: happy path", () => {
|
|||||||
expect(result.errorMessage).toContain("truncated mid-stream");
|
expect(result.errorMessage).toContain("truncated mid-stream");
|
||||||
expect(result.errorMessage).toContain("claude-opus-4-7");
|
expect(result.errorMessage).toContain("claude-opus-4-7");
|
||||||
expect(result.errorMessage).toContain("exit code 137");
|
expect(result.errorMessage).toContain("exit code 137");
|
||||||
|
expect(result.errorMessage).toContain("SIGKILL");
|
||||||
|
expect(result.errorMessage).toContain("OOMKilled");
|
||||||
|
expect(result.errorMessage).toContain("Memory cgroup out of memory");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("reconnects log stream and logs status when job completion takes > 3s", async () => {
|
it("reconnects log stream and logs status when job completion takes > 3s", async () => {
|
||||||
@@ -1443,6 +1512,54 @@ describe("execute: log-stream-exit grace period (FAR-23)", () => {
|
|||||||
// (grace did not fire, real completion arrived)
|
// (grace did not fire, real completion arrived)
|
||||||
expect(result.errorMessage).toBeNull();
|
expect(result.errorMessage).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("does NOT fire grace when stream drops mid-output and reconnects with more output (FAR-107)", async () => {
|
||||||
|
// Reproduces Nancy / Privileged Escalation symptom: the K8s log API drops
|
||||||
|
// the streaming connection mid-run; streamPodLogs reconnects and the
|
||||||
|
// container is still producing. Before the fix, the grace timer was
|
||||||
|
// armed on first stream exit and fired 30s later regardless of whether
|
||||||
|
// output had resumed, surfacing claude_truncated even though the pod was
|
||||||
|
// still phase=Running.
|
||||||
|
let attemptIndex = 0;
|
||||||
|
mockLogFn.mockImplementation(
|
||||||
|
async (_ns: string, _pod: string, _ctr: string, writable: import("node:stream").Writable) => {
|
||||||
|
if (attemptIndex === 0) {
|
||||||
|
// Stream a partial init line then "drop" the connection without a
|
||||||
|
// result event — this is the transient API disconnect.
|
||||||
|
writable.write(JSON.stringify({ type: "system", subtype: "init", model: "claude-sonnet-4-6", session_id: "sess_test123" }) + "\n");
|
||||||
|
attemptIndex++;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Reconnect produces the rest of the stream including the result event.
|
||||||
|
writable.write(CLAUDE_HAPPY_OUTPUT);
|
||||||
|
},
|
||||||
|
);
|
||||||
|
// Job condition arrives only after the reconnect produces output, well
|
||||||
|
// beyond the 30s grace window; the old code would have grace-fired at
|
||||||
|
// ~30s and treated the run as truncated.
|
||||||
|
let readJobCalls = 0;
|
||||||
|
mockBatchReadJob.mockImplementation(async () => {
|
||||||
|
readJobCalls++;
|
||||||
|
// Stay non-terminal until the reconnect has had time to run and the
|
||||||
|
// grace window has fully elapsed since the FIRST disconnect.
|
||||||
|
if (readJobCalls < 25) return { status: { conditions: [] } };
|
||||||
|
return { status: { conditions: [{ type: "Complete", status: "True" }] } };
|
||||||
|
});
|
||||||
|
|
||||||
|
const executePromise = execute(makeCtx());
|
||||||
|
// t=3000: first reconnect sleep fires → second streamPodLogsOnce attempt
|
||||||
|
await vi.advanceTimersByTimeAsync(3_100);
|
||||||
|
// Drive past the old (buggy) 30s grace boundary without firing real completion
|
||||||
|
await vi.advanceTimersByTimeAsync(35_000);
|
||||||
|
// Then let the Job's Complete condition land
|
||||||
|
await vi.advanceTimersByTimeAsync(20_000);
|
||||||
|
const result = await executePromise;
|
||||||
|
|
||||||
|
// Run completed normally — grace must not have falsely truncated it.
|
||||||
|
expect(result.exitCode).toBe(0);
|
||||||
|
expect(result.errorCode).toBeUndefined();
|
||||||
|
expect(result.sessionId).toBe("sess_test123");
|
||||||
|
}, 80_000);
|
||||||
});
|
});
|
||||||
|
|
||||||
// ─── execute: concurrency guard — multiple orphan sorting ────────────────────
|
// ─── execute: concurrency guard — multiple orphan sorting ────────────────────
|
||||||
@@ -1492,16 +1609,24 @@ describe("shouldAbortForCancellation", () => {
|
|||||||
expect(shouldAbortForCancellation("cancelled")).toBe(true);
|
expect(shouldAbortForCancellation("cancelled")).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns true when status is 'failed'", () => {
|
it("returns true when status is 'cancelling'", () => {
|
||||||
expect(shouldAbortForCancellation("failed")).toBe(true);
|
expect(shouldAbortForCancellation("cancelling")).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns true when status is 'completed'", () => {
|
// FAR-107: terminal-but-not-cancelled statuses MUST NOT trigger Job deletion.
|
||||||
expect(shouldAbortForCancellation("completed")).toBe(true);
|
// The previous "anything but running" guard caused k8s_job_deleted_externally
|
||||||
|
// false positives for in-flight runs whenever the API briefly reported a
|
||||||
|
// transient/stale status.
|
||||||
|
it("returns false for non-cancellation terminal statuses (FAR-107)", () => {
|
||||||
|
expect(shouldAbortForCancellation("succeeded")).toBe(false);
|
||||||
|
expect(shouldAbortForCancellation("failed")).toBe(false);
|
||||||
|
expect(shouldAbortForCancellation("completed")).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns true for any non-running non-empty string", () => {
|
it("returns false for unknown statuses (FAR-107)", () => {
|
||||||
expect(shouldAbortForCancellation("unknown")).toBe(true);
|
expect(shouldAbortForCancellation("unknown")).toBe(false);
|
||||||
|
expect(shouldAbortForCancellation("queued")).toBe(false);
|
||||||
|
expect(shouldAbortForCancellation("pending")).toBe(false);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1702,7 +1827,7 @@ describe("execute: SIGTERM handler best-effort cleanup", () => {
|
|||||||
vi.useRealTimers();
|
vi.useRealTimers();
|
||||||
});
|
});
|
||||||
|
|
||||||
it("deletes the active Job when SIGTERM fires during execution", async () => {
|
it("does NOT delete active Jobs on SIGTERM — leaves them for orphan reattach (FAR-107)", async () => {
|
||||||
// Mock process.kill to prevent the test process from actually being killed.
|
// Mock process.kill to prevent the test process from actually being killed.
|
||||||
const killSpy = vi.spyOn(process, "kill").mockImplementation(() => true);
|
const killSpy = vi.spyOn(process, "kill").mockImplementation(() => true);
|
||||||
|
|
||||||
@@ -1713,17 +1838,19 @@ describe("execute: SIGTERM handler best-effort cleanup", () => {
|
|||||||
// Flush microtasks through the async setup chain: getSelfPodInfo, listJobs,
|
// Flush microtasks through the async setup chain: getSelfPodInfo, listJobs,
|
||||||
// readSkillEntries, prepareBundle, createJob, onLog, activeJobs.add(), and
|
// readSkillEntries, prepareBundle, createJob, onLog, activeJobs.add(), and
|
||||||
// ensureSigtermHandler() all complete before the try block enters streaming.
|
// ensureSigtermHandler() all complete before the try block enters streaming.
|
||||||
// 30 rounds is more than enough for the ~7 sequential await points.
|
|
||||||
for (let i = 0; i < 30; i++) await Promise.resolve();
|
for (let i = 0; i < 30; i++) await Promise.resolve();
|
||||||
|
|
||||||
// Emit SIGTERM — the process.once handler fires synchronously and kicks off
|
// Reset deleteJob spy after setup so we can detect any SIGTERM-driven calls.
|
||||||
// async cleanup (deleteNamespacedJob). The mock resolves immediately.
|
mockBatchDeleteJob.mockClear();
|
||||||
|
|
||||||
|
// Emit SIGTERM — the handler must re-raise to the default handler without
|
||||||
|
// touching the K8s Job. Deleting the Job here would surface as
|
||||||
|
// k8s_job_deleted_externally in the in-flight run (FAR-107).
|
||||||
process.emit("SIGTERM");
|
process.emit("SIGTERM");
|
||||||
|
|
||||||
// Flush microtasks for deleteJob to resolve and the .then(process.kill) to run.
|
|
||||||
for (let i = 0; i < 10; i++) await Promise.resolve();
|
for (let i = 0; i < 10; i++) await Promise.resolve();
|
||||||
|
|
||||||
expect(mockBatchDeleteJob).toHaveBeenCalled();
|
expect(mockBatchDeleteJob).not.toHaveBeenCalled();
|
||||||
expect(killSpy).toHaveBeenCalledWith(process.pid, "SIGTERM");
|
expect(killSpy).toHaveBeenCalledWith(process.pid, "SIGTERM");
|
||||||
|
|
||||||
killSpy.mockRestore();
|
killSpy.mockRestore();
|
||||||
|
|||||||
+391
-82
@@ -58,30 +58,20 @@ function ensureSigtermHandler(): void {
|
|||||||
if (sigtermHandlerRegistered) return;
|
if (sigtermHandlerRegistered) return;
|
||||||
sigtermHandlerRegistered = true;
|
sigtermHandlerRegistered = true;
|
||||||
process.once("SIGTERM", () => {
|
process.once("SIGTERM", () => {
|
||||||
const jobs = [...activeJobs];
|
// Do NOT delete active K8s Jobs on SIGTERM (FAR-107). Paperclip itself
|
||||||
void Promise.allSettled(
|
// receives SIGTERM during rolling deploys, evictions, scale-down, etc.
|
||||||
jobs.map(async (ref) => {
|
// Deleting the Jobs we own there causes the in-flight heartbeat to surface
|
||||||
try {
|
// a false-positive `k8s_job_deleted_externally` error and tears down work
|
||||||
const batchApi = getBatchApi(ref.kubeconfigPath);
|
// the user expected to keep running.
|
||||||
await batchApi.deleteNamespacedJob({
|
//
|
||||||
name: ref.jobName,
|
// The correct behaviour with `reattachOrphanedJobs=true` (default) is to
|
||||||
namespace: ref.namespace,
|
// leave the Jobs alive: the next paperclip process discovers them via the
|
||||||
body: { propagationPolicy: "Background" },
|
// orphan-classification path and reattaches their log streams. When
|
||||||
});
|
// `reattachOrphanedJobs=false` the operator explicitly opted into manual
|
||||||
} catch { /* best-effort */ }
|
// cleanup and should not have us auto-deleting either. The owning Job's
|
||||||
if (ref.promptSecretName && ref.promptSecretNamespace) {
|
// ownerReference (FAR-15) keeps the prompt Secret tied to the Job, so
|
||||||
try {
|
// both survive together and TTL cleans them up after natural completion.
|
||||||
const coreApi = getCoreApi(ref.kubeconfigPath);
|
process.kill(process.pid, "SIGTERM");
|
||||||
await coreApi.deleteNamespacedSecret({
|
|
||||||
name: ref.promptSecretName,
|
|
||||||
namespace: ref.promptSecretNamespace,
|
|
||||||
});
|
|
||||||
} catch { /* best-effort */ }
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
).then(() => {
|
|
||||||
process.kill(process.pid, "SIGTERM");
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,34 +90,32 @@ export function isK8s404(err: unknown): boolean {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true when the heartbeat-run status indicates the run is no longer
|
* Returns true when the heartbeat-run status indicates the run was explicitly
|
||||||
* active and the K8s Job should be cancelled.
|
* cancelled and the K8s Job must be torn down.
|
||||||
|
*
|
||||||
|
* Only `cancelled` / `cancelling` qualify. Treating any non-`running` status
|
||||||
|
* as cancellation (the previous behaviour) produced spurious
|
||||||
|
* k8s_job_deleted_externally errors for in-flight runs whenever the API
|
||||||
|
* briefly reported a transient or stale status — Nancy's runs at
|
||||||
|
* Privileged Escalation hit this without anyone actually cancelling them
|
||||||
|
* (FAR-107). Other terminal statuses (`succeeded`/`failed`/`completed`)
|
||||||
|
* are unreachable in practice while the adapter is still executing
|
||||||
|
* (the adapter's own return is what flips them) and even if observed,
|
||||||
|
* they do not warrant our deleting a Job that may still be doing work.
|
||||||
* Exported for unit tests.
|
* Exported for unit tests.
|
||||||
*/
|
*/
|
||||||
export function shouldAbortForCancellation(runStatus: string | undefined): boolean {
|
export function shouldAbortForCancellation(runStatus: string | undefined): boolean {
|
||||||
if (!runStatus) return false;
|
if (!runStatus) return false;
|
||||||
return runStatus !== "running";
|
return runStatus === "cancelled" || runStatus === "cancelling";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the error message when Claude's stdout contains no result event.
|
* Returns the first non-JSON/plain-text line in stdout, treating JSON objects
|
||||||
* Skips system/init event lines so the UI doesn't display the raw init JSON.
|
* with a "type" field as protocol artefacts and skipping them.
|
||||||
* Exported for unit tests.
|
* Used by buildPartialRunError to detect init-only runs.
|
||||||
*/
|
*/
|
||||||
export function buildPartialRunError(
|
function firstContentLine(stdout: string): string {
|
||||||
exitCode: number | null,
|
return stdout.split(/\r?\n/)
|
||||||
model: string,
|
|
||||||
stdout: string,
|
|
||||||
): string {
|
|
||||||
if (exitCode === 0) return "Failed to parse Claude JSON output";
|
|
||||||
|
|
||||||
// Walk stdout lines and skip every structured streaming event (any JSON
|
|
||||||
// object that carries a non-empty "type" field: system, assistant, user,
|
|
||||||
// rate_limit_event, result, …). All of these are protocol artefacts and
|
|
||||||
// produce confusing raw-JSON blobs when surfaced verbatim as an error
|
|
||||||
// message. Only plain-text lines (non-JSON, or JSON without a type field)
|
|
||||||
// are treated as human-readable content worth including in the error.
|
|
||||||
const firstContentLine = stdout.split(/\r?\n/)
|
|
||||||
.map((l) => l.trim())
|
.map((l) => l.trim())
|
||||||
.find((l) => {
|
.find((l) => {
|
||||||
if (!l) return false;
|
if (!l) return false;
|
||||||
@@ -142,19 +130,82 @@ export function buildPartialRunError(
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}) ?? "";
|
}) ?? "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true when stdout contains only init/system/assistant events from the
|
||||||
|
* given model with no human-readable content lines. Used to detect init-only
|
||||||
|
* non-zero-exit runs that should be classified as claude_init_failed rather than
|
||||||
|
* the generic "Claude exited with code N" message.
|
||||||
|
*/
|
||||||
|
function isInitOnlyRun(model: string, stdout: string): boolean {
|
||||||
|
if (!stdout.trim() || !model) return false;
|
||||||
|
const content = firstContentLine(stdout);
|
||||||
|
if (content) return false;
|
||||||
|
// Check that at least the init event for this model was seen
|
||||||
|
const hasModelInit = stdout.includes(`"model":"${model}"`) || stdout.includes(`"model":"${model.replace(/-/g, "_")}"`);
|
||||||
|
return hasModelInit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append the pod's terminated-state detail (reason/message/signal) to a
|
||||||
|
* partial-run error message when available. Exit code is already in the
|
||||||
|
* caller-supplied message, so we only append fields that add new signal —
|
||||||
|
* specifically reason (e.g. OOMKilled, Error, ContainerCannotRun), message
|
||||||
|
* (kubelet diagnostic text), and signal. Saves the operator a kubectl trip.
|
||||||
|
*/
|
||||||
|
function appendPodCause(message: string, state: PodTerminatedState | null): string {
|
||||||
|
if (!state) return message;
|
||||||
|
const parts: string[] = [];
|
||||||
|
if (state.reason) parts.push(`reason=${state.reason}`);
|
||||||
|
if (state.message) parts.push(`message=${state.message}`);
|
||||||
|
if (state.signal !== null) parts.push(`signal=${state.signal}`);
|
||||||
|
if (state.exitCode === 137) parts.push("SIGKILL (commonly OOMKilled)");
|
||||||
|
if (parts.length === 0) return message;
|
||||||
|
return `${message} [pod: ${parts.join(", ")}]`;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the error message when Claude's stdout contains no result event.
|
||||||
|
* Skips system/init event lines so the UI doesn't display the raw init JSON.
|
||||||
|
* When `podState` is provided, appends the K8s container terminated reason/
|
||||||
|
* message so failures self-explain without requiring `kubectl`.
|
||||||
|
* Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export function buildPartialRunError(
|
||||||
|
exitCode: number | null,
|
||||||
|
model: string,
|
||||||
|
stdout: string,
|
||||||
|
podState: PodTerminatedState | null = null,
|
||||||
|
): string {
|
||||||
|
if (exitCode === 0) return "Failed to parse Claude JSON output";
|
||||||
|
|
||||||
// If the stream contained only structured events with no plain-text output,
|
// If the stream contained only structured events with no plain-text output,
|
||||||
// surface the model name so the operator can diagnose missing credentials
|
// surface the model name so the operator can diagnose missing credentials
|
||||||
// or unsupported/misconfigured model.
|
// or unsupported/misconfigured model.
|
||||||
const initOnlyOutput = stdout.trim() !== "" && model !== "" && !firstContentLine;
|
const contentLine = firstContentLine(stdout);
|
||||||
if (initOnlyOutput) {
|
if (contentLine) {
|
||||||
const modelHint = model ? ` (model: ${model})` : "";
|
return appendPodCause(`Claude exited with code ${exitCode ?? -1}: ${contentLine}`, podState);
|
||||||
return `Claude started but did not produce a result${modelHint} — check API credentials, model support, and adapter config`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return firstContentLine
|
if (isInitOnlyRun(model, stdout) && (exitCode ?? 0) !== 0) {
|
||||||
? `Claude exited with code ${exitCode ?? -1}: ${firstContentLine}`
|
const modelHint = model ? ` (model: ${model})` : "";
|
||||||
: `Claude exited with code ${exitCode ?? -1}`;
|
return appendPodCause(
|
||||||
|
`Claude exited immediately after init${modelHint} (exit code ${exitCode ?? -1}) — the model may be unsupported or the session may have been rejected before producing output`,
|
||||||
|
podState,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const initOnlyOutput = stdout.trim() !== "" && model !== "";
|
||||||
|
if (initOnlyOutput) {
|
||||||
|
const modelHint = model ? ` (model: ${model})` : "";
|
||||||
|
return appendPodCause(
|
||||||
|
`Claude started but did not produce a result${modelHint} — check API credentials, model support, and adapter config`,
|
||||||
|
podState,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return appendPodCause(`Claude exited with code ${exitCode ?? -1}`, podState);
|
||||||
}
|
}
|
||||||
|
|
||||||
export type OrphanClassification =
|
export type OrphanClassification =
|
||||||
@@ -350,6 +401,7 @@ export async function streamPodLogsOnce(
|
|||||||
sinceSeconds?: number,
|
sinceSeconds?: number,
|
||||||
dedup?: LogLineDedupFilter,
|
dedup?: LogLineDedupFilter,
|
||||||
stopSignal?: { stopped: boolean },
|
stopSignal?: { stopped: boolean },
|
||||||
|
activity?: { lastActiveAt: number },
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const logApi = getLogApi(kubeconfigPath);
|
const logApi = getLogApi(kubeconfigPath);
|
||||||
const chunks: string[] = [];
|
const chunks: string[] = [];
|
||||||
@@ -358,6 +410,13 @@ export async function streamPodLogsOnce(
|
|||||||
write(chunk: Buffer, _encoding, callback) {
|
write(chunk: Buffer, _encoding, callback) {
|
||||||
const text = chunk.toString("utf-8");
|
const text = chunk.toString("utf-8");
|
||||||
chunks.push(text);
|
chunks.push(text);
|
||||||
|
// Refresh stream liveness on every chunk received from the container.
|
||||||
|
// This MUST happen here (not just after streamPodLogsOnce returns) —
|
||||||
|
// a streaming attempt that never disconnects can produce output for
|
||||||
|
// hours, and the grace timer in execute() will fire 30s after the
|
||||||
|
// FIRST disconnect even if a new long-running attempt is currently
|
||||||
|
// streaming, unless we keep this timestamp fresh per-chunk (FAR-107).
|
||||||
|
if (activity) activity.lastActiveAt = Date.now();
|
||||||
const emitted = dedup ? dedup.filter(text) : text;
|
const emitted = dedup ? dedup.filter(text) : text;
|
||||||
if (!emitted) {
|
if (!emitted) {
|
||||||
callback();
|
callback();
|
||||||
@@ -430,10 +489,18 @@ export async function streamPodLogsOnce(
|
|||||||
* Capped at MAX_LOG_RECONNECT_ATTEMPTS to prevent infinite reconnect
|
* Capped at MAX_LOG_RECONNECT_ATTEMPTS to prevent infinite reconnect
|
||||||
* loops during sustained API partitions.
|
* loops during sustained API partitions.
|
||||||
*
|
*
|
||||||
* onFirstStreamExit is called the first time streamPodLogsOnce returns
|
* `activity` tracks stream liveness so execute()'s grace timer can
|
||||||
* (container has exited or stream disconnected). Used by execute() to
|
* distinguish a transient K8s log-API reconnect from a real container
|
||||||
* start the LOG_EXIT_COMPLETION_GRACE_MS grace timer (FAR-23) without
|
* exit (FAR-107). Two signals:
|
||||||
* waiting for all reconnects to exhaust.
|
* - `streamHasExited` becomes true on the first return from
|
||||||
|
* streamPodLogsOnce. Until then we are still in the warm-up window
|
||||||
|
* and waitForJobCompletion is the authoritative signal — grace must
|
||||||
|
* not fire.
|
||||||
|
* - `lastActiveAt` advances on every chunk streamPodLogsOnce receives,
|
||||||
|
* and again when an attempt returns non-empty output (still producing).
|
||||||
|
* The grace timer fires only once GRACE_MS have passed since the
|
||||||
|
* last chunk, so output that resumes after a transient drop keeps
|
||||||
|
* the run alive.
|
||||||
*/
|
*/
|
||||||
async function streamPodLogs(
|
async function streamPodLogs(
|
||||||
namespace: string,
|
namespace: string,
|
||||||
@@ -442,7 +509,7 @@ async function streamPodLogs(
|
|||||||
kubeconfigPath?: string,
|
kubeconfigPath?: string,
|
||||||
stopSignal?: { stopped: boolean },
|
stopSignal?: { stopped: boolean },
|
||||||
dedup?: LogLineDedupFilter,
|
dedup?: LogLineDedupFilter,
|
||||||
onFirstStreamExit?: () => void,
|
activity?: { lastActiveAt: number; streamHasExited: boolean },
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const allChunks: string[] = [];
|
const allChunks: string[] = [];
|
||||||
let attempt = 0;
|
let attempt = 0;
|
||||||
@@ -472,15 +539,16 @@ async function streamPodLogs(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const preStreamTs = Math.floor(Date.now() / 1000);
|
const preStreamTs = Math.floor(Date.now() / 1000);
|
||||||
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal);
|
const result = await streamPodLogsOnce(namespace, podName, onLog, kubeconfigPath, sinceSeconds, dedup, stopSignal, activity);
|
||||||
// Signal first stream exit immediately so the grace-period timer in
|
if (activity) activity.streamHasExited = true;
|
||||||
// execute() can start without waiting for all reconnects to complete.
|
|
||||||
if (attempt === 0) onFirstStreamExit?.();
|
|
||||||
if (result) {
|
if (result) {
|
||||||
allChunks.push(result);
|
allChunks.push(result);
|
||||||
// Update last-received timestamp to now (the stream just ended,
|
// Update last-received timestamp to now (the stream just ended,
|
||||||
// so any log lines in `result` were received up to this moment).
|
// so any log lines in `result` were received up to this moment).
|
||||||
lastLogReceivedAt = Math.floor(Date.now() / 1000);
|
lastLogReceivedAt = Math.floor(Date.now() / 1000);
|
||||||
|
// Refresh stream liveness so the grace timer in execute() does not
|
||||||
|
// fire while output is still flowing through reconnects (FAR-107).
|
||||||
|
if (activity) activity.lastActiveAt = Date.now();
|
||||||
} else if (attempt === 0) {
|
} else if (attempt === 0) {
|
||||||
// First attempt returned nothing — update timestamp so reconnect
|
// First attempt returned nothing — update timestamp so reconnect
|
||||||
// window stays reasonable.
|
// window stays reasonable.
|
||||||
@@ -531,11 +599,14 @@ async function readPodLogs(
|
|||||||
* is treated as a soft terminal: succeeded=false, timedOut=false, jobGone=true.
|
* is treated as a soft terminal: succeeded=false, timedOut=false, jobGone=true.
|
||||||
* The caller should log this and fall through to stdout parsing.
|
* The caller should log this and fall through to stdout parsing.
|
||||||
*/
|
*/
|
||||||
|
type JobConditionSnapshot = { type?: string; status?: string; reason?: string; message?: string };
|
||||||
|
|
||||||
async function waitForJobCompletion(
|
async function waitForJobCompletion(
|
||||||
namespace: string,
|
namespace: string,
|
||||||
jobName: string,
|
jobName: string,
|
||||||
timeoutMs: number,
|
timeoutMs: number,
|
||||||
kubeconfigPath?: string,
|
kubeconfigPath?: string,
|
||||||
|
observer?: { lastConditions: JobConditionSnapshot[] | null; pollCount: number },
|
||||||
): Promise<{ succeeded: boolean; timedOut: boolean; jobGone?: boolean }> {
|
): Promise<{ succeeded: boolean; timedOut: boolean; jobGone?: boolean }> {
|
||||||
const batchApi = getBatchApi(kubeconfigPath);
|
const batchApi = getBatchApi(kubeconfigPath);
|
||||||
const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : 0;
|
const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : 0;
|
||||||
@@ -554,6 +625,12 @@ async function waitForJobCompletion(
|
|||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
const conditions = job.status?.conditions ?? [];
|
const conditions = job.status?.conditions ?? [];
|
||||||
|
if (observer) {
|
||||||
|
observer.pollCount += 1;
|
||||||
|
observer.lastConditions = conditions.map((c) => ({
|
||||||
|
type: c.type, status: c.status, reason: c.reason, message: c.message,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
const complete = conditions.find((c) => c.type === "Complete" && c.status === "True");
|
const complete = conditions.find((c) => c.type === "Complete" && c.status === "True");
|
||||||
if (complete) return { succeeded: true, timedOut: false };
|
if (complete) return { succeeded: true, timedOut: false };
|
||||||
@@ -574,16 +651,130 @@ async function waitForJobCompletion(
|
|||||||
* Get the exit code from the Job's pod.
|
* Get the exit code from the Job's pod.
|
||||||
*/
|
*/
|
||||||
async function getPodExitCode(namespace: string, jobName: string, kubeconfigPath?: string): Promise<number | null> {
|
async function getPodExitCode(namespace: string, jobName: string, kubeconfigPath?: string): Promise<number | null> {
|
||||||
|
const state = await getPodTerminatedState(namespace, jobName, kubeconfigPath);
|
||||||
|
return state?.exitCode ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The claude container's terminated state (exit code, reason, message,
|
||||||
|
* signal) read from the Job's pod. Lookups yield null instead when the pod is missing or the container has no terminated state.
|
||||||
|
* Used by the no-result error path to explain *why* a run was truncated.
|
||||||
|
*/
|
||||||
|
export interface PodTerminatedState {
|
||||||
|
exitCode: number | null;
|
||||||
|
reason: string | null;
|
||||||
|
message: string | null;
|
||||||
|
signal: number | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of a pod-state lookup. `state` is the terminated state when available;
|
||||||
|
* `phase` and `podMissing` give the caller enough context to render an honest
|
||||||
|
* truncation-cause message instead of guessing "likely deleted" (FAR-107).
|
||||||
|
*/
|
||||||
|
export interface PodLookupResult {
|
||||||
|
state: PodTerminatedState | null;
|
||||||
|
phase: string | null;
|
||||||
|
podMissing: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function lookupPodState(
|
||||||
|
namespace: string,
|
||||||
|
jobName: string,
|
||||||
|
kubeconfigPath?: string,
|
||||||
|
): Promise<PodLookupResult> {
|
||||||
const coreApi = getCoreApi(kubeconfigPath);
|
const coreApi = getCoreApi(kubeconfigPath);
|
||||||
const podList = await coreApi.listNamespacedPod({
|
const podList = await coreApi.listNamespacedPod({
|
||||||
namespace,
|
namespace,
|
||||||
labelSelector: `job-name=${jobName}`,
|
labelSelector: `job-name=${jobName}`,
|
||||||
});
|
});
|
||||||
const pod = podList.items[0];
|
const pod = podList.items[0];
|
||||||
if (!pod) return null;
|
if (!pod) return { state: null, phase: null, podMissing: true };
|
||||||
|
|
||||||
|
const phase = pod.status?.phase ?? null;
|
||||||
const containerStatus = pod.status?.containerStatuses?.find((s) => s.name === "claude");
|
const containerStatus = pod.status?.containerStatuses?.find((s) => s.name === "claude");
|
||||||
return containerStatus?.state?.terminated?.exitCode ?? null;
|
const terminated = containerStatus?.state?.terminated;
|
||||||
|
if (!terminated) return { state: null, phase, podMissing: false };
|
||||||
|
return {
|
||||||
|
state: {
|
||||||
|
exitCode: terminated.exitCode ?? null,
|
||||||
|
reason: terminated.reason ?? null,
|
||||||
|
message: (terminated.message ?? "").trim() || null,
|
||||||
|
signal: terminated.signal ?? null,
|
||||||
|
},
|
||||||
|
phase,
|
||||||
|
podMissing: false,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the claude container's terminated state, retrying briefly when the pod
|
||||||
|
* exists in a terminal phase but kubelet has not yet propagated the
|
||||||
|
* containerStatuses[].state.terminated field. Without this retry, fast
|
||||||
|
* truncated-stream exits surface as "pod state unavailable" (FAR-107) and
|
||||||
|
* mask the real exit code / OOMKilled / SIGTERM cause.
|
||||||
|
*/
|
||||||
|
async function getPodLookupWithRetry(
|
||||||
|
namespace: string,
|
||||||
|
jobName: string,
|
||||||
|
kubeconfigPath?: string,
|
||||||
|
attempts = 4,
|
||||||
|
delayMs = 500,
|
||||||
|
): Promise<PodLookupResult> {
|
||||||
|
let last: PodLookupResult = { state: null, phase: null, podMissing: true };
|
||||||
|
for (let i = 0; i < attempts; i++) {
|
||||||
|
last = await lookupPodState(namespace, jobName, kubeconfigPath);
|
||||||
|
if (last.state) return last;
|
||||||
|
if (last.podMissing) return last;
|
||||||
|
// Pod exists but no terminated state. If it is in a terminal phase the
|
||||||
|
// containerStatuses update is in flight — wait briefly and retry. If it
|
||||||
|
// is still Running/Pending, retrying is unlikely to help, so bail.
|
||||||
|
if (last.phase !== "Succeeded" && last.phase !== "Failed") return last;
|
||||||
|
if (i < attempts - 1) await new Promise((r) => setTimeout(r, delayMs));
|
||||||
|
}
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function getPodTerminatedState(
|
||||||
|
namespace: string,
|
||||||
|
jobName: string,
|
||||||
|
kubeconfigPath?: string,
|
||||||
|
): Promise<PodTerminatedState | null> {
|
||||||
|
return (await lookupPodState(namespace, jobName, kubeconfigPath)).state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format a human-readable explanation for a truncated run, including the
|
||||||
|
* pod's claude-container terminated state when available. Exit code 137
|
||||||
|
* is annotated as SIGKILL/OOM (its most common cause) and 143 as SIGTERM.
|
||||||
|
* Exported for unit tests.
|
||||||
|
*/
|
||||||
|
export function describeTruncationCause(
|
||||||
|
state: PodTerminatedState | null,
|
||||||
|
lookup?: PodLookupResult,
|
||||||
|
): string {
|
||||||
|
if (!state) {
|
||||||
|
if (lookup?.podMissing) {
|
||||||
|
return "pod is gone — Job pod was removed (eviction, preemption, or external delete) before exit could be read";
|
||||||
|
}
|
||||||
|
if (lookup && !lookup.podMissing) {
|
||||||
|
const phaseHint = lookup.phase ? `pod phase=${lookup.phase}` : "pod present";
|
||||||
|
return `container terminated state not yet observable (${phaseHint}) — kubelet status update did not land within retry window; exit cause unknown`;
|
||||||
|
}
|
||||||
|
return "pod state unavailable — exit cause unknown";
|
||||||
|
}
|
||||||
|
const parts: string[] = [];
|
||||||
|
if (state.exitCode !== null) {
|
||||||
|
parts.push(`exit code ${state.exitCode}`);
|
||||||
|
if (state.exitCode === 137) parts.push("SIGKILL (commonly OOMKilled)");
|
||||||
|
else if (state.exitCode === 143) parts.push("SIGTERM");
|
||||||
|
} else {
|
||||||
|
parts.push("no exit code");
|
||||||
|
}
|
||||||
|
if (state.signal !== null) parts.push(`signal ${state.signal}`);
|
||||||
|
if (state.reason) parts.push(`reason=${state.reason}`);
|
||||||
|
if (state.message) parts.push(`message=${state.message}`);
|
||||||
|
return parts.join(", ");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -998,6 +1189,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
|
|
||||||
let stdout = "";
|
let stdout = "";
|
||||||
let exitCode: number | null = null;
|
let exitCode: number | null = null;
|
||||||
|
let podTerminatedState: PodTerminatedState | null = null;
|
||||||
let jobTimedOut = false;
|
let jobTimedOut = false;
|
||||||
let keepaliveTimer: ReturnType<typeof setInterval> | null = null;
|
let keepaliveTimer: ReturnType<typeof setInterval> | null = null;
|
||||||
// Set when we return a mismatch error so the finally block knows not to
|
// Set when we return a mismatch error so the finally block knows not to
|
||||||
@@ -1006,6 +1198,17 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// Set when the job disappeared (404) or grace-timer fired before we saw a
|
// Set when the job disappeared (404) or grace-timer fired before we saw a
|
||||||
// terminal condition — used to emit a clearer error when stdout parsing fails.
|
// terminal condition — used to emit a clearer error when stdout parsing fails.
|
||||||
let jobDeletedExternally = false;
|
let jobDeletedExternally = false;
|
||||||
|
// Forensics for k8s_job_deleted_externally — captures which of the three
|
||||||
|
// detection paths observed the 404, the last successful Job-condition read
|
||||||
|
// before deletion, and timing. Surfaced in the error message so the next
|
||||||
|
// occurrence is self-diagnosing instead of opaque (FAR-107).
|
||||||
|
let jobGoneDetectionPath: string | null = null;
|
||||||
|
let jobGoneAt: number | null = null;
|
||||||
|
const jobObserver: { lastConditions: JobConditionSnapshot[] | null; pollCount: number } = {
|
||||||
|
lastConditions: null,
|
||||||
|
pollCount: 0,
|
||||||
|
};
|
||||||
|
let podRunningAt: number | null = null;
|
||||||
|
|
||||||
const activeJobRef: ActiveJobRef = {
|
const activeJobRef: ActiveJobRef = {
|
||||||
namespace,
|
namespace,
|
||||||
@@ -1038,6 +1241,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath);
|
podName = await waitForPod(namespace, jobName, scheduleTimeoutMs, onLog, kubeconfigPath);
|
||||||
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`);
|
await onLog("stdout", `[paperclip] Pod running: ${podName}\n`);
|
||||||
}
|
}
|
||||||
|
podRunningAt = Date.now();
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const msg = err instanceof Error ? err.message : String(err);
|
const msg = err instanceof Error ? err.message : String(err);
|
||||||
@@ -1153,17 +1357,16 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
return onLog(stream, chunk);
|
return onLog(stream, chunk);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Track when the log stream first exits so the grace-period can fire
|
// Track stream liveness so the grace timer below only fires when output
|
||||||
// if the K8s Job condition lags behind container exit (FAR-23).
|
// has actually stopped — not on a transient K8s log-API reconnect that
|
||||||
// Set via onFirstStreamExit callback (called after attempt=0 returns)
|
// streamPodLogs heals on its own (FAR-107).
|
||||||
// rather than in .then() of streamPodLogs, which would create a
|
const streamActivity: { lastActiveAt: number; streamHasExited: boolean } = {
|
||||||
// deadlock: streamPodLogs only resolves after stopSignal is set, but
|
lastActiveAt: Date.now(),
|
||||||
// stopSignal is set by the grace timer which needs logExitTime to be
|
streamHasExited: false,
|
||||||
// non-null.
|
};
|
||||||
let logExitTime: number | null = null;
|
|
||||||
const trackedLogStream = streamPodLogs(
|
const trackedLogStream = streamPodLogs(
|
||||||
namespace, podName, wrappedOnLog, kubeconfigPath, logStopSignal, logDedup,
|
namespace, podName, wrappedOnLog, kubeconfigPath, logStopSignal, logDedup,
|
||||||
() => { logExitTime = Date.now(); },
|
streamActivity,
|
||||||
);
|
);
|
||||||
|
|
||||||
// completionWithGrace races waitForJobCompletion against a grace timer
|
// completionWithGrace races waitForJobCompletion against a grace timer
|
||||||
@@ -1173,7 +1376,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// while streamPodLogs reconnects, holding execute() open for minutes.
|
// while streamPodLogs reconnects, holding execute() open for minutes.
|
||||||
// logStopSignal.stopped is set on every settled path (fulfilled, rejected,
|
// logStopSignal.stopped is set on every settled path (fulfilled, rejected,
|
||||||
// or grace) so streamPodLogs stops reconnecting promptly.
|
// or grace) so streamPodLogs stops reconnecting promptly.
|
||||||
type CompletionResult = { succeeded: boolean; timedOut: boolean; jobGone?: boolean };
|
type CompletionResult = { succeeded: boolean; timedOut: boolean; jobGone?: boolean; gracePeriodFired?: boolean };
|
||||||
let gracePoller: ReturnType<typeof setInterval> | null = null;
|
let gracePoller: ReturnType<typeof setInterval> | null = null;
|
||||||
const completionWithGrace = new Promise<CompletionResult>((resolve, reject) => {
|
const completionWithGrace = new Promise<CompletionResult>((resolve, reject) => {
|
||||||
let settled = false;
|
let settled = false;
|
||||||
@@ -1191,11 +1394,68 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
logStopSignal.stopped = true;
|
logStopSignal.stopped = true;
|
||||||
reject(err);
|
reject(err);
|
||||||
};
|
};
|
||||||
waitForJobCompletion(namespace, jobName, completionTimeoutMs, kubeconfigPath).then(settleOk).catch(settleErr);
|
waitForJobCompletion(namespace, jobName, completionTimeoutMs, kubeconfigPath, jobObserver).then(settleOk).catch(settleErr);
|
||||||
|
let graceCheckInFlight = false;
|
||||||
gracePoller = setInterval(() => {
|
gracePoller = setInterval(() => {
|
||||||
if (logExitTime !== null && Date.now() - logExitTime >= LOG_EXIT_COMPLETION_GRACE_MS) {
|
// Only consider grace once the stream has exited at least once.
|
||||||
void onLog("stdout", `[paperclip] Log stream exited ${LOG_EXIT_COMPLETION_GRACE_MS / 1000}s ago without K8s Job condition update — proceeding with captured output (FAR-23)\n`).catch(() => {});
|
// Until then we are still in the warm-up window and
|
||||||
settleOk({ succeeded: false, timedOut: false, jobGone: true });
|
// waitForJobCompletion is the authoritative signal. Once the
|
||||||
|
// stream has exited, fire only after GRACE_MS of inactivity
|
||||||
|
// measured against the last received chunk — output that resumes
|
||||||
|
// through a reconnect resets the clock so transient drops do not
|
||||||
|
// truncate live runs (FAR-107).
|
||||||
|
if (graceCheckInFlight) return;
|
||||||
|
if (
|
||||||
|
streamActivity.streamHasExited &&
|
||||||
|
Date.now() - streamActivity.lastActiveAt >= LOG_EXIT_COMPLETION_GRACE_MS
|
||||||
|
) {
|
||||||
|
graceCheckInFlight = true;
|
||||||
|
void (async () => {
|
||||||
|
try {
|
||||||
|
// Pod-phase gate (FAR-107): if the pod is still Running/Pending
|
||||||
|
// the container is alive — Claude can be silent for >30s during
|
||||||
|
// long tool calls (web fetches, slow upstream APIs). Refresh
|
||||||
|
// the stream-activity timer, leave the poller armed, and let
|
||||||
|
// waitForJobCompletion remain the authoritative signal. Only
|
||||||
|
// proceed with the grace settlement when the pod has actually
|
||||||
|
// reached a terminal phase or is gone.
|
||||||
|
const podLookup = await lookupPodState(namespace, jobName, kubeconfigPath);
|
||||||
|
if (!podLookup.podMissing && (podLookup.phase === "Running" || podLookup.phase === "Pending")) {
|
||||||
|
streamActivity.lastActiveAt = Date.now();
|
||||||
|
graceCheckInFlight = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
await onLog("stderr", `[paperclip] grace gate: pod state lookup failed (${err instanceof Error ? err.message : String(err)}) — falling through to Job-presence check\n`).catch(() => {});
|
||||||
|
}
|
||||||
|
// Pod is no longer Running/Pending (or its state could not be read) — proceed with Job-presence verification.
|
||||||
|
// Stop the grace poller immediately so we don't double-fire while the
|
||||||
|
// verification read below is in flight.
|
||||||
|
if (gracePoller) { clearInterval(gracePoller); gracePoller = null; }
|
||||||
|
// The log stream exiting only means the container stopped producing
|
||||||
|
// output — it does NOT prove the Job was deleted. Verify Job
|
||||||
|
// presence with a one-shot read so we can distinguish:
|
||||||
|
// (a) Job 404 → truly gone (TTL or external deletion)
|
||||||
|
// (b) Job still present → K8s condition propagation lag (FAR-23)
|
||||||
|
// Without this check we mis-classify (b) as "deleted externally" and
|
||||||
|
// emit a false-positive k8s_job_deleted_externally error (FAR-107).
|
||||||
|
try {
|
||||||
|
await getBatchApi(kubeconfigPath).readNamespacedJob({ name: jobName, namespace });
|
||||||
|
await onLog("stdout", `[paperclip] Log stream exited ${LOG_EXIT_COMPLETION_GRACE_MS / 1000}s ago without K8s Job condition update; Job ${jobName} still present — proceeding with captured output (FAR-23)\n`).catch(() => {});
|
||||||
|
settleOk({ succeeded: false, timedOut: false, gracePeriodFired: true });
|
||||||
|
} catch (err: unknown) {
|
||||||
|
if (isK8s404(err)) {
|
||||||
|
jobGoneDetectionPath = "grace-period-verify-404";
|
||||||
|
jobGoneAt = Date.now();
|
||||||
|
await onLog("stdout", `[paperclip] Log stream exited ${LOG_EXIT_COMPLETION_GRACE_MS / 1000}s ago and Job ${jobName} is gone (TTL or external deletion) — proceeding with captured output (FAR-23)\n`).catch(() => {});
|
||||||
|
settleOk({ succeeded: false, timedOut: false, jobGone: true });
|
||||||
|
} else {
|
||||||
|
// K8s API hiccup — bail out without claiming external deletion.
|
||||||
|
await onLog("stdout", `[paperclip] Log stream exited ${LOG_EXIT_COMPLETION_GRACE_MS / 1000}s ago; Job state unverifiable (${err instanceof Error ? err.message : String(err)}) — proceeding with captured output (FAR-23)\n`).catch(() => {});
|
||||||
|
settleOk({ succeeded: false, timedOut: false, gracePeriodFired: true });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})();
|
||||||
}
|
}
|
||||||
}, 1_000);
|
}, 1_000);
|
||||||
});
|
});
|
||||||
@@ -1263,6 +1523,10 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// completion), so log streaming has captured the full output — continue
|
// completion), so log streaming has captured the full output — continue
|
||||||
// to stdout parsing rather than returning an error.
|
// to stdout parsing rather than returning an error.
|
||||||
jobDeletedExternally = true;
|
jobDeletedExternally = true;
|
||||||
|
if (!jobGoneDetectionPath) {
|
||||||
|
jobGoneDetectionPath = "completion-poll-404";
|
||||||
|
jobGoneAt = Date.now();
|
||||||
|
}
|
||||||
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -1271,7 +1535,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// (60s) so we don't hang the heartbeat indefinitely if the K8s API is degraded.
|
// (60s) so we don't hang the heartbeat indefinitely if the K8s API is degraded.
|
||||||
jobTimedOut = false;
|
jobTimedOut = false;
|
||||||
const RECHECK_TIMEOUT_MS = 60_000;
|
const RECHECK_TIMEOUT_MS = 60_000;
|
||||||
const actualState = await waitForJobCompletion(namespace, jobName, RECHECK_TIMEOUT_MS, kubeconfigPath);
|
const actualState = await waitForJobCompletion(namespace, jobName, RECHECK_TIMEOUT_MS, kubeconfigPath, jobObserver);
|
||||||
if (actualState.timedOut) {
|
if (actualState.timedOut) {
|
||||||
// Re-check itself timed out — the job may still be running.
|
// Re-check itself timed out — the job may still be running.
|
||||||
// Return an error so the UI knows the run is not done.
|
// Return an error so the UI knows the run is not done.
|
||||||
@@ -1280,6 +1544,10 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
// Job was deleted before we could confirm terminal state — same as the
|
// Job was deleted before we could confirm terminal state — same as the
|
||||||
// fulfilled+jobGone case above: proceed with captured output.
|
// fulfilled+jobGone case above: proceed with captured output.
|
||||||
jobDeletedExternally = true;
|
jobDeletedExternally = true;
|
||||||
|
if (!jobGoneDetectionPath) {
|
||||||
|
jobGoneDetectionPath = "recheck-poll-404";
|
||||||
|
jobGoneAt = Date.now();
|
||||||
|
}
|
||||||
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
await onLog("stdout", `[paperclip] Job ${jobName} was deleted before terminal condition was observed (TTL or external deletion) — proceeding with captured output.\n`);
|
||||||
} else if (!actualState.succeeded) {
|
} else if (!actualState.succeeded) {
|
||||||
// Job still not terminal — the completion error was likely transient.
|
// Job still not terminal — the completion error was likely transient.
|
||||||
@@ -1297,7 +1565,8 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
exitCode = await getPodExitCode(namespace, jobName, kubeconfigPath);
|
podTerminatedState = await getPodTerminatedState(namespace, jobName, kubeconfigPath);
|
||||||
|
exitCode = podTerminatedState?.exitCode ?? null;
|
||||||
} finally {
|
} finally {
|
||||||
if (keepaliveTimer) clearInterval(keepaliveTimer);
|
if (keepaliveTimer) clearInterval(keepaliveTimer);
|
||||||
activeJobs.delete(activeJobRef);
|
activeJobs.delete(activeJobRef);
|
||||||
@@ -1348,11 +1617,35 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
|
|
||||||
if (!parsed) {
|
if (!parsed) {
|
||||||
if (jobDeletedExternally && exitCode === null) {
|
if (jobDeletedExternally && exitCode === null) {
|
||||||
|
// Forensic context (FAR-107): users sometimes see this error when nothing
|
||||||
|
// actually deleted the Job manually. Surface enough state in the message
|
||||||
|
// to distinguish self-delete (SIGTERM/cancel), TTL-after-completion, and
|
||||||
|
// genuine external deletion without needing cluster shell access.
|
||||||
|
const detailParts: string[] = [];
|
||||||
|
if (jobGoneDetectionPath) detailParts.push(`detected_via=${jobGoneDetectionPath}`);
|
||||||
|
detailParts.push(`job=${jobName}`);
|
||||||
|
detailParts.push(`ns=${namespace}`);
|
||||||
|
if (podRunningAt !== null && jobGoneAt !== null) {
|
||||||
|
detailParts.push(`elapsed_since_pod_running=${Math.round((jobGoneAt - podRunningAt) / 1000)}s`);
|
||||||
|
}
|
||||||
|
detailParts.push(`completion_polls=${jobObserver.pollCount}`);
|
||||||
|
const lastConds = jobObserver.lastConditions;
|
||||||
|
if (lastConds && lastConds.length > 0) {
|
||||||
|
const summary = lastConds
|
||||||
|
.map((c) => `${c.type}=${c.status}${c.reason ? `(${c.reason})` : ""}`)
|
||||||
|
.join(",");
|
||||||
|
detailParts.push(`last_job_conditions=[${summary}]`);
|
||||||
|
} else {
|
||||||
|
detailParts.push("last_job_conditions=none_observed");
|
||||||
|
}
|
||||||
|
detailParts.push(`stdout_bytes=${stdout.length}`);
|
||||||
|
const stdoutLines = stdout.split("\n").filter((l) => l.trim()).length;
|
||||||
|
detailParts.push(`stdout_nonempty_lines=${stdoutLines}`);
|
||||||
return {
|
return {
|
||||||
exitCode,
|
exitCode,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: "K8s Job was deleted externally before Claude could complete",
|
errorMessage: `K8s Job was deleted externally before Claude could complete [${detailParts.join(", ")}]`,
|
||||||
errorCode: "k8s_job_deleted_externally",
|
errorCode: "k8s_job_deleted_externally",
|
||||||
resultJson: { stdout },
|
resultJson: { stdout },
|
||||||
};
|
};
|
||||||
@@ -1368,13 +1661,29 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
if (parsedStream.truncatedMidStream) {
|
if (parsedStream.truncatedMidStream) {
|
||||||
const exitHint = exitCode === null ? "no exit code" : `exit code ${exitCode}`;
|
// Re-query pod state with retry — the initial single-shot read can lose
|
||||||
|
// to kubelet propagation lag and surface a useless "pod state unavailable"
|
||||||
|
// message that hides the real exit cause (OOMKilled, SIGTERM, etc). The
|
||||||
|
// retry distinguishes pod-genuinely-gone from terminated-state-lag and
|
||||||
|
// gives the operator the actual exit code/reason where possible (FAR-107).
|
||||||
|
let lookup: PodLookupResult | undefined;
|
||||||
|
let refreshedState = podTerminatedState;
|
||||||
|
try {
|
||||||
|
lookup = await getPodLookupWithRetry(namespace, jobName, kubeconfigPath);
|
||||||
|
refreshedState = lookup.state;
|
||||||
|
if (refreshedState && refreshedState.exitCode !== null) {
|
||||||
|
exitCode = refreshedState.exitCode;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
await onLog("stderr", `[paperclip] truncation diagnostic: pod re-query failed (${err instanceof Error ? err.message : String(err)})\n`).catch(() => {});
|
||||||
|
}
|
||||||
|
const cause = describeTruncationCause(refreshedState, lookup);
|
||||||
const modelHint = parsedStream.model ? ` (model: ${parsedStream.model})` : "";
|
const modelHint = parsedStream.model ? ` (model: ${parsedStream.model})` : "";
|
||||||
return {
|
return {
|
||||||
exitCode,
|
exitCode,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: `Claude run was truncated mid-stream${modelHint} — assistant produced content but no result event arrived (${exitHint}); pod may have been terminated, OOMKilled, or the CLI crashed`,
|
errorMessage: `Claude run was truncated mid-stream${modelHint} — assistant produced content but no result event arrived; ${cause}`,
|
||||||
errorCode: "claude_truncated",
|
errorCode: "claude_truncated",
|
||||||
resultJson: { stdout },
|
resultJson: { stdout },
|
||||||
};
|
};
|
||||||
@@ -1383,7 +1692,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
|
|||||||
exitCode,
|
exitCode,
|
||||||
signal: null,
|
signal: null,
|
||||||
timedOut: false,
|
timedOut: false,
|
||||||
errorMessage: buildPartialRunError(exitCode, parsedStream.model, stdout),
|
errorMessage: buildPartialRunError(exitCode, parsedStream.model, stdout, podTerminatedState),
|
||||||
resultJson: { stdout },
|
resultJson: { stdout },
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||||
import { listK8sModels } from "./models.js";
|
import { listK8sModels, DIRECT_MODELS, BEDROCK_MODELS } from "./models.js";
|
||||||
|
|
||||||
describe("listK8sModels", () => {
|
describe("listK8sModels", () => {
|
||||||
const savedEnv: Record<string, string | undefined> = {};
|
const savedEnv: Record<string, string | undefined> = {};
|
||||||
@@ -50,3 +50,22 @@ describe("listK8sModels", () => {
|
|||||||
expect(models.some((m) => m.id === "claude-opus-4-7")).toBe(true);
|
expect(models.some((m) => m.id === "claude-opus-4-7")).toBe(true);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("static model lists", () => {
|
||||||
|
it("DIRECT_MODELS is non-empty and has valid ids", () => {
|
||||||
|
expect(DIRECT_MODELS.length).toBeGreaterThan(0);
|
||||||
|
for (const m of DIRECT_MODELS) {
|
||||||
|
expect(typeof m.id).toBe("string");
|
||||||
|
expect(m.id.length).toBeGreaterThan(0);
|
||||||
|
expect(typeof m.label).toBe("string");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("BEDROCK_MODELS is non-empty and all ids contain 'anthropic.'", () => {
|
||||||
|
expect(BEDROCK_MODELS.length).toBeGreaterThan(0);
|
||||||
|
for (const m of BEDROCK_MODELS) {
|
||||||
|
expect(m.id).toContain("anthropic.");
|
||||||
|
expect(typeof m.label).toBe("string");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import type { AdapterModel } from "@paperclipai/adapter-utils";
|
import type { AdapterModel } from "@paperclipai/adapter-utils";
|
||||||
|
|
||||||
const DIRECT_MODELS: AdapterModel[] = [
|
export const DIRECT_MODELS: AdapterModel[] = [
|
||||||
{ id: "claude-opus-4-7", label: "Claude Opus 4.7" },
|
{ id: "claude-opus-4-7", label: "Claude Opus 4.7" },
|
||||||
{ id: "claude-opus-4-6", label: "Claude Opus 4.6" },
|
{ id: "claude-opus-4-6", label: "Claude Opus 4.6" },
|
||||||
{ id: "claude-sonnet-4-6", label: "Claude Sonnet 4.6" },
|
{ id: "claude-sonnet-4-6", label: "Claude Sonnet 4.6" },
|
||||||
@@ -9,7 +9,7 @@ const DIRECT_MODELS: AdapterModel[] = [
|
|||||||
{ id: "claude-haiku-4-5-20251001", label: "Claude Haiku 4.5" },
|
{ id: "claude-haiku-4-5-20251001", label: "Claude Haiku 4.5" },
|
||||||
];
|
];
|
||||||
|
|
||||||
const BEDROCK_MODELS: AdapterModel[] = [
|
export const BEDROCK_MODELS: AdapterModel[] = [
|
||||||
{ id: "us.anthropic.claude-opus-4-7", label: "Bedrock Opus 4.7" },
|
{ id: "us.anthropic.claude-opus-4-7", label: "Bedrock Opus 4.7" },
|
||||||
{ id: "us.anthropic.claude-opus-4-6-v1", label: "Bedrock Opus 4.6" },
|
{ id: "us.anthropic.claude-opus-4-6-v1", label: "Bedrock Opus 4.6" },
|
||||||
{ id: "us.anthropic.claude-sonnet-4-6", label: "Bedrock Sonnet 4.6" },
|
{ id: "us.anthropic.claude-sonnet-4-6", label: "Bedrock Sonnet 4.6" },
|
||||||
@@ -17,7 +17,7 @@ const BEDROCK_MODELS: AdapterModel[] = [
|
|||||||
{ id: "us.anthropic.claude-haiku-4-5-20251001-v1:0", label: "Bedrock Haiku 4.5" },
|
{ id: "us.anthropic.claude-haiku-4-5-20251001-v1:0", label: "Bedrock Haiku 4.5" },
|
||||||
];
|
];
|
||||||
|
|
||||||
function isBedrockEnv(): boolean {
|
export function isBedrockEnv(): boolean {
|
||||||
return (
|
return (
|
||||||
process.env.CLAUDE_CODE_USE_BEDROCK === "1" ||
|
process.env.CLAUDE_CODE_USE_BEDROCK === "1" ||
|
||||||
process.env.CLAUDE_CODE_USE_BEDROCK === "true" ||
|
process.env.CLAUDE_CODE_USE_BEDROCK === "true" ||
|
||||||
|
|||||||
Reference in New Issue
Block a user