From 39d81c732ccd72452eccf2a8aacb8bc040358fb6 Mon Sep 17 00:00:00 2001 From: Dotta Date: Tue, 12 May 2026 12:34:08 -0500 Subject: [PATCH] fix(plugin): bound kubernetes sandbox execution --- .../kubernetes/src/plugin.ts | 63 ++++++++++++------- .../kubernetes/src/pod-exec.ts | 8 +++ .../kubernetes/src/sandbox-cr-orchestrator.ts | 5 +- .../kubernetes/test/unit/pod-exec.test.ts | 9 +++ .../test/unit/sandbox-cr-orchestrator.test.ts | 12 ++++ 5 files changed, 72 insertions(+), 25 deletions(-) diff --git a/packages/plugins/sandbox-providers/kubernetes/src/plugin.ts b/packages/plugins/sandbox-providers/kubernetes/src/plugin.ts index 750d2814..9578260e 100644 --- a/packages/plugins/sandbox-providers/kubernetes/src/plugin.ts +++ b/packages/plugins/sandbox-providers/kubernetes/src/plugin.ts @@ -274,33 +274,45 @@ const plugin = definePlugin({ // NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster // quirk, the release() call will still clean it up via namespace GC or // explicit delete in a future milestone. - await createPerRunSecret(clients, { - namespace, - secretName, - runId: params.runId, - ownerKind: isSandboxCrBackend ? "Sandbox" : "Job", - ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1", - ownerName: jobName, - ownerUid, - bootstrapToken, - adapterEnv, - }); + try { + await createPerRunSecret(clients, { + namespace, + secretName, + runId: params.runId, + ownerKind: isSandboxCrBackend ? "Sandbox" : "Job", + ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1", + ownerName: jobName, + ownerUid, + bootstrapToken, + adapterEnv, + }); - const podName = await orchestrator.findPod(clients, namespace, jobName); + const podName = await orchestrator.findPod(clients, namespace, jobName); - const leaseMetadata: KubernetesLeaseMetadata = { - namespace, - jobName, - podName, - secretName, - phase: "Pending", - backend: config.backend, - }; + const leaseMetadata: KubernetesLeaseMetadata = { + namespace, + jobName, + podName, + secretName, + phase: "Pending", + backend: config.backend, + }; - return { - providerLeaseId: jobName, - metadata: leaseMetadata as unknown as Record, - }; + return { + providerLeaseId: jobName, + metadata: leaseMetadata as unknown as Record, + }; + } catch (err) { + try { + await orchestrator.release(clients, namespace, jobName); + } catch (cleanupErr) { + throw new Error( + `Kubernetes lease setup failed and cleanup also failed: ${cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)}`, + { cause: err }, + ); + } + throw err; + } }, async onEnvironmentRealizeWorkspace( @@ -397,6 +409,7 @@ const plugin = definePlugin({ // 1. Ensure the Sandbox pod is Ready (wait if needed). // 2. Exec the command into the running pod. // 3. Return exec result directly (no log scraping needed). + const executeStartedAt = Date.now(); let podName = typeof lease.metadata?.podName === "string" && lease.metadata.podName @@ -464,6 +477,7 @@ const plugin = definePlugin({ ? ["/bin/sh", "-lc", rawCommand] : ["/bin/sh", "-l"]; + const remainingTimeoutMs = Math.max(1, effectiveTimeoutMs - (Date.now() - executeStartedAt)); const execResult = await execInPod( kc, namespace, @@ -471,6 +485,7 @@ const plugin = definePlugin({ "agent", execCommand, typeof params.stdin === "string" ? params.stdin : undefined, + remainingTimeoutMs, ); return { diff --git a/packages/plugins/sandbox-providers/kubernetes/src/pod-exec.ts b/packages/plugins/sandbox-providers/kubernetes/src/pod-exec.ts index a8c49de2..7392fad6 100644 --- a/packages/plugins/sandbox-providers/kubernetes/src/pod-exec.ts +++ b/packages/plugins/sandbox-providers/kubernetes/src/pod-exec.ts @@ -21,6 +21,7 @@ export async function execInPod( containerName: string, command: string[], stdin?: string, + timeoutMs?: number, ): Promise<{ exitCode: number; stdout: string; stderr: string }> { const exec = new Exec(kc); const stdoutStream = new PassThrough(); @@ -45,9 +46,16 @@ export async function execInPod( return await new Promise<{ exitCode: number; stdout: string; stderr: string }>( (resolve, reject) => { let settled = false; + const timeout = + typeof timeoutMs === "number" && timeoutMs > 0 + ? setTimeout(() => { + finishWithTransportFailure(`Kubernetes exec timed out after ${timeoutMs}ms`); + }, timeoutMs) + : null; const finish = (result: { exitCode: number; stdout: string; stderr: string }) => { if (settled) return; settled = true; + if (timeout) clearTimeout(timeout); resolve(result); }; const finishWithTransportFailure = (message: string) => { diff --git a/packages/plugins/sandbox-providers/kubernetes/src/sandbox-cr-orchestrator.ts b/packages/plugins/sandbox-providers/kubernetes/src/sandbox-cr-orchestrator.ts index dc718d9b..b77a097b 100644 --- a/packages/plugins/sandbox-providers/kubernetes/src/sandbox-cr-orchestrator.ts +++ b/packages/plugins/sandbox-providers/kubernetes/src/sandbox-cr-orchestrator.ts @@ -264,7 +264,10 @@ export async function waitForSandboxReady( `Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"} — ${mapped.message ?? ""}`, ); } - // Pending or Terminating — keep polling + if (phase === "Terminating") { + throw new Error(`Sandbox ${namespace}/${name} is terminating before it became ready`); + } + // Pending or unknown — keep polling await sleep(pollMs); } diff --git a/packages/plugins/sandbox-providers/kubernetes/test/unit/pod-exec.test.ts b/packages/plugins/sandbox-providers/kubernetes/test/unit/pod-exec.test.ts index 0ecd5720..513289d1 100644 --- a/packages/plugins/sandbox-providers/kubernetes/test/unit/pod-exec.test.ts +++ b/packages/plugins/sandbox-providers/kubernetes/test/unit/pod-exec.test.ts @@ -38,4 +38,13 @@ describe("execInPod", () => { stderr: expect.stringContaining("websocket closed before status frame"), }); }); + + it("returns an execution failure if the exec command exceeds its deadline", async () => { + execMock.mockResolvedValue(new EventEmitter()); + + const result = await execInPod({} as never, "ns", "pod-1", "agent", ["sleep", "60"], undefined, 5); + + expect(result.exitCode).toBe(1); + expect(result.stderr).toContain("Kubernetes exec timed out after 5ms"); + }); }); diff --git a/packages/plugins/sandbox-providers/kubernetes/test/unit/sandbox-cr-orchestrator.test.ts b/packages/plugins/sandbox-providers/kubernetes/test/unit/sandbox-cr-orchestrator.test.ts index 8037968b..9aff74e1 100644 --- a/packages/plugins/sandbox-providers/kubernetes/test/unit/sandbox-cr-orchestrator.test.ts +++ b/packages/plugins/sandbox-providers/kubernetes/test/unit/sandbox-cr-orchestrator.test.ts @@ -213,4 +213,16 @@ describe("waitForSandboxReady", () => { }), ).rejects.toThrow(/failed.*OOMKilled/i); }); + + it("fails fast when Sandbox starts terminating before it is ready", async () => { + const get = vi.fn().mockResolvedValue(makeCr("Terminating")); + const clients = { custom: { getNamespacedCustomObject: get } }; + await expect( + waitForSandboxReady(clients as never, "ns", "pc-abc", { + timeoutMs: 5000, + pollMs: 10, + }), + ).rejects.toThrow(/terminating before it became ready/i); + expect(get).toHaveBeenCalledTimes(1); + }); });