fix(plugin): bound kubernetes sandbox execution

This commit is contained in:
Dotta
2026-05-12 12:34:08 -05:00
committed by Chris Farhood
parent e691d30d12
commit 39d81c732c
5 changed files with 72 additions and 25 deletions
@@ -274,33 +274,45 @@ const plugin = definePlugin({
// NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster // NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster
// quirk, the release() call will still clean it up via namespace GC or // quirk, the release() call will still clean it up via namespace GC or
// explicit delete in a future milestone. // explicit delete in a future milestone.
await createPerRunSecret(clients, { try {
namespace, await createPerRunSecret(clients, {
secretName, namespace,
runId: params.runId, secretName,
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job", runId: params.runId,
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1", ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
ownerName: jobName, ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
ownerUid, ownerName: jobName,
bootstrapToken, ownerUid,
adapterEnv, bootstrapToken,
}); adapterEnv,
});
const podName = await orchestrator.findPod(clients, namespace, jobName); const podName = await orchestrator.findPod(clients, namespace, jobName);
const leaseMetadata: KubernetesLeaseMetadata = { const leaseMetadata: KubernetesLeaseMetadata = {
namespace, namespace,
jobName, jobName,
podName, podName,
secretName, secretName,
phase: "Pending", phase: "Pending",
backend: config.backend, backend: config.backend,
}; };
return { return {
providerLeaseId: jobName, providerLeaseId: jobName,
metadata: leaseMetadata as unknown as Record<string, unknown>, metadata: leaseMetadata as unknown as Record<string, unknown>,
}; };
} catch (err) {
try {
await orchestrator.release(clients, namespace, jobName);
} catch (cleanupErr) {
throw new Error(
`Kubernetes lease setup failed and cleanup also failed: ${cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)}`,
{ cause: err },
);
}
throw err;
}
}, },
async onEnvironmentRealizeWorkspace( async onEnvironmentRealizeWorkspace(
@@ -397,6 +409,7 @@ const plugin = definePlugin({
// 1. Ensure the Sandbox pod is Ready (wait if needed). // 1. Ensure the Sandbox pod is Ready (wait if needed).
// 2. Exec the command into the running pod. // 2. Exec the command into the running pod.
// 3. Return exec result directly (no log scraping needed). // 3. Return exec result directly (no log scraping needed).
const executeStartedAt = Date.now();
let podName = let podName =
typeof lease.metadata?.podName === "string" && lease.metadata.podName typeof lease.metadata?.podName === "string" && lease.metadata.podName
@@ -464,6 +477,7 @@ const plugin = definePlugin({
? ["/bin/sh", "-lc", rawCommand] ? ["/bin/sh", "-lc", rawCommand]
: ["/bin/sh", "-l"]; : ["/bin/sh", "-l"];
const remainingTimeoutMs = Math.max(1, effectiveTimeoutMs - (Date.now() - executeStartedAt));
const execResult = await execInPod( const execResult = await execInPod(
kc, kc,
namespace, namespace,
@@ -471,6 +485,7 @@ const plugin = definePlugin({
"agent", "agent",
execCommand, execCommand,
typeof params.stdin === "string" ? params.stdin : undefined, typeof params.stdin === "string" ? params.stdin : undefined,
remainingTimeoutMs,
); );
return { return {
@@ -21,6 +21,7 @@ export async function execInPod(
containerName: string, containerName: string,
command: string[], command: string[],
stdin?: string, stdin?: string,
timeoutMs?: number,
): Promise<{ exitCode: number; stdout: string; stderr: string }> { ): Promise<{ exitCode: number; stdout: string; stderr: string }> {
const exec = new Exec(kc); const exec = new Exec(kc);
const stdoutStream = new PassThrough(); const stdoutStream = new PassThrough();
@@ -45,9 +46,16 @@ export async function execInPod(
return await new Promise<{ exitCode: number; stdout: string; stderr: string }>( return await new Promise<{ exitCode: number; stdout: string; stderr: string }>(
(resolve, reject) => { (resolve, reject) => {
let settled = false; let settled = false;
const timeout =
typeof timeoutMs === "number" && timeoutMs > 0
? setTimeout(() => {
finishWithTransportFailure(`Kubernetes exec timed out after ${timeoutMs}ms`);
}, timeoutMs)
: null;
const finish = (result: { exitCode: number; stdout: string; stderr: string }) => { const finish = (result: { exitCode: number; stdout: string; stderr: string }) => {
if (settled) return; if (settled) return;
settled = true; settled = true;
if (timeout) clearTimeout(timeout);
resolve(result); resolve(result);
}; };
const finishWithTransportFailure = (message: string) => { const finishWithTransportFailure = (message: string) => {
@@ -264,7 +264,10 @@ export async function waitForSandboxReady(
`Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"}${mapped.message ?? ""}`, `Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"}${mapped.message ?? ""}`,
); );
} }
// Pending or Terminating — keep polling if (phase === "Terminating") {
throw new Error(`Sandbox ${namespace}/${name} is terminating before it became ready`);
}
// Pending or unknown — keep polling
await sleep(pollMs); await sleep(pollMs);
} }
@@ -38,4 +38,13 @@ describe("execInPod", () => {
stderr: expect.stringContaining("websocket closed before status frame"), stderr: expect.stringContaining("websocket closed before status frame"),
}); });
}); });
// Deadline regression test: the mocked exec resolves to a bare EventEmitter on
// which this test never emits anything, so the only thing left to settle the
// call is the timeout passed as the final argument.
it("returns an execution failure if the exec command exceeds its deadline", async () => {
// Stand-in connection object; nothing in this test emits an event on it.
execMock.mockResolvedValue(new EventEmitter());
// Trailing arguments: `undefined` is stdin, `5` is timeoutMs (a 5 ms deadline).
const result = await execInPod({} as never, "ns", "pod-1", "agent", ["sleep", "60"], undefined, 5);
// The timeout is expected to surface as a resolved failure result, not a rejection.
expect(result.exitCode).toBe(1);
// The message must embed the configured deadline value.
expect(result.stderr).toContain("Kubernetes exec timed out after 5ms");
});
}); });
@@ -213,4 +213,16 @@ describe("waitForSandboxReady", () => {
}), }),
).rejects.toThrow(/failed.*OOMKilled/i); ).rejects.toThrow(/failed.*OOMKilled/i);
}); });
// Fail-fast regression test: a Sandbox reported as "Terminating" must make
// waitForSandboxReady reject instead of polling until the timeout elapses.
it("fails fast when Sandbox starts terminating before it is ready", async () => {
// Every fetch of the custom object resolves to makeCr("Terminating")
// (presumably a Sandbox resource in the Terminating phase; helper defined outside this hunk).
const get = vi.fn().mockResolvedValue(makeCr("Terminating"));
// Minimal clients stub exposing only the custom-object getter wired to the mock.
const clients = { custom: { getNamespacedCustomObject: get } };
await expect(
waitForSandboxReady(clients as never, "ns", "pc-abc", {
// Generous timeout next to a short poll interval.
timeoutMs: 5000,
pollMs: 10,
}),
).rejects.toThrow(/terminating before it became ready/i);
// Exactly one fetch shows the rejection came from the first observation, with no further polling.
expect(get).toHaveBeenCalledTimes(1);
});
}); });