fix(plugin): bound kubernetes sandbox execution

This commit is contained in:
Dotta
2026-05-12 12:34:08 -05:00
committed by Chris Farhood
parent e691d30d12
commit 39d81c732c
5 changed files with 72 additions and 25 deletions
@@ -274,33 +274,45 @@ const plugin = definePlugin({
// NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster
// quirk, the release() call will still clean it up via namespace GC or
// explicit delete in a future milestone.
await createPerRunSecret(clients, {
namespace,
secretName,
runId: params.runId,
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
ownerName: jobName,
ownerUid,
bootstrapToken,
adapterEnv,
});
try {
await createPerRunSecret(clients, {
namespace,
secretName,
runId: params.runId,
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
ownerName: jobName,
ownerUid,
bootstrapToken,
adapterEnv,
});
const podName = await orchestrator.findPod(clients, namespace, jobName);
const podName = await orchestrator.findPod(clients, namespace, jobName);
const leaseMetadata: KubernetesLeaseMetadata = {
namespace,
jobName,
podName,
secretName,
phase: "Pending",
backend: config.backend,
};
const leaseMetadata: KubernetesLeaseMetadata = {
namespace,
jobName,
podName,
secretName,
phase: "Pending",
backend: config.backend,
};
return {
providerLeaseId: jobName,
metadata: leaseMetadata as unknown as Record<string, unknown>,
};
return {
providerLeaseId: jobName,
metadata: leaseMetadata as unknown as Record<string, unknown>,
};
} catch (err) {
try {
await orchestrator.release(clients, namespace, jobName);
} catch (cleanupErr) {
throw new Error(
`Kubernetes lease setup failed and cleanup also failed: ${cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)}`,
{ cause: err },
);
}
throw err;
}
},
async onEnvironmentRealizeWorkspace(
@@ -397,6 +409,7 @@ const plugin = definePlugin({
// 1. Ensure the Sandbox pod is Ready (wait if needed).
// 2. Exec the command into the running pod.
// 3. Return exec result directly (no log scraping needed).
const executeStartedAt = Date.now();
let podName =
typeof lease.metadata?.podName === "string" && lease.metadata.podName
@@ -464,6 +477,7 @@ const plugin = definePlugin({
? ["/bin/sh", "-lc", rawCommand]
: ["/bin/sh", "-l"];
const remainingTimeoutMs = Math.max(1, effectiveTimeoutMs - (Date.now() - executeStartedAt));
const execResult = await execInPod(
kc,
namespace,
@@ -471,6 +485,7 @@ const plugin = definePlugin({
"agent",
execCommand,
typeof params.stdin === "string" ? params.stdin : undefined,
remainingTimeoutMs,
);
return {
@@ -21,6 +21,7 @@ export async function execInPod(
containerName: string,
command: string[],
stdin?: string,
timeoutMs?: number,
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
const exec = new Exec(kc);
const stdoutStream = new PassThrough();
@@ -45,9 +46,16 @@ export async function execInPod(
return await new Promise<{ exitCode: number; stdout: string; stderr: string }>(
(resolve, reject) => {
let settled = false;
const timeout =
typeof timeoutMs === "number" && timeoutMs > 0
? setTimeout(() => {
finishWithTransportFailure(`Kubernetes exec timed out after ${timeoutMs}ms`);
}, timeoutMs)
: null;
const finish = (result: { exitCode: number; stdout: string; stderr: string }) => {
if (settled) return;
settled = true;
if (timeout) clearTimeout(timeout);
resolve(result);
};
const finishWithTransportFailure = (message: string) => {
@@ -264,7 +264,10 @@ export async function waitForSandboxReady(
`Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"}${mapped.message ?? ""}`,
);
}
// Pending or Terminating — keep polling
if (phase === "Terminating") {
throw new Error(`Sandbox ${namespace}/${name} is terminating before it became ready`);
}
// Pending or unknown — keep polling
await sleep(pollMs);
}
@@ -38,4 +38,13 @@ describe("execInPod", () => {
stderr: expect.stringContaining("websocket closed before status frame"),
});
});
// Deadline case: execInPod is handed a 5 ms timeout and a mocked exec that
// resolves to a bare emitter on which this test never fires any event.
it("returns an execution failure if the exec command exceeds its deadline", async () => {
  const silentSocket = new EventEmitter();
  execMock.mockResolvedValue(silentSocket);

  const deadlineMs = 5;
  const { exitCode, stderr } = await execInPod(
    {} as never,
    "ns",
    "pod-1",
    "agent",
    ["sleep", "60"],
    undefined,
    deadlineMs,
  );

  // The call resolves (rather than rejects) with a failing exit code and a
  // stderr message that names the deadline.
  expect(exitCode).toBe(1);
  expect(stderr).toContain("Kubernetes exec timed out after 5ms");
});
});
@@ -213,4 +213,16 @@ describe("waitForSandboxReady", () => {
}),
).rejects.toThrow(/failed.*OOMKilled/i);
});
// Terminating case: the mocked custom-object lookup always resolves to
// makeCr("Terminating"), and the wait is expected to reject after one lookup.
it("fails fast when Sandbox starts terminating before it is ready", async () => {
  const getCustomObject = vi.fn().mockResolvedValue(makeCr("Terminating"));
  const clients = { custom: { getNamespacedCustomObject: getCustomObject } };
  const waitOptions = { timeoutMs: 5000, pollMs: 10 };

  const pendingWait = waitForSandboxReady(clients as never, "ns", "pc-abc", waitOptions);

  await expect(pendingWait).rejects.toThrow(/terminating before it became ready/i);
  // Exactly one lookup: the rejection happened without another poll.
  expect(getCustomObject).toHaveBeenCalledTimes(1);
});
});