forked from farhoodlabs/paperclip
fix(plugin): bound kubernetes sandbox execution
This commit is contained in:
@@ -274,33 +274,45 @@ const plugin = definePlugin({
|
||||
// NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster
|
||||
// quirk, the release() call will still clean it up via namespace GC or
|
||||
// explicit delete in a future milestone.
|
||||
await createPerRunSecret(clients, {
|
||||
namespace,
|
||||
secretName,
|
||||
runId: params.runId,
|
||||
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
|
||||
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
|
||||
ownerName: jobName,
|
||||
ownerUid,
|
||||
bootstrapToken,
|
||||
adapterEnv,
|
||||
});
|
||||
try {
|
||||
await createPerRunSecret(clients, {
|
||||
namespace,
|
||||
secretName,
|
||||
runId: params.runId,
|
||||
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
|
||||
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
|
||||
ownerName: jobName,
|
||||
ownerUid,
|
||||
bootstrapToken,
|
||||
adapterEnv,
|
||||
});
|
||||
|
||||
const podName = await orchestrator.findPod(clients, namespace, jobName);
|
||||
const podName = await orchestrator.findPod(clients, namespace, jobName);
|
||||
|
||||
const leaseMetadata: KubernetesLeaseMetadata = {
|
||||
namespace,
|
||||
jobName,
|
||||
podName,
|
||||
secretName,
|
||||
phase: "Pending",
|
||||
backend: config.backend,
|
||||
};
|
||||
const leaseMetadata: KubernetesLeaseMetadata = {
|
||||
namespace,
|
||||
jobName,
|
||||
podName,
|
||||
secretName,
|
||||
phase: "Pending",
|
||||
backend: config.backend,
|
||||
};
|
||||
|
||||
return {
|
||||
providerLeaseId: jobName,
|
||||
metadata: leaseMetadata as unknown as Record<string, unknown>,
|
||||
};
|
||||
return {
|
||||
providerLeaseId: jobName,
|
||||
metadata: leaseMetadata as unknown as Record<string, unknown>,
|
||||
};
|
||||
} catch (err) {
|
||||
try {
|
||||
await orchestrator.release(clients, namespace, jobName);
|
||||
} catch (cleanupErr) {
|
||||
throw new Error(
|
||||
`Kubernetes lease setup failed and cleanup also failed: ${cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)}`,
|
||||
{ cause: err },
|
||||
);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
},
|
||||
|
||||
async onEnvironmentRealizeWorkspace(
|
||||
@@ -397,6 +409,7 @@ const plugin = definePlugin({
|
||||
// 1. Ensure the Sandbox pod is Ready (wait if needed).
|
||||
// 2. Exec the command into the running pod.
|
||||
// 3. Return exec result directly (no log scraping needed).
|
||||
const executeStartedAt = Date.now();
|
||||
|
||||
let podName =
|
||||
typeof lease.metadata?.podName === "string" && lease.metadata.podName
|
||||
@@ -464,6 +477,7 @@ const plugin = definePlugin({
|
||||
? ["/bin/sh", "-lc", rawCommand]
|
||||
: ["/bin/sh", "-l"];
|
||||
|
||||
const remainingTimeoutMs = Math.max(1, effectiveTimeoutMs - (Date.now() - executeStartedAt));
|
||||
const execResult = await execInPod(
|
||||
kc,
|
||||
namespace,
|
||||
@@ -471,6 +485,7 @@ const plugin = definePlugin({
|
||||
"agent",
|
||||
execCommand,
|
||||
typeof params.stdin === "string" ? params.stdin : undefined,
|
||||
remainingTimeoutMs,
|
||||
);
|
||||
|
||||
return {
|
||||
|
||||
@@ -21,6 +21,7 @@ export async function execInPod(
|
||||
containerName: string,
|
||||
command: string[],
|
||||
stdin?: string,
|
||||
timeoutMs?: number,
|
||||
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
|
||||
const exec = new Exec(kc);
|
||||
const stdoutStream = new PassThrough();
|
||||
@@ -45,9 +46,16 @@ export async function execInPod(
|
||||
return await new Promise<{ exitCode: number; stdout: string; stderr: string }>(
|
||||
(resolve, reject) => {
|
||||
let settled = false;
|
||||
const timeout =
|
||||
typeof timeoutMs === "number" && timeoutMs > 0
|
||||
? setTimeout(() => {
|
||||
finishWithTransportFailure(`Kubernetes exec timed out after ${timeoutMs}ms`);
|
||||
}, timeoutMs)
|
||||
: null;
|
||||
const finish = (result: { exitCode: number; stdout: string; stderr: string }) => {
|
||||
if (settled) return;
|
||||
settled = true;
|
||||
if (timeout) clearTimeout(timeout);
|
||||
resolve(result);
|
||||
};
|
||||
const finishWithTransportFailure = (message: string) => {
|
||||
|
||||
@@ -264,7 +264,10 @@ export async function waitForSandboxReady(
|
||||
`Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"} — ${mapped.message ?? ""}`,
|
||||
);
|
||||
}
|
||||
// Pending or Terminating — keep polling
|
||||
if (phase === "Terminating") {
|
||||
throw new Error(`Sandbox ${namespace}/${name} is terminating before it became ready`);
|
||||
}
|
||||
// Pending or unknown — keep polling
|
||||
await sleep(pollMs);
|
||||
}
|
||||
|
||||
|
||||
@@ -38,4 +38,13 @@ describe("execInPod", () => {
|
||||
stderr: expect.stringContaining("websocket closed before status frame"),
|
||||
});
|
||||
});
|
||||
|
||||
it("returns an execution failure if the exec command exceeds its deadline", async () => {
|
||||
execMock.mockResolvedValue(new EventEmitter());
|
||||
|
||||
const result = await execInPod({} as never, "ns", "pod-1", "agent", ["sleep", "60"], undefined, 5);
|
||||
|
||||
expect(result.exitCode).toBe(1);
|
||||
expect(result.stderr).toContain("Kubernetes exec timed out after 5ms");
|
||||
});
|
||||
});
|
||||
|
||||
+12
@@ -213,4 +213,16 @@ describe("waitForSandboxReady", () => {
|
||||
}),
|
||||
).rejects.toThrow(/failed.*OOMKilled/i);
|
||||
});
|
||||
|
||||
it("fails fast when Sandbox starts terminating before it is ready", async () => {
|
||||
const get = vi.fn().mockResolvedValue(makeCr("Terminating"));
|
||||
const clients = { custom: { getNamespacedCustomObject: get } };
|
||||
await expect(
|
||||
waitForSandboxReady(clients as never, "ns", "pc-abc", {
|
||||
timeoutMs: 5000,
|
||||
pollMs: 10,
|
||||
}),
|
||||
).rejects.toThrow(/terminating before it became ready/i);
|
||||
expect(get).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user