forked from farhoodlabs/paperclip
fix(plugin): bound kubernetes sandbox execution
This commit is contained in:
@@ -274,33 +274,45 @@ const plugin = definePlugin({
|
|||||||
// NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster
|
// NOTE: For sandbox-cr, if the Secret outlives the Sandbox due to a cluster
|
||||||
// quirk, the release() call will still clean it up via namespace GC or
|
// quirk, the release() call will still clean it up via namespace GC or
|
||||||
// explicit delete in a future milestone.
|
// explicit delete in a future milestone.
|
||||||
await createPerRunSecret(clients, {
|
try {
|
||||||
namespace,
|
await createPerRunSecret(clients, {
|
||||||
secretName,
|
namespace,
|
||||||
runId: params.runId,
|
secretName,
|
||||||
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
|
runId: params.runId,
|
||||||
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
|
ownerKind: isSandboxCrBackend ? "Sandbox" : "Job",
|
||||||
ownerName: jobName,
|
ownerApiVersion: isSandboxCrBackend ? "agents.x-k8s.io/v1alpha1" : "batch/v1",
|
||||||
ownerUid,
|
ownerName: jobName,
|
||||||
bootstrapToken,
|
ownerUid,
|
||||||
adapterEnv,
|
bootstrapToken,
|
||||||
});
|
adapterEnv,
|
||||||
|
});
|
||||||
|
|
||||||
const podName = await orchestrator.findPod(clients, namespace, jobName);
|
const podName = await orchestrator.findPod(clients, namespace, jobName);
|
||||||
|
|
||||||
const leaseMetadata: KubernetesLeaseMetadata = {
|
const leaseMetadata: KubernetesLeaseMetadata = {
|
||||||
namespace,
|
namespace,
|
||||||
jobName,
|
jobName,
|
||||||
podName,
|
podName,
|
||||||
secretName,
|
secretName,
|
||||||
phase: "Pending",
|
phase: "Pending",
|
||||||
backend: config.backend,
|
backend: config.backend,
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
providerLeaseId: jobName,
|
providerLeaseId: jobName,
|
||||||
metadata: leaseMetadata as unknown as Record<string, unknown>,
|
metadata: leaseMetadata as unknown as Record<string, unknown>,
|
||||||
};
|
};
|
||||||
|
} catch (err) {
|
||||||
|
try {
|
||||||
|
await orchestrator.release(clients, namespace, jobName);
|
||||||
|
} catch (cleanupErr) {
|
||||||
|
throw new Error(
|
||||||
|
`Kubernetes lease setup failed and cleanup also failed: ${cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr)}`,
|
||||||
|
{ cause: err },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
async onEnvironmentRealizeWorkspace(
|
async onEnvironmentRealizeWorkspace(
|
||||||
@@ -397,6 +409,7 @@ const plugin = definePlugin({
|
|||||||
// 1. Ensure the Sandbox pod is Ready (wait if needed).
|
// 1. Ensure the Sandbox pod is Ready (wait if needed).
|
||||||
// 2. Exec the command into the running pod.
|
// 2. Exec the command into the running pod.
|
||||||
// 3. Return exec result directly (no log scraping needed).
|
// 3. Return exec result directly (no log scraping needed).
|
||||||
|
const executeStartedAt = Date.now();
|
||||||
|
|
||||||
let podName =
|
let podName =
|
||||||
typeof lease.metadata?.podName === "string" && lease.metadata.podName
|
typeof lease.metadata?.podName === "string" && lease.metadata.podName
|
||||||
@@ -464,6 +477,7 @@ const plugin = definePlugin({
|
|||||||
? ["/bin/sh", "-lc", rawCommand]
|
? ["/bin/sh", "-lc", rawCommand]
|
||||||
: ["/bin/sh", "-l"];
|
: ["/bin/sh", "-l"];
|
||||||
|
|
||||||
|
const remainingTimeoutMs = Math.max(1, effectiveTimeoutMs - (Date.now() - executeStartedAt));
|
||||||
const execResult = await execInPod(
|
const execResult = await execInPod(
|
||||||
kc,
|
kc,
|
||||||
namespace,
|
namespace,
|
||||||
@@ -471,6 +485,7 @@ const plugin = definePlugin({
|
|||||||
"agent",
|
"agent",
|
||||||
execCommand,
|
execCommand,
|
||||||
typeof params.stdin === "string" ? params.stdin : undefined,
|
typeof params.stdin === "string" ? params.stdin : undefined,
|
||||||
|
remainingTimeoutMs,
|
||||||
);
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ export async function execInPod(
|
|||||||
containerName: string,
|
containerName: string,
|
||||||
command: string[],
|
command: string[],
|
||||||
stdin?: string,
|
stdin?: string,
|
||||||
|
timeoutMs?: number,
|
||||||
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
|
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
|
||||||
const exec = new Exec(kc);
|
const exec = new Exec(kc);
|
||||||
const stdoutStream = new PassThrough();
|
const stdoutStream = new PassThrough();
|
||||||
@@ -45,9 +46,16 @@ export async function execInPod(
|
|||||||
return await new Promise<{ exitCode: number; stdout: string; stderr: string }>(
|
return await new Promise<{ exitCode: number; stdout: string; stderr: string }>(
|
||||||
(resolve, reject) => {
|
(resolve, reject) => {
|
||||||
let settled = false;
|
let settled = false;
|
||||||
|
const timeout =
|
||||||
|
typeof timeoutMs === "number" && timeoutMs > 0
|
||||||
|
? setTimeout(() => {
|
||||||
|
finishWithTransportFailure(`Kubernetes exec timed out after ${timeoutMs}ms`);
|
||||||
|
}, timeoutMs)
|
||||||
|
: null;
|
||||||
const finish = (result: { exitCode: number; stdout: string; stderr: string }) => {
|
const finish = (result: { exitCode: number; stdout: string; stderr: string }) => {
|
||||||
if (settled) return;
|
if (settled) return;
|
||||||
settled = true;
|
settled = true;
|
||||||
|
if (timeout) clearTimeout(timeout);
|
||||||
resolve(result);
|
resolve(result);
|
||||||
};
|
};
|
||||||
const finishWithTransportFailure = (message: string) => {
|
const finishWithTransportFailure = (message: string) => {
|
||||||
|
|||||||
@@ -264,7 +264,10 @@ export async function waitForSandboxReady(
|
|||||||
`Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"} — ${mapped.message ?? ""}`,
|
`Sandbox ${namespace}/${name} failed: ${mapped.reason ?? "unknown reason"} — ${mapped.message ?? ""}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
// Pending or Terminating — keep polling
|
if (phase === "Terminating") {
|
||||||
|
throw new Error(`Sandbox ${namespace}/${name} is terminating before it became ready`);
|
||||||
|
}
|
||||||
|
// Pending or unknown — keep polling
|
||||||
await sleep(pollMs);
|
await sleep(pollMs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -38,4 +38,13 @@ describe("execInPod", () => {
|
|||||||
stderr: expect.stringContaining("websocket closed before status frame"),
|
stderr: expect.stringContaining("websocket closed before status frame"),
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("returns an execution failure if the exec command exceeds its deadline", async () => {
|
||||||
|
execMock.mockResolvedValue(new EventEmitter());
|
||||||
|
|
||||||
|
const result = await execInPod({} as never, "ns", "pod-1", "agent", ["sleep", "60"], undefined, 5);
|
||||||
|
|
||||||
|
expect(result.exitCode).toBe(1);
|
||||||
|
expect(result.stderr).toContain("Kubernetes exec timed out after 5ms");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
+12
@@ -213,4 +213,16 @@ describe("waitForSandboxReady", () => {
|
|||||||
}),
|
}),
|
||||||
).rejects.toThrow(/failed.*OOMKilled/i);
|
).rejects.toThrow(/failed.*OOMKilled/i);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("fails fast when Sandbox starts terminating before it is ready", async () => {
|
||||||
|
const get = vi.fn().mockResolvedValue(makeCr("Terminating"));
|
||||||
|
const clients = { custom: { getNamespacedCustomObject: get } };
|
||||||
|
await expect(
|
||||||
|
waitForSandboxReady(clients as never, "ns", "pc-abc", {
|
||||||
|
timeoutMs: 5000,
|
||||||
|
pollMs: 10,
|
||||||
|
}),
|
||||||
|
).rejects.toThrow(/terminating before it became ready/i);
|
||||||
|
expect(get).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user