From a7b45938b7bbc3d3ef458fac8831c74182e652df Mon Sep 17 00:00:00 2001 From: Devin Foley Date: Sun, 3 May 2026 12:19:35 -0700 Subject: [PATCH] Let sandbox providers declare shell defaults (#5114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Agents execute in sandboxed remote environments served by pluggable sandbox > providers (E2B today, more later) > - Today every sandbox command runs under `sh -lc` regardless of what the > provider's container actually ships > - That misses bash-only shell init on E2B (which ships bash) and prevents > future providers from declaring a different default — there's no way for a > provider to say "I have bash, use it" > - This PR adds a `shellCommand` field to sandbox execution targets so providers > can declare their preferred shell ("bash" for E2B), threads it through the > sandbox-managed-runtime client, callback bridge, and execution-target shell > helper, and validates the value at the lease-metadata boundary > - The benefit is that sandbox commands run under the right shell on the right > provider, and adding new sandbox providers only needs to declare a shell > preference ## What Changed - Added `packages/adapter-utils/src/sandbox-shell.ts` exporting `preferredShellForSandbox(shellCommand)` (returns `"bash"` if input is `"bash"`, else `"sh"`) - Added `shellCommand?: "bash" | "sh" | null` to `AdapterSandboxExecutionTarget` and `CommandManagedRuntimeSpec`; threaded it through `runAdapterExecutionTargetShellCommand`, `prepareAdapterExecutionTargetRuntime`, and `startAdapterExecutionTargetPaperclipBridge` - `createCommandManagedRuntimeClient`, `prepareCommandManagedRuntime`, and `createCommandManagedSandboxCallbackBridgeQueueClient` now take an optional `shellCommand` and use `preferredShellForSandbox` to pick the shell - `startSandboxCallbackBridgeServer` accepts a `shellCommand` for its server startup, 
readiness probe, and stop hook - E2B sandbox plugin declares `shellCommand: "bash"` in `leaseMetadata` - `resolveEnvironmentExecutionTarget` reads `shellCommand` from lease metadata (validating against `"bash" | "sh" | null`) - `environment-runtime.ts` adds `"shellCommand"` to `INTERNAL_PLUGIN_SANDBOX_CONFIG_KEYS` so the field round-trips through internal plugin config without leaking to external plugin metadata - Updated tests in `command-managed-runtime.test.ts`, `execution-target-sandbox.test.ts`, `sandbox-callback-bridge.test.ts`, `environment-execution-target.test.ts` ## Verification - `pnpm --filter @paperclipai/adapter-utils test` - `pnpm --filter @paperclipai/server test -- environment-execution-target` - `pnpm --filter @paperclipai/sandbox-providers-e2b test` - Manual QA: boot a Paperclip instance, create an E2B-backed environment, run a claude_local agent against it, and confirm the run completes (verifies bash shell semantics flow through the callback bridge end-to-end) ## Risks - E2B sandbox commands now run under `bash -lc` instead of `sh -lc`. Bash accepts the POSIX `sh` syntax used by the commands we issue (no busybox-only flags in our shell scripts), so risk is low. The one behavioural difference is login-shell init: `bash -l` reads `~/.bash_profile` or `~/.bash_login` in preference to `~/.profile`, so an image that ships one of those files will no longer source `~/.profile` unless that file does so itself. The `shellCommand` field is opt-in via lease metadata — providers that don't declare it stay on `sh`. - New optional field on `CommandManagedRuntimeSpec` and `AdapterSandboxExecutionTarget`. Consumers ignoring the field retain previous behaviour (sh). - Lease metadata now carries an additional field. Existing leases without `shellCommand` resolve to `null` and fall back to sh — backwards compatible. 
## Model Used - OpenAI GPT-5.4 (reasoning effort: high) via Codex CLI - Provider: OpenAI - Used to author the code changes in this PR ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [ ] If this change affects the UI, I have included before/after screenshots — N/A (no UI changes) - [ ] I have updated relevant documentation to reflect my changes — N/A - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --- .../src/command-managed-runtime.test.ts | 10 ++++- .../src/command-managed-runtime.ts | 14 +++++-- .../src/execution-target-sandbox.test.ts | 38 ++++++++++++++++++- .../adapter-utils/src/execution-target.ts | 12 +++++- .../src/sandbox-callback-bridge.test.ts | 10 ++++- .../src/sandbox-callback-bridge.ts | 17 +++++++-- packages/adapter-utils/src/sandbox-shell.ts | 3 ++ .../sandbox-providers/e2b/src/plugin.ts | 1 + .../environment-execution-target.test.ts | 36 ++++++++++++++++++ .../services/environment-execution-target.ts | 5 +++ server/src/services/environment-runtime.ts | 1 + 11 files changed, 133 insertions(+), 14 deletions(-) create mode 100644 packages/adapter-utils/src/sandbox-shell.ts diff --git a/packages/adapter-utils/src/command-managed-runtime.test.ts b/packages/adapter-utils/src/command-managed-runtime.test.ts index 9be9c062..f8c0dc7b 100644 --- a/packages/adapter-utils/src/command-managed-runtime.test.ts +++ b/packages/adapter-utils/src/command-managed-runtime.test.ts @@ -55,9 +55,15 @@ describe("command managed runtime", () => { ...process.env, ...input.env, }; - const command = input.command === "sh" ? 
"/bin/sh" : input.command; + const command = + input.command === "sh" ? "/bin/sh" : input.command === "bash" ? "/bin/bash" : input.command; const args = [...(input.args ?? [])]; - if (input.stdin != null && input.command === "sh" && args[0] === "-lc" && typeof args[1] === "string") { + if ( + input.stdin != null && + (input.command === "sh" || input.command === "bash") && + args[0] === "-lc" && + typeof args[1] === "string" + ) { env.PAPERCLIP_TEST_STDIN = input.stdin; args[1] = `printf '%s' \"$PAPERCLIP_TEST_STDIN\" | (${args[1]})`; } diff --git a/packages/adapter-utils/src/command-managed-runtime.ts b/packages/adapter-utils/src/command-managed-runtime.ts index 706c3fd7..a126ffa4 100644 --- a/packages/adapter-utils/src/command-managed-runtime.ts +++ b/packages/adapter-utils/src/command-managed-runtime.ts @@ -6,6 +6,7 @@ import { type SandboxManagedRuntimeClient, type SandboxRemoteExecutionSpec, } from "./sandbox-managed-runtime.js"; +import { preferredShellForSandbox } from "./sandbox-shell.js"; import type { RunProcessResult } from "./server-utils.js"; export interface CommandManagedRuntimeRunner { @@ -23,6 +24,7 @@ export interface CommandManagedRuntimeRunner { export interface CommandManagedRuntimeSpec { providerKey?: string | null; + shellCommand?: "bash" | "sh" | null; leaseId?: string | null; remoteCwd: string; timeoutMs?: number | null; @@ -58,10 +60,12 @@ export function createCommandManagedRuntimeClient(input: { runner: CommandManagedRuntimeRunner; remoteCwd: string; timeoutMs: number; + shellCommand?: "bash" | "sh" | null; }): SandboxManagedRuntimeClient { + const shellCommand = preferredShellForSandbox(input.shellCommand); const runShell = async (script: string, opts: { stdin?: string; timeoutMs?: number } = {}) => { const result = await input.runner.execute({ - command: "sh", + command: shellCommand, args: ["-lc", script], cwd: input.remoteCwd, stdin: opts.stdin, @@ -112,7 +116,7 @@ export function createCommandManagedRuntimeClient(input: { }, remove: 
async (remotePath) => { const result = await input.runner.execute({ - command: "sh", + command: shellCommand, args: ["-lc", `rm -rf ${shellQuote(remotePath)}`], cwd: input.remoteCwd, timeoutMs: input.timeoutMs, @@ -121,7 +125,7 @@ export function createCommandManagedRuntimeClient(input: { }, run: async (command, options) => { const result = await input.runner.execute({ - command: "sh", + command: shellCommand, args: ["-lc", command], cwd: input.remoteCwd, timeoutMs: options.timeoutMs, @@ -157,11 +161,13 @@ export async function prepareCommandManagedRuntime(input: { runner: input.runner, remoteCwd: workspaceRemoteDir, timeoutMs, + shellCommand: input.spec.shellCommand, }); + const shellCommand = preferredShellForSandbox(input.spec.shellCommand); if (input.installCommand?.trim()) { const result = await input.runner.execute({ - command: "sh", + command: shellCommand, args: ["-lc", input.installCommand.trim()], cwd: workspaceRemoteDir, timeoutMs, diff --git a/packages/adapter-utils/src/execution-target-sandbox.test.ts b/packages/adapter-utils/src/execution-target-sandbox.test.ts index cda63354..6fe99a7c 100644 --- a/packages/adapter-utils/src/execution-target-sandbox.test.ts +++ b/packages/adapter-utils/src/execution-target-sandbox.test.ts @@ -39,7 +39,8 @@ describe("sandbox adapter execution targets", () => { onSpawn?: (meta: { pid: number; startedAt: string }) => Promise; }) => { counter += 1; - return runChildProcess(`sandbox-run-${counter}`, input.command, input.args ?? [], { + const command = input.command === "bash" ? "/bin/bash" : input.command; + return runChildProcess(`sandbox-run-${counter}`, command, input.args ?? [], { cwd: input.cwd ?? process.cwd(), env: input.env ?? 
{}, stdin: input.stdin, @@ -140,6 +141,41 @@ describe("sandbox adapter execution targets", () => { })); }); + it("uses the provider-declared shell for sandbox helper commands", async () => { + const runner = { + execute: vi.fn(async () => ({ + exitCode: 0, + signal: null, + timedOut: false, + stdout: "/home/sandbox", + stderr: "", + pid: null, + startedAt: new Date().toISOString(), + })), + }; + const target: AdapterSandboxExecutionTarget = { + kind: "remote", + transport: "sandbox", + providerKey: "custom-provider", + shellCommand: "bash", + remoteCwd: "/workspace", + runner, + }; + + await runAdapterExecutionTargetShellCommand("run-2b", target, 'printf %s "$HOME"', { + cwd: "/local/workspace", + env: {}, + timeoutSec: 7, + }); + + expect(runner.execute).toHaveBeenCalledWith(expect.objectContaining({ + command: "bash", + args: ["-lc", 'printf %s "$HOME"'], + cwd: "/workspace", + timeoutMs: 7000, + })); + }); + it("starts a localhost Paperclip bridge for sandbox targets in bridge mode", async () => { const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-execution-target-bridge-")); cleanupDirs.push(rootDir); diff --git a/packages/adapter-utils/src/execution-target.ts b/packages/adapter-utils/src/execution-target.ts index 58c128e8..8fe6ee4b 100644 --- a/packages/adapter-utils/src/execution-target.ts +++ b/packages/adapter-utils/src/execution-target.ts @@ -26,6 +26,7 @@ import { type RunProcessResult, type TerminalResultCleanupOptions, } from "./server-utils.js"; +import { preferredShellForSandbox } from "./sandbox-shell.js"; export interface AdapterLocalExecutionTarget { kind: "local"; @@ -47,6 +48,7 @@ export interface AdapterSandboxExecutionTarget { kind: "remote"; transport: "sandbox"; providerKey?: string | null; + shellCommand?: "bash" | "sh" | null; environmentId?: string | null; leaseId?: string | null; remoteCwd: string; @@ -214,6 +216,10 @@ function requireSandboxRunner(target: AdapterSandboxExecutionTarget): CommandMan ); } +function 
preferredSandboxShell(target: AdapterSandboxExecutionTarget): "bash" | "sh" { + return preferredShellForSandbox(target.shellCommand); +} + export async function ensureAdapterExecutionTargetCommandResolvable( command: string, target: AdapterExecutionTarget | null | undefined, @@ -341,8 +347,9 @@ export async function runAdapterExecutionTargetShellCommand( } } + const shellCommand = preferredSandboxShell(target); return await requireSandboxRunner(target).execute({ - command: "sh", + command: shellCommand, args: ["-lc", command], cwd: target.remoteCwd, env: options.env, @@ -612,6 +619,7 @@ export async function prepareAdapterExecutionTargetRuntime(input: { runner: requireSandboxRunner(target), spec: { providerKey: target.providerKey, + shellCommand: target.shellCommand, leaseId: target.leaseId, remoteCwd: target.remoteCwd, timeoutMs: target.timeoutMs, @@ -745,6 +753,7 @@ export async function startAdapterExecutionTargetPaperclipBridge(input: { runner: requireSandboxRunner(target), remoteCwd: target.remoteCwd, timeoutMs: target.timeoutMs, + shellCommand: preferredSandboxShell(target), }); worker = await startSandboxCallbackBridgeWorker({ client, @@ -781,6 +790,7 @@ export async function startAdapterExecutionTargetPaperclipBridge(input: { bridgeAsset, timeoutMs: target.timeoutMs, maxBodyBytes, + shellCommand: preferredSandboxShell(target), }); } catch (error) { await Promise.allSettled([ diff --git a/packages/adapter-utils/src/sandbox-callback-bridge.test.ts b/packages/adapter-utils/src/sandbox-callback-bridge.test.ts index d036771d..dc04df6c 100644 --- a/packages/adapter-utils/src/sandbox-callback-bridge.test.ts +++ b/packages/adapter-utils/src/sandbox-callback-bridge.test.ts @@ -37,9 +37,15 @@ describe("sandbox callback bridge", () => { ...process.env, ...input.env, }; - const command = input.command === "sh" ? "/bin/sh" : input.command; + const command = + input.command === "sh" ? "/bin/sh" : input.command === "bash" ? 
"/bin/bash" : input.command; const args = [...(input.args ?? [])]; - if (input.stdin != null && input.command === "sh" && args[0] === "-lc" && typeof args[1] === "string") { + if ( + input.stdin != null && + (input.command === "sh" || input.command === "bash") && + args[0] === "-lc" && + typeof args[1] === "string" + ) { env.PAPERCLIP_TEST_STDIN = input.stdin; args[1] = `printf '%s' \"$PAPERCLIP_TEST_STDIN\" | (${args[1]})`; } diff --git a/packages/adapter-utils/src/sandbox-callback-bridge.ts b/packages/adapter-utils/src/sandbox-callback-bridge.ts index 013a3bbb..666ba9e9 100644 --- a/packages/adapter-utils/src/sandbox-callback-bridge.ts +++ b/packages/adapter-utils/src/sandbox-callback-bridge.ts @@ -4,6 +4,7 @@ import os from "node:os"; import path from "node:path"; import type { CommandManagedRuntimeRunner } from "./command-managed-runtime.js"; +import { preferredShellForSandbox } from "./sandbox-shell.js"; import type { RunProcessResult } from "./server-utils.js"; const DEFAULT_BRIDGE_TOKEN_BYTES = 24; @@ -133,9 +134,10 @@ async function runShell( cwd: string, script: string, timeoutMs: number, + shellCommand: "bash" | "sh" = "sh", ): Promise { return await runner.execute({ - command: "sh", + command: shellCommand, args: ["-lc", script], cwd, timeoutMs, @@ -266,10 +268,12 @@ export function createCommandManagedSandboxCallbackBridgeQueueClient(input: { runner: CommandManagedRuntimeRunner; remoteCwd: string; timeoutMs?: number | null; + shellCommand?: "bash" | "sh" | null; }): SandboxCallbackBridgeQueueClient { const timeoutMs = normalizeTimeoutMs(input.timeoutMs, DEFAULT_BRIDGE_RESPONSE_TIMEOUT_MS); + const shellCommand = preferredShellForSandbox(input.shellCommand); const runChecked = async (action: string, script: string) => - requireSuccessfulResult(action, await runShell(input.runner, input.remoteCwd, script, timeoutMs)); + requireSuccessfulResult(action, await runShell(input.runner, input.remoteCwd, script, timeoutMs, shellCommand)); return { makeDir: async 
(remotePath) => { @@ -288,6 +292,7 @@ export function createCommandManagedSandboxCallbackBridgeQueueClient(input: { "fi", ].join("\n"), timeoutMs, + shellCommand, ); requireSuccessfulResult(`list ${remotePath}`, result); return result.stdout @@ -525,10 +530,12 @@ export async function startSandboxCallbackBridgeServer(input: { responseTimeoutMs?: number | null; timeoutMs?: number | null; nodeCommand?: string; + shellCommand?: "bash" | "sh" | null; maxQueueDepth?: number | null; maxBodyBytes?: number | null; }): Promise { const timeoutMs = normalizeTimeoutMs(input.timeoutMs, DEFAULT_BRIDGE_RESPONSE_TIMEOUT_MS); + const shellCommand = preferredShellForSandbox(input.shellCommand); const directories = sandboxCallbackBridgeDirectories(input.queueDir); const remoteEntrypoint = path.posix.join(input.assetRemoteDir, SANDBOX_CALLBACK_BRIDGE_ENTRYPOINT); if (input.bridgeAsset) { @@ -536,6 +543,7 @@ export async function startSandboxCallbackBridgeServer(input: { runner: input.runner, remoteCwd: input.remoteCwd, timeoutMs, + shellCommand, }); await assetClient.makeDir(input.assetRemoteDir); const entrypointSource = await fs.readFile(input.bridgeAsset.entrypoint, "utf8"); @@ -553,7 +561,7 @@ export async function startSandboxCallbackBridgeServer(input: { }); const nodeCommand = input.nodeCommand?.trim() || "node"; const startResult = await input.runner.execute({ - command: "sh", + command: shellCommand, args: [ "-lc", [ @@ -594,6 +602,7 @@ export async function startSandboxCallbackBridgeServer(input: { "exit 1", ].join("\n"), timeoutMs, + shellCommand, ); requireSuccessfulResult("wait for sandbox callback bridge readiness", readyResult); @@ -626,7 +635,7 @@ export async function startSandboxCallbackBridgeServer(input: { directories, stop: async () => { const stopResult = await input.runner.execute({ - command: "sh", + command: shellCommand, args: [ "-lc", [ diff --git a/packages/adapter-utils/src/sandbox-shell.ts b/packages/adapter-utils/src/sandbox-shell.ts new file mode 100644 
index 00000000..c83c0a1a --- /dev/null +++ b/packages/adapter-utils/src/sandbox-shell.ts @@ -0,0 +1,3 @@ +export function preferredShellForSandbox(shellCommand: string | null | undefined): "bash" | "sh" { + return shellCommand === "bash" ? "bash" : "sh"; +} diff --git a/packages/plugins/sandbox-providers/e2b/src/plugin.ts b/packages/plugins/sandbox-providers/e2b/src/plugin.ts index 3504620f..723cc810 100644 --- a/packages/plugins/sandbox-providers/e2b/src/plugin.ts +++ b/packages/plugins/sandbox-providers/e2b/src/plugin.ts @@ -127,6 +127,7 @@ function leaseMetadata(input: { }) { return { provider: "e2b", + shellCommand: "bash", template: input.config.template, timeoutMs: input.config.timeoutMs, reuseLease: input.config.reuseLease, diff --git a/server/src/__tests__/environment-execution-target.test.ts b/server/src/__tests__/environment-execution-target.test.ts index cd07ee9c..a1a5ab1d 100644 --- a/server/src/__tests__/environment-execution-target.test.ts +++ b/server/src/__tests__/environment-execution-target.test.ts @@ -97,4 +97,40 @@ describe("resolveEnvironmentExecutionTarget", () => { paperclipTransport: "direct", }); }); + + it("passes through a provider-declared sandbox shell command from lease metadata", async () => { + mockResolveEnvironmentDriverConfigForRuntime.mockResolvedValue({ + driver: "sandbox", + config: { + provider: "fake-plugin", + reuseLease: false, + timeoutMs: 30_000, + }, + }); + + const target = await resolveEnvironmentExecutionTarget({ + db: {} as never, + companyId: "company-1", + adapterType: "claude_local", + environment: { + id: "env-1", + driver: "sandbox", + config: { + provider: "fake-plugin", + }, + }, + leaseId: "lease-1", + leaseMetadata: { + shellCommand: "bash", + }, + lease: null, + environmentRuntime: null, + }); + + expect(target).toMatchObject({ + kind: "remote", + transport: "sandbox", + shellCommand: "bash", + }); + }); }); diff --git a/server/src/services/environment-execution-target.ts 
b/server/src/services/environment-execution-target.ts index 0f2f70db..c2f2e4fb 100644 --- a/server/src/services/environment-execution-target.ts +++ b/server/src/services/environment-execution-target.ts @@ -62,11 +62,16 @@ export async function resolveEnvironmentExecutionTarget(input: { typeof input.leaseMetadata?.paperclipApiUrl === "string" && input.leaseMetadata.paperclipApiUrl.trim().length > 0 ? input.leaseMetadata.paperclipApiUrl.trim() : null; + const shellCommand = + input.leaseMetadata?.shellCommand === "bash" || input.leaseMetadata?.shellCommand === "sh" + ? input.leaseMetadata.shellCommand + : null; return { kind: "remote", transport: "sandbox", providerKey: parsed.config.provider, + shellCommand, remoteCwd, environmentId: input.environment.id ?? null, leaseId: input.leaseId ?? null, diff --git a/server/src/services/environment-runtime.ts b/server/src/services/environment-runtime.ts index ec3c9496..d83244ab 100644 --- a/server/src/services/environment-runtime.ts +++ b/server/src/services/environment-runtime.ts @@ -726,6 +726,7 @@ const INTERNAL_PLUGIN_SANDBOX_CONFIG_KEYS = new Set([ "pluginId", "pluginKey", "providerMetadata", + "shellCommand", "sandboxProviderPlugin", ]);