From e3c875c1c76cdccd64b7ffa1f3cb5fca263608b6 Mon Sep 17 00:00:00 2001 From: Devin Foley Date: Fri, 22 May 2026 13:34:11 -0700 Subject: [PATCH] fix(sandbox): prevent E2B workspace upload + lease idle failures (PAPA-382) (#6560) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - Heartbeats run inside managed sandboxes (E2B, Cloudflare Sandbox), and each run begins by uploading the agent's workspace as a tar archive > - PAPA-381's E2B runs were failing at 5 and 11 minutes — two distinct failure modes were entangled: workspace tar extraction errors on Linux, and sandbox idle/lease timeouts during normal heartbeat gaps > - Workspace tar extraction failed because macOS bsdtar embeds `LIBARCHIVE.xattr.*` PAX headers that GNU tar on Linux rejects with "This does not look like a tar archive"; the existing `COPYFILE_DISABLE=1` only suppresses AppleDouble `._*` sidecars, not inline PAX xattr entries > - E2B sandboxes also expired between heartbeats because `timeoutMs` defaulted to a short window and was never refreshed per execute, and Cloudflare sandboxes idled out because `sleepAfter` defaulted to 10 minutes > - This pull request adds `--no-xattrs` to the workspace tar invocation, refreshes the E2B sandbox lifetime on each execute and bumps the default `timeoutMs` to 1h, and raises the Cloudflare `sleepAfter` default to 1h > - The benefit is that long-running heartbeat-driven runs (Claude, Codex, etc.) survive across both their initial workspace upload and the natural idle gaps between executes on both E2B and Cloudflare ## What Changed - `packages/adapter-utils/src/sandbox-managed-runtime.ts`: added `--no-xattrs` to `createTarballFromDirectory` so macOS bsdtar produces a clean POSIX tar that GNU tar on Linux can extract, with an inline comment explaining why `COPYFILE_DISABLE=1` alone is insufficient. 
- `packages/plugins/sandbox-providers/e2b/src/plugin.ts`: refreshed the sandbox lifetime on every execute (so long runs don't expire mid-job) and raised the default `timeoutMs` to 1h. - `packages/plugins/sandbox-providers/e2b/src/manifest.ts` and `plugin.test.ts`: updated manifest defaults and added regression coverage for the new behavior. - `packages/plugins/sandbox-providers/cloudflare/src/config.ts`, `manifest.ts`, `plugin.test.ts`: raised default `sleepAfter` from 10m to 1h, mirroring the E2B 1h default, and added a regression test asserting the acquire-lease request body sends `sleepAfter: "1h"` when not overridden. ## Verification - `pnpm --filter @paperclipai/plugin-e2b test` - `pnpm --filter @paperclipai/plugin-cloudflare-sandbox test` - Locally cherry-picked the `--no-xattrs` fix onto master and confirmed end-to-end via a real PAPA-381-style heartbeat-driven E2B run that the workspace upload now extracts cleanly on Linux. The user (board operator) tested this on master and reported "Ok, that worked." - Manual reviewer steps: trigger an E2B heartbeat from a macOS host (this is where the bsdtar xattr headers come from), confirm the workspace tar extracts on the Linux sandbox side; run a long (>15 min) Cloudflare sandbox flow and confirm no lost-lease/idle errors between executes. ## Risks - Low risk overall. - `--no-xattrs` is widely supported by both macOS bsdtar and GNU tar (Linux). Worst case it silently no-ops on a future host that doesn't support it; in that case the existing failure mode reappears, but no new failure mode is introduced. - Raising default `timeoutMs` (E2B) and `sleepAfter` (Cloudflare) from short values to 1h means sandboxes stay alive longer between executes by default. This is the intended behavior — operators that want a tighter idle window can still override via plugin config. 
- E2B per-execute sandbox lifetime refresh adds a small API call per execute; it is bounded by the same client that already handles execute traffic, so no new dependencies or retry semantics. ## Model Used - Claude (Anthropic), `claude-opus-4-7`, extended thinking enabled, tool use enabled (file/grep/git tools and Paperclip control-plane API). Used to diagnose the dual failure mode (workspace tar PAX xattr headers + sandbox lifetime), write the fixes and tests, and drive the verification loop with the board operator. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [ ] If this change affects the UI, I have included before/after screenshots (N/A — no UI changes) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --------- Co-authored-by: Paperclip --- .../src/sandbox-managed-runtime.ts | 7 +++ .../cloudflare/src/config.ts | 2 +- .../cloudflare/src/manifest.ts | 5 +- .../cloudflare/src/plugin.test.ts | 25 ++++++++- .../sandbox-providers/e2b/src/manifest.ts | 5 +- .../sandbox-providers/e2b/src/plugin.test.ts | 53 +++++++++++++++++++ .../sandbox-providers/e2b/src/plugin.ts | 16 +++++- 7 files changed, 105 insertions(+), 8 deletions(-) diff --git a/packages/adapter-utils/src/sandbox-managed-runtime.ts b/packages/adapter-utils/src/sandbox-managed-runtime.ts index 62375d7d..13e9e912 100644 --- a/packages/adapter-utils/src/sandbox-managed-runtime.ts +++ b/packages/adapter-utils/src/sandbox-managed-runtime.ts @@ -138,6 +138,13 @@ async function createTarballFromDirectory(input: { const excludeArgs = ["._*", 
...(input.exclude ?? [])].flatMap((entry) => ["--exclude", entry]); await execTar([ "-c", + // Prevent macOS bsdtar from embedding LIBARCHIVE.xattr.* PAX extended + // headers for extended attributes (e.g. com.apple.provenance). GNU tar on + // Linux does not recognise these proprietary headers and fails extraction + // with "This does not look like a tar archive". COPYFILE_DISABLE=1 (set in + // execTar) already suppresses AppleDouble ._* sidecar files; --no-xattrs + // additionally suppresses the inline PAX xattr entries. + "--no-xattrs", ...(input.followSymlinks ? ["-h"] : []), "-f", input.archivePath, diff --git a/packages/plugins/sandbox-providers/cloudflare/src/config.ts b/packages/plugins/sandbox-providers/cloudflare/src/config.ts index 1ed62a26..8bd6bab5 100644 --- a/packages/plugins/sandbox-providers/cloudflare/src/config.ts +++ b/packages/plugins/sandbox-providers/cloudflare/src/config.ts @@ -1,7 +1,7 @@ import type { CloudflareDriverConfig } from "./types.js"; const DEFAULT_REQUESTED_CWD = "/workspace/paperclip"; -const DEFAULT_SLEEP_AFTER = "10m"; +const DEFAULT_SLEEP_AFTER = "1h"; const DEFAULT_TIMEOUT_MS = 300_000; const DEFAULT_BRIDGE_REQUEST_TIMEOUT_MS = 300_000; const LOCALHOST_HOSTNAMES = new Set(["localhost", "127.0.0.1", "::1"]); diff --git a/packages/plugins/sandbox-providers/cloudflare/src/manifest.ts b/packages/plugins/sandbox-providers/cloudflare/src/manifest.ts index 21f3ddda..52b18660 100644 --- a/packages/plugins/sandbox-providers/cloudflare/src/manifest.ts +++ b/packages/plugins/sandbox-providers/cloudflare/src/manifest.ts @@ -49,8 +49,9 @@ const manifest: PaperclipPluginManifestV1 = { }, sleepAfter: { type: "string", - default: "10m", - description: "Idle timeout passed to getSandbox(). Ignored when keepAlive is true.", + default: "1h", + description: + "Idle timeout passed to getSandbox() on lease creation. Defaults to 1 hour so a fresh sandbox survives normal Claude/Codex heartbeats. 
Ignored when keepAlive is true.", }, normalizeId: { type: "boolean", diff --git a/packages/plugins/sandbox-providers/cloudflare/src/plugin.test.ts b/packages/plugins/sandbox-providers/cloudflare/src/plugin.test.ts index 84a6077b..5e3908bd 100644 --- a/packages/plugins/sandbox-providers/cloudflare/src/plugin.test.ts +++ b/packages/plugins/sandbox-providers/cloudflare/src/plugin.test.ts @@ -62,7 +62,7 @@ describe("Cloudflare sandbox provider plugin", () => { bridgeAuthToken: "secret-ref://bridge-token", reuseLease: true, keepAlive: true, - sleepAfter: "10m", + sleepAfter: "1h", normalizeId: false, requestedCwd: "/workspace/custom", sessionStrategy: "default", @@ -145,6 +145,29 @@ describe("Cloudflare sandbox provider plugin", () => { }); }); + it("defaults the sleepAfter passed to the bridge to 1h so long runs don't idle out", async () => { + fetchMock.mockResolvedValueOnce( + jsonResponse({ + providerLeaseId: "pc-run-1-abcd1234", + metadata: { provider: "cloudflare", remoteCwd: "/workspace/paperclip", resumedLease: false }, + }), + ); + + await plugin.definition.onEnvironmentAcquireLease?.({ + driverKey: "cloudflare", + companyId: "company-1", + environmentId: "env-1", + runId: "run-1", + requestedCwd: "/workspace/paperclip", + config: { + bridgeBaseUrl: "https://bridge.example.workers.dev", + bridgeAuthToken: "resolved-token", + }, + }); + + expect(requestBodyAt()).toMatchObject({ sleepAfter: "1h" }); + }); + it("returns expired lease semantics when resume reports lost state", async () => { fetchMock.mockResolvedValueOnce( jsonResponse( diff --git a/packages/plugins/sandbox-providers/e2b/src/manifest.ts b/packages/plugins/sandbox-providers/e2b/src/manifest.ts index 11b23590..a245e80c 100644 --- a/packages/plugins/sandbox-providers/e2b/src/manifest.ts +++ b/packages/plugins/sandbox-providers/e2b/src/manifest.ts @@ -39,8 +39,9 @@ const manifest: PaperclipPluginManifestV1 = { }, timeoutMs: { type: "number", - description: "Sandbox timeout in milliseconds.", - default: 
300000, + description: + "Sandbox lifetime in milliseconds, refreshed on each command. Defaults to 1 hour. Raise this if your runs commonly idle longer than the default between commands.", + default: 3600000, }, reuseLease: { type: "boolean", diff --git a/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts b/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts index 99b881c8..3f982284 100644 --- a/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts +++ b/packages/plugins/sandbox-providers/e2b/src/plugin.test.ts @@ -379,6 +379,59 @@ describe("E2B sandbox provider plugin", () => { }); }); + it("refreshes the sandbox lifetime on every execute so long runs don't die mid-command", async () => { + const sandbox = createMockSandbox(); + mockConnect.mockResolvedValue(sandbox); + + await plugin.definition.onEnvironmentExecute?.({ + driverKey: "e2b", + companyId: "company-1", + environmentId: "env-1", + config: { + template: "base", + apiKey: "resolved-key", + timeoutMs: 1_800_000, + reuseLease: false, + }, + lease: { providerLeaseId: "sandbox-123", metadata: {} }, + command: "printf", + args: ["hello"], + cwd: "/workspace", + env: {}, + timeoutMs: 1000, + }); + + expect(sandbox.setTimeout).toHaveBeenCalledWith(1_800_000); + }); + + it("still runs the command when the setTimeout refresh fails transiently", async () => { + const sandbox = createMockSandbox(); + sandbox.setTimeout.mockRejectedValueOnce(new Error("transient e2b api error")); + mockConnect.mockResolvedValue(sandbox); + + const result = await plugin.definition.onEnvironmentExecute?.({ + driverKey: "e2b", + companyId: "company-1", + environmentId: "env-1", + config: { + template: "base", + apiKey: "resolved-key", + timeoutMs: 1_800_000, + reuseLease: false, + }, + lease: { providerLeaseId: "sandbox-123", metadata: {} }, + command: "printf", + args: ["hello"], + cwd: "/workspace", + env: {}, + timeoutMs: 1000, + }); + + expect(sandbox.setTimeout).toHaveBeenCalledWith(1_800_000); + 
expect(sandbox.commands.run).toHaveBeenCalled(); + expect(result?.exitCode).toBe(0); + }); + it("cleans up staged stdin even when writing it fails", async () => { const sandbox = createMockSandbox(); const failure = new Error("write failed"); diff --git a/packages/plugins/sandbox-providers/e2b/src/plugin.ts b/packages/plugins/sandbox-providers/e2b/src/plugin.ts index daf15486..e20dfd68 100644 --- a/packages/plugins/sandbox-providers/e2b/src/plugin.ts +++ b/packages/plugins/sandbox-providers/e2b/src/plugin.ts @@ -34,11 +34,11 @@ function parseDriverConfig(raw: Record): E2bDriverConfig { const template = typeof raw.template === "string" && raw.template.trim().length > 0 ? raw.template.trim() : "base"; - const timeoutMs = Number(raw.timeoutMs ?? 300_000); + const timeoutMs = Number(raw.timeoutMs ?? 3_600_000); return { template, apiKey: typeof raw.apiKey === "string" && raw.apiKey.trim().length > 0 ? raw.apiKey.trim() : null, - timeoutMs: Number.isFinite(timeoutMs) ? Math.trunc(timeoutMs) : 300_000, + timeoutMs: Number.isFinite(timeoutMs) ? Math.trunc(timeoutMs) : 3_600_000, reuseLease: raw.reuseLease === true, }; } @@ -391,6 +391,18 @@ const plugin = definePlugin({ const config = parseDriverConfig(params.config); const sandbox = await connectSandbox(config, params.lease.providerLeaseId); + // Refresh the sandbox death clock on every command. E2B's `timeoutMs` is + // the absolute sandbox lifetime from create/connect; without this, a run + // longer than `config.timeoutMs` will have its sandbox killed mid-command + // and the next call throws "Sandbox is probably not running anymore". + // The refresh is best-effort: the sandbox is already healthy at this + // point, so a transient API error on setTimeout should not block the + // command from running. Worst case the existing lifetime stands. 
+ try { + await sandbox.setTimeout(config.timeoutMs); + } catch { + // ignore — keep going with the existing sandbox lifetime + } const baseCommand = buildLoginShellScript({ command: params.command, args: params.args ?? [],