fix(remote-sandbox): harden host workspace resumes (#5922)

## Thinking Path

> - Paperclip orchestrates AI agents through a control plane while
adapters execute work in local, remote, or sandboxed runtimes.
> - Remote sandbox execution depends on a strict host-versus-remote
workspace boundary: the host prepares/restores files, while the adapter
command runs inside the sandbox cwd.
> - Jannes' PR #5823 identified host-side failure modes that were not
covered by replacement PR #5822.
> - Persisting a remote pod cwd in session params could poison the next
host heartbeat resume and make Paperclip inspect or upload system temp
roots.
> - Plugin sandbox providers also need a narrow way to receive
model-provider API keys without exposing the full server environment to
every plugin worker.
> - This pull request ports the host-side fixes from #5823 in the
current codebase style, with focused regression coverage.
> - The benefit is safer remote sandbox resumes and plugin worker
environment handling without broadening core plugin privileges.

## What Changed

- Persist host workspace cwd, not remote sandbox cwd, in `claude_local`
session params while retaining remote execution identity metadata.
- Reject saved session cwds that point at system roots before the
heartbeat falls back to the agent home workspace.
- Skip sockets, FIFOs, devices, and any other entry that is not a regular
file, directory, or symlink during workspace restore snapshot
capture/comparison.
- Pass a small model-provider API-key allowlist only to plugins
declaring `environment.drivers.register`.
- Added focused regression tests for remote Claude session params,
unsafe session cwd detection, plugin worker env filtering, and non-file
snapshot entries.

Credits: ports host-side fixes from Jannes' #5823.

## Verification

- `pnpm vitest run
packages/adapter-utils/src/workspace-restore-merge.test.ts
server/src/services/session-workspace-cwd.test.ts
server/src/__tests__/claude-local-execute.test.ts
server/src/__tests__/plugin-database.test.ts` (25 passed, 7 skipped by
existing embedded-Postgres host guard)
- `pnpm --filter @paperclipai/adapter-utils typecheck`
- `pnpm --filter @paperclipai/adapter-claude-local typecheck`
- `pnpm --filter @paperclipai/server typecheck`

## Risks

- Low risk: changes are scoped to remote sandbox/session metadata,
workspace snapshot filtering, and plugin worker env setup.
- Sandbox-provider plugins now receive only the explicit model-provider
key allowlist; any provider needing another key name will need a
deliberate allowlist update.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5-based coding agent, tool-enabled local code
execution and repository editing.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta
2026-05-13 16:23:04 -05:00
committed by GitHub
parent 012a738729
commit d1a8c873b2
10 changed files with 206 additions and 14 deletions
@@ -1,4 +1,5 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
import net from "node:net";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
@@ -58,4 +59,26 @@ describe("workspace restore merge", () => {
readFile(path.join(targetDir, "manual-qa", "environment-matrix", "ssh", "codex_local.md"), "utf8"),
).resolves.toBe("ssh codex\n");
});
it("ignores non-file entries when capturing snapshots", async () => {
  // On Windows the test returns immediately without exercising anything.
  if (process.platform === "win32") return;

  const snapshotRoot = await mkdtemp(path.join(os.tmpdir(), "paperclip-restore-merge-"));
  cleanupDirs.push(snapshotRoot);

  const listener = net.createServer();

  // Resolves once the server is bound to the given socket path; rejects on a server error.
  const startListening = (socketFile: string) =>
    new Promise<void>((resolve, reject) => {
      listener.once("error", reject);
      listener.listen(socketFile, resolve);
    });

  // Resolves when the close callback fires, whether or not close reported an error.
  const stopListening = () =>
    new Promise<void>((resolve) => {
      listener.close(() => resolve());
    });

  try {
    // The socket file must exist inside the root while the snapshot is captured.
    await startListening(path.join(snapshotRoot, "runtime.sock"));
    const { entries } = await captureDirectorySnapshot(snapshotRoot, { exclude: [] });
    expect(entries.has("runtime.sock")).toBe(false);
  } finally {
    await stopListening();
  }
});
});
@@ -47,6 +47,10 @@ async function walkDirectory(
const fullPath = path.join(root, nextRelative);
const stats = await fs.lstat(fullPath);
if (!stats.isDirectory() && !stats.isSymbolicLink() && !stats.isFile()) {
continue;
}
if (stats.isDirectory()) {
out.set(nextRelative, { kind: "dir" });
await walkDirectory(root, exclude, nextRelative, out);
@@ -87,6 +91,8 @@ async function readSnapshotEntry(root: string, relative: string): Promise<Snapsh
target: await fs.readlink(fullPath),
};
}
if (!stats.isFile()) return null;
return {
kind: "file",
mode: stats.mode,
@@ -89,6 +89,15 @@ interface ClaudeRuntimeConfig {
extraArgs: string[];
}
/**
 * Decides whether a saved session cwd is compatible with the cwd of the
 * current execution.
 *
 * @param input.runtimeSessionCwd - cwd recorded with the saved session; an
 *   empty string is treated as "nothing recorded".
 * @param input.effectiveExecutionCwd - cwd the current execution will use.
 * @param input.executionTargetIsRemote - when true the cwd comparison is
 *   skipped entirely.
 * @returns `true` when the target is remote, when no session cwd was
 *   recorded, or when both cwds resolve to the same absolute path;
 *   `false` otherwise.
 */
export function claudeSessionCwdMatchesExecutionTarget(input: {
  runtimeSessionCwd: string;
  effectiveExecutionCwd: string;
  executionTargetIsRemote: boolean;
}): boolean {
  const { runtimeSessionCwd, effectiveExecutionCwd, executionTargetIsRemote } = input;

  // Remote targets are never rejected on the basis of the cwd.
  if (executionTargetIsRemote) return true;

  // Without a recorded cwd there is nothing to compare against.
  if (runtimeSessionCwd.length === 0) return true;

  // Compare resolved absolute paths so trailing slashes and "." / ".."
  // segments do not cause a spurious mismatch.
  const savedCwd = path.resolve(runtimeSessionCwd);
  const currentCwd = path.resolve(effectiveExecutionCwd);
  return savedCwd === currentCwd;
}
function buildLoginResult(input: {
proc: RunProcessResult;
loginUrl: string | null;
@@ -591,7 +600,11 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
const canResumeSession =
runtimeSessionId.length > 0 &&
hasMatchingPromptBundle &&
(runtimeSessionCwd.length === 0 || path.resolve(runtimeSessionCwd) === path.resolve(effectiveExecutionCwd)) &&
claudeSessionCwdMatchesExecutionTarget({
runtimeSessionCwd,
effectiveExecutionCwd,
executionTargetIsRemote,
}) &&
adapterExecutionTargetSessionMatches(runtimeRemoteExecution, runtimeExecutionTarget);
const sessionId = canResumeSession ? runtimeSessionId : null;
if (
@@ -853,7 +866,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
const resolvedSessionParams = resolvedSessionId
? ({
sessionId: resolvedSessionId,
cwd: effectiveExecutionCwd,
cwd,
promptBundleKey: promptBundle.bundleKey,
...(executionTargetIsRemote
? {
@@ -1,4 +1,4 @@
export { execute, runClaudeLogin } from "./execute.js";
export { claudeSessionCwdMatchesExecutionTarget, execute, runClaudeLogin } from "./execute.js";
export { listClaudeSkills, syncClaudeSkills } from "./skills.js";
export { listClaudeModels } from "./models.js";
export { testEnvironment } from "./test.js";