Files
paperclip/packages/adapter-utils/src/workspace-restore-merge.ts
T
Dotta d1a8c873b2 fix(remote-sandbox): harden host workspace resumes (#5922)
## Thinking Path

> - Paperclip orchestrates AI agents through a control plane while
adapters execute work in local, remote, or sandboxed runtimes.
> - Remote sandbox execution depends on a strict host-versus-remote
workspace boundary: the host prepares/restores files, while the adapter
command runs inside the sandbox cwd.
> - Jannes' PR #5823 identified host-side failure modes that were not
covered by replacement PR #5822.
> - Persisting a remote pod cwd in session params could poison the next
host heartbeat resume and make Paperclip inspect or upload system temp
roots.
> - Plugin sandbox providers also need a narrow way to receive
model-provider API keys without exposing the full server environment to
every plugin worker.
> - This pull request ports the host-side fixes from #5823 in the
current codebase style, with focused regression coverage.
> - The benefit is safer remote sandbox resumes and plugin worker
environment handling without broadening core plugin privileges.

## What Changed

- Persist host workspace cwd, not remote sandbox cwd, in `claude_local`
session params while retaining remote execution identity metadata.
- Reject saved session cwds that point at system roots before heartbeat
falls back to agent home workspace.
- Skip sockets, FIFOs, devices, and other non-file entries during
workspace restore snapshot capture/comparison.
- Pass a small model-provider API-key allowlist only to plugins
declaring `environment.drivers.register`.
- Added focused regression tests for remote Claude session params,
unsafe session cwd detection, plugin worker env filtering, and non-file
snapshot entries.

Credits: ports host-side fixes from Jannes' #5823.

## Verification

- `pnpm vitest run
packages/adapter-utils/src/workspace-restore-merge.test.ts
server/src/services/session-workspace-cwd.test.ts
server/src/__tests__/claude-local-execute.test.ts
server/src/__tests__/plugin-database.test.ts` (25 passed, 7 skipped by
existing embedded-Postgres host guard)
- `pnpm --filter @paperclipai/adapter-utils typecheck`
- `pnpm --filter @paperclipai/adapter-claude-local typecheck`
- `pnpm --filter @paperclipai/server typecheck`

## Risks

- Low risk: changes are scoped to remote sandbox/session metadata,
workspace snapshot filtering, and plugin worker env setup.
- Sandbox-provider plugins now receive only the explicit model-provider
key allowlist; any provider needing another key name will need a
deliberate allowlist update.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5-based coding agent, tool-enabled local code
execution and repository editing.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-05-13 16:23:04 -05:00

264 lines
8.6 KiB
TypeScript

import { createHash } from "node:crypto";
import { createReadStream } from "node:fs";
import { constants as fsConstants, promises as fs } from "node:fs";
import path from "node:path";
type SnapshotEntry =
| { kind: "dir" }
| { kind: "file"; mode: number; hash: string }
| { kind: "symlink"; target: string };
export interface DirectorySnapshot {
exclude: string[];
entries: Map<string, SnapshotEntry>;
}
function isRelativePathOrDescendant(relative: string, candidate: string): boolean {
return relative === candidate || relative.startsWith(`${candidate}/`);
}
function shouldExclude(relative: string, exclude: readonly string[]): boolean {
return exclude.some((candidate) => isRelativePathOrDescendant(relative, candidate));
}
async function hashFile(filePath: string): Promise<string> {
return await new Promise((resolve, reject) => {
const hash = createHash("sha256");
const stream = createReadStream(filePath);
stream.on("data", (chunk) => hash.update(chunk));
stream.on("error", reject);
stream.on("end", () => resolve(hash.digest("hex")));
});
}
async function walkDirectory(
root: string,
exclude: readonly string[],
relative = "",
out: Map<string, SnapshotEntry> = new Map(),
): Promise<Map<string, SnapshotEntry>> {
const current = relative ? path.join(root, relative) : root;
const entries = await fs.readdir(current, { withFileTypes: true }).catch(() => []);
entries.sort((left, right) => left.name.localeCompare(right.name));
for (const entry of entries) {
const nextRelative = relative ? path.posix.join(relative, entry.name) : entry.name;
if (shouldExclude(nextRelative, exclude)) continue;
const fullPath = path.join(root, nextRelative);
const stats = await fs.lstat(fullPath);
if (!stats.isDirectory() && !stats.isSymbolicLink() && !stats.isFile()) {
continue;
}
if (stats.isDirectory()) {
out.set(nextRelative, { kind: "dir" });
await walkDirectory(root, exclude, nextRelative, out);
continue;
}
if (stats.isSymbolicLink()) {
out.set(nextRelative, {
kind: "symlink",
target: await fs.readlink(fullPath),
});
continue;
}
out.set(nextRelative, {
kind: "file",
mode: stats.mode,
hash: await hashFile(fullPath),
});
}
return out;
}
async function readSnapshotEntry(root: string, relative: string): Promise<SnapshotEntry | null> {
const fullPath = path.join(root, relative);
let stats;
try {
stats = await fs.lstat(fullPath);
} catch {
return null;
}
if (stats.isDirectory()) return { kind: "dir" };
if (stats.isSymbolicLink()) {
return {
kind: "symlink",
target: await fs.readlink(fullPath),
};
}
if (!stats.isFile()) return null;
return {
kind: "file",
mode: stats.mode,
hash: await hashFile(fullPath),
};
}
function entriesMatch(left: SnapshotEntry | null | undefined, right: SnapshotEntry | null | undefined): boolean {
if (!left || !right) return false;
if (left.kind !== right.kind) return false;
if (left.kind === "dir") return true;
if (left.kind === "symlink" && right.kind === "symlink") {
return left.target === right.target;
}
if (left.kind === "file" && right.kind === "file") {
return left.mode === right.mode && left.hash === right.hash;
}
return false;
}
async function isHolderAlive(lockDir: string): Promise<boolean> {
try {
const raw = await fs.readFile(path.join(lockDir, "owner.json"), "utf8");
const owner = JSON.parse(raw) as { pid?: unknown };
const pid = typeof owner.pid === "number" && Number.isFinite(owner.pid) && owner.pid > 0 ? owner.pid : null;
if (pid === null) {
// Owner record is unparseable / missing pid — treat as stale.
return false;
}
try {
process.kill(pid, 0);
return true;
} catch {
return false;
}
} catch {
// owner.json missing or unreadable — treat as stale.
return false;
}
}
async function acquireDirectoryMergeLock(lockDir: string): Promise<() => Promise<void>> {
const deadline = Date.now() + 30_000;
while (true) {
try {
await fs.mkdir(lockDir);
await fs.writeFile(
path.join(lockDir, "owner.json"),
`${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() })}\n`,
"utf8",
);
return async () => {
await fs.rm(lockDir, { recursive: true, force: true }).catch(() => undefined);
};
} catch (error) {
const code = error && typeof error === "object" ? (error as { code?: unknown }).code : null;
if (code !== "EEXIST") throw error;
// Stale-lock detection: if the owner PID is dead (SIGKILL / OOM / crash),
// the lockDir would otherwise persist forever and stall restores. Mirror
// the materializePaperclipSkillCopy lock pattern — remove and retry.
if (!(await isHolderAlive(lockDir))) {
await fs.rm(lockDir, { recursive: true, force: true }).catch(() => undefined);
continue;
}
if (Date.now() >= deadline) {
throw new Error(`Timed out waiting for workspace restore lock at ${lockDir}`);
}
await new Promise((resolve) => setTimeout(resolve, 50));
}
}
}
export async function withDirectoryMergeLock<T>(
targetDir: string,
fn: () => Promise<T>,
): Promise<T> {
const releaseLock = await acquireDirectoryMergeLock(`${targetDir}.paperclip-restore.lock`);
try {
return await fn();
} finally {
await releaseLock();
}
}
async function copySnapshotEntry(sourceDir: string, targetDir: string, relative: string, entry: SnapshotEntry): Promise<void> {
const sourcePath = path.join(sourceDir, relative);
const targetPath = path.join(targetDir, relative);
if (entry.kind === "dir") {
const existing = await fs.lstat(targetPath).catch(() => null);
if (existing?.isDirectory()) {
return;
}
if (existing) {
await fs.rm(targetPath, { recursive: true, force: true }).catch(() => undefined);
}
await fs.mkdir(targetPath, { recursive: true });
return;
}
await fs.mkdir(path.dirname(targetPath), { recursive: true });
await fs.rm(targetPath, { recursive: true, force: true }).catch(() => undefined);
if (entry.kind === "symlink") {
await fs.symlink(entry.target, targetPath);
return;
}
await fs.copyFile(sourcePath, targetPath, fsConstants.COPYFILE_FICLONE).catch(async () => {
await fs.copyFile(sourcePath, targetPath);
});
await fs.chmod(targetPath, entry.mode);
}
export async function captureDirectorySnapshot(
rootDir: string,
options: { exclude?: string[] } = {},
): Promise<DirectorySnapshot> {
const exclude = [...new Set(options.exclude ?? [])];
return {
exclude,
entries: await walkDirectory(rootDir, exclude),
};
}
export async function mergeDirectoryWithBaseline(input: {
baseline: DirectorySnapshot;
sourceDir: string;
targetDir: string;
beforeApply?: () => Promise<void>;
}): Promise<void> {
const source = await captureDirectorySnapshot(input.sourceDir, { exclude: input.baseline.exclude });
await withDirectoryMergeLock(input.targetDir, async () => {
await input.beforeApply?.();
const current = await captureDirectorySnapshot(input.targetDir, { exclude: input.baseline.exclude });
const deletedLeafEntries = [...input.baseline.entries.entries()]
.filter(([relative, entry]) => entry.kind !== "dir" && !source.entries.has(relative))
.sort(([left], [right]) => right.length - left.length);
for (const [relative, baselineEntry] of deletedLeafEntries) {
if (!entriesMatch(current.entries.get(relative), baselineEntry)) continue;
await fs.rm(path.join(input.targetDir, relative), { recursive: true, force: true }).catch(() => undefined);
}
const deletedDirs = [...input.baseline.entries.entries()]
.filter(([relative, entry]) => entry.kind === "dir" && !source.entries.has(relative))
.sort(([left], [right]) => right.length - left.length);
for (const [relative] of deletedDirs) {
await fs.rmdir(path.join(input.targetDir, relative)).catch(() => undefined);
}
const changedSourceEntries = [...source.entries.entries()]
.filter(([relative, entry]) => !entriesMatch(input.baseline.entries.get(relative), entry))
.sort(([left], [right]) => left.localeCompare(right));
for (const [relative, entry] of changedSourceEntries) {
await copySnapshotEntry(input.sourceDir, input.targetDir, relative, entry);
}
});
}
export async function directoryEntryMatchesBaseline(
rootDir: string,
relative: string,
baselineEntry: SnapshotEntry,
): Promise<boolean> {
return entriesMatch(await readSnapshotEntry(rootDir, relative), baselineEntry);
}