Harden remote workspace sync and restore flows (#5444)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - When an agent runs against a remote target, Paperclip syncs the
workspace out to the remote at run start and restores changes back to
the local workspace at run end
> - The previous restore flow naïvely overwrote local files with
whatever the remote returned, so files that the remote run never touched
but that had timestamp/mode drift could be needlessly rewritten.
Separately, a single static `refs/paperclip/ssh-sync/imported` ref made
concurrent SSH workspace exports race on the same git ref
> - This pull request adds a `workspace-restore-merge` module that diffs
a pre-run snapshot against the post-run remote state and only writes
back files the remote actually changed; SSH workspace exports now use a
per-import unique ref so concurrent runs can't trample each other
> - Every adapter's execute path threads the snapshot through
`prepareAdapterExecutionTargetRuntime` so the merge has the baseline it
needs
> - The benefit is workspace restores no longer churn untouched files,
and concurrent SSH runs no longer collide on the import ref

## What Changed

- `packages/adapter-utils/src/workspace-restore-merge.{ts,test.ts}`: new
module — directory snapshot (kind/mode/sha256/symlink target) plus
snapshot-aware merge that writes only the files the remote changed
- `packages/adapter-utils/src/ssh.ts`: SSH workspace export uses a
per-import unique ref (`refs/paperclip/ssh-sync/imported/<uuid>`);
restore goes through the new merge helper; `ssh-fixture.test.ts` covers
the unique-ref + merge paths
- `packages/adapter-utils/src/sandbox-managed-runtime.ts` +
`remote-managed-runtime.ts`: thread the snapshot/merge through the
sandbox and SSH paths
- `packages/adapter-utils/src/server-utils.{ts,test.ts}` +
`execution-target.ts`: helpers for capturing the pre-run snapshot;
`prepareAdapterExecutionTargetRuntime` gains required `runId` and
optional `workspaceRemoteDir`, and returns the realized
`workspaceRemoteDir`
- Each adapter's `execute.ts` (acpx, claude, codex, cursor, gemini,
opencode, pi) takes the snapshot at run start and passes it through to
the runtime restore
- Remote execute test mocks updated to match the new
`prepareWorkspaceForSshExecution` return shape and the per-run
`${managedRemoteWorkspace}` cwd subdirectory

## Verification

- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils
--project @paperclipai/adapter-acpx-local --project
@paperclipai/adapter-claude-local --project
@paperclipai/adapter-codex-local --project
@paperclipai/adapter-cursor-local --project
@paperclipai/adapter-gemini-local --project
@paperclipai/adapter-opencode-local --project
@paperclipai/adapter-pi-local` — 196/196 passing
- `pnpm typecheck` clean across the workspace

## Risks

Medium. The restore path now writes a strict subset of what it
previously did — files the remote did not touch are no longer rewritten.
If any flow was relying on a touch-without-content-change being copied
back (timestamp or permission propagation only), that behavior is now
skipped. Snapshot capture adds an O(N-files-in-workspace) hash pass at
run start; the cost is bounded by the existing exclude list. The `runId`
parameter on `prepareAdapterExecutionTargetRuntime` is now required —
every in-tree caller is updated; out-of-tree adapter authors need to
pass it.

## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — new module +
every adapter execute path covered
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
Devin Foley
2026-05-07 14:44:45 -07:00
committed by GitHub
parent 824298f414
commit 12cb7b40fd
23 changed files with 1234 additions and 183 deletions
@@ -999,6 +999,99 @@ export function shapePaperclipWorkspaceEnvForExecution(input: {
};
}
/**
 * Builds a string-only copy of `input.env` and, for remote execution targets,
 * repoints workspace-cwd variables from the local workspace directory to the
 * execution directory.
 *
 * Non-string values in `input.env` are dropped from the result. A variable is
 * rewritten only when its name ends with `_WORKSPACE_CWD`, its trimmed value
 * is non-empty, and that value resolves (via `path.resolve`) to the same path
 * as `input.workspaceCwd`. The replacement is the `path.resolve`d form of
 * `input.executionCwd`.
 *
 * @param input.env - Source environment; never mutated.
 * @param input.workspaceCwd - Local workspace directory to match against.
 * @param input.executionCwd - Directory that matching variables are rewritten to.
 * @param input.executionTargetIsRemote - Rewriting only happens when this is truthy.
 * @returns A new record of the string-valued entries, rewritten where applicable.
 */
export function rewriteWorkspaceCwdEnvVarsForExecution(input: {
  env: Record<string, unknown>;
  workspaceCwd?: string | null;
  executionCwd?: string | null;
  executionTargetIsRemote?: boolean;
}): Record<string, string> {
  // Blank or non-string directories are treated as "not provided".
  const resolveIfPresent = (dir: string | null | undefined): string | null => {
    if (typeof dir !== "string") return null;
    return dir.trim().length > 0 ? path.resolve(dir) : null;
  };

  const stringPairs: Array<[string, string]> = [];
  for (const [name, raw] of Object.entries(input.env)) {
    if (typeof raw === "string") stringPairs.push([name, raw]);
  }
  const rewritten: Record<string, string> = Object.fromEntries(stringPairs);

  const localRoot = resolveIfPresent(input.workspaceCwd);
  const remoteRoot = resolveIfPresent(input.executionCwd);

  // Nothing to translate unless we run remotely and know both ends of the mapping.
  const canRewrite = Boolean(input.executionTargetIsRemote) && localRoot !== null && remoteRoot !== null;
  if (!canRewrite) {
    return rewritten;
  }

  for (const [name, current] of Object.entries(rewritten)) {
    const candidate = current.trim();
    const pointsAtLocalWorkspace =
      name.endsWith("_WORKSPACE_CWD") &&
      candidate.length > 0 &&
      path.resolve(candidate) === localRoot;
    if (pointsAtLocalWorkspace) {
      rewritten[name] = remoteRoot as string;
    }
  }
  return rewritten;
}
/**
 * Recomputes the Paperclip workspace variables in `input.env` for the current
 * execution target. `input.env` is mutated in place.
 *
 * Steps, in order:
 * 1. Shape the workspace cwd / worktree path / hints with
 *    `shapePaperclipWorkspaceEnvForExecution`.
 * 2. Remove `PAPERCLIP_WORKSPACE_CWD`, `PAPERCLIP_WORKSPACE_WORKTREE_PATH` and
 *    `PAPERCLIP_WORKSPACES_JSON` from `input.env`.
 * 3. Re-apply workspace variables through `applyPaperclipWorkspaceEnv`, using
 *    the shaped cwd and worktree path.
 * 4. Set `PAPERCLIP_WORKSPACES_JSON` only when the shaped hints are non-empty.
 * 5. Overlay the entries of `input.envConfig` (after
 *    `rewriteWorkspaceCwdEnvVarsForExecution`) on top of `input.env`.
 *
 * @returns The shaped workspace values produced in step 1.
 */
export function refreshPaperclipWorkspaceEnvForExecution(input: {
  env: Record<string, string>;
  envConfig?: Record<string, unknown>;
  workspaceCwd?: string | null;
  workspaceSource?: string | null;
  workspaceStrategy?: string | null;
  workspaceId?: string | null;
  workspaceRepoUrl?: string | null;
  workspaceRepoRef?: string | null;
  workspaceBranch?: string | null;
  workspaceWorktreePath?: string | null;
  workspaceHints?: Array<Record<string, unknown>>;
  agentHome?: string | null;
  executionTargetIsRemote?: boolean;
  executionCwd?: string | null;
}): {
  workspaceCwd: string | null;
  workspaceWorktreePath: string | null;
  workspaceHints: Array<Record<string, unknown>>;
} {
  const targetEnv = input.env;

  const shaped = shapePaperclipWorkspaceEnvForExecution({
    workspaceCwd: input.workspaceCwd,
    workspaceWorktreePath: input.workspaceWorktreePath,
    workspaceHints: input.workspaceHints,
    executionTargetIsRemote: input.executionTargetIsRemote,
    executionCwd: input.executionCwd,
  });

  // Clear the previous values first so nothing stale survives the re-apply below.
  const staleKeys = [
    "PAPERCLIP_WORKSPACE_CWD",
    "PAPERCLIP_WORKSPACE_WORKTREE_PATH",
    "PAPERCLIP_WORKSPACES_JSON",
  ];
  for (const staleKey of staleKeys) {
    delete targetEnv[staleKey];
  }

  applyPaperclipWorkspaceEnv(targetEnv, {
    workspaceCwd: shaped.workspaceCwd,
    workspaceSource: input.workspaceSource,
    workspaceStrategy: input.workspaceStrategy,
    workspaceId: input.workspaceId,
    workspaceRepoUrl: input.workspaceRepoUrl,
    workspaceRepoRef: input.workspaceRepoRef,
    workspaceBranch: input.workspaceBranch,
    workspaceWorktreePath: shaped.workspaceWorktreePath,
    agentHome: input.agentHome,
  });

  const hints = shaped.workspaceHints;
  if (hints.length > 0) {
    targetEnv.PAPERCLIP_WORKSPACES_JSON = JSON.stringify(hints);
  }

  // Config-supplied variables are applied last, so they override the values set above.
  const configOverlay = rewriteWorkspaceCwdEnvVarsForExecution({
    env: input.envConfig ?? {},
    workspaceCwd: input.workspaceCwd,
    executionCwd: shaped.workspaceCwd,
    executionTargetIsRemote: input.executionTargetIsRemote,
  });
  Object.assign(targetEnv, configOverlay);

  return shaped;
}
export function sanitizeInheritedPaperclipEnv(baseEnv: NodeJS.ProcessEnv): NodeJS.ProcessEnv {
const env: NodeJS.ProcessEnv = { ...baseEnv };
for (const key of Object.keys(env)) {