From 09eceb952af7daa5a23eb5f63d4928c62c67111e Mon Sep 17 00:00:00 2001
From: Devin Foley <devin@devinfoley.com>
Date: Sun, 3 May 2026 13:51:38 -0700
Subject: [PATCH] Avoid resuming stale remote sessions (Pi adapter) (#5120)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

> **Stacked PR (part 7 of 7).** Depends on:
>
> - PR #5114
> - PR #5115
> - PR #5116
> - PR #5117
> - PR #5118
> - PR #5119
>
> Diff against `master` includes commits from earlier PRs in the stack —
> the new commit in this PR is the topmost one.

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - The Pi adapter persists a session jsonl per agent so subsequent runs
resume
>   conversation context instead of starting cold
> - SSH testing reproduced a real failure: a verification issue reached
terminal
>   `done` and the agent claimed success, but the proof artifact
> `manual-qa/environment-matrix/ssh/pi_local.md` was missing from the
realized
>   SSH workspace on the QA target box
> - Root cause: the saved session header recorded a different cwd than
the new
> execution cwd, but the resume eligibility check only compared
session-params
> cwd via local-style `path.resolve` (which doesn't roundtrip on remote
POSIX
> paths). The stale session got resumed and writes landed in the wrong
cwd
> - This PR tightens resume eligibility for remote targets: it adds
>   remote-aware cwd normalisation, reads the first line of the session
>   jsonl over SSH (`head -n 1`) to verify the saved header cwd, and only
>   resumes when both session-params cwd *and* the on-disk header cwd match
>   the realised execution cwd. Stale sessions are skipped without failing
>   the run (a notice is logged) and the run starts cold
> - The benefit is that Pi runs across cwd-changing environments stop
> accidentally resuming each other's sessions, and proof artifacts land
where
>   reviewers expect them

## What Changed

- Added `normalizeExecutionCwd`, `executionCwdsMatch`,
`readSessionHeaderCwd`,
  and `readSavedSessionCwd` helpers in `pi-local/src/server/execute.ts`
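- `readSavedSessionCwd` reads the header line of the session jsonl —
  locally by loading the file with `fs.readFile` and parsing its first
  non-empty line, remotely via `runAdapterExecutionTargetShellCommand`
  (`head -n 1`, guarded by a `[ -f … ]` existence check)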
- Resume eligibility now requires:
  1. Saved session id is non-empty
  2. Execution target shape matches (existing check)
  3. Session-params cwd is empty or matches the realised execution cwd
  4. Session-header cwd (from the on-disk jsonl) matches the realised
     execution cwd
- Stale sessions are skipped without failing the run (a notice is logged
  and the run starts cold) instead of resumed
- `execute.remote.test.ts` extended with: matching header → resume;
mismatched
header → start fresh; missing/unreadable header → start fresh; remote
head
  command failure → start fresh

## Verification

- `pnpm --filter @paperclipai/adapter-pi-local test`
- `pnpm test -- pi-local`
- Manual QA: ran a Pi agent twice in two different remote cwds,
confirmed
the second run did not pick up the first run's session and that
subsequent
  runs in the original cwd still resumed correctly

## Risks

- Adds a `head -n 1` shell call per Pi run on remote targets whenever a
  saved session id is present. Negligible latency (single read of session
  jsonl), bounded by 15s timeout.
- If the `head` call fails for unrelated reasons (transient remote
unreachability), the run will start cold instead of resuming. This is
the
safe default but worth noting — operators may see one extra cold run if
a
  remote glitches mid-session.
- No data is deleted or migrated; stale sessions remain on disk for
manual
  inspection if desired.

## Model Used

- OpenAI GPT-5.4 (reasoning effort: high) via Codex CLI
- Provider: OpenAI
- Used to author the code changes in this PR

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots — N/A
- [ ] I have updated relevant documentation to reflect my changes — N/A
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
---
 .../src/server/execute.remote.test.ts         | 230 ++++++++++++++++++
 .../adapters/pi-local/src/server/execute.ts   |  97 +++++++-
 2 files changed, 323 insertions(+), 4 deletions(-)

diff --git a/packages/adapters/pi-local/src/server/execute.remote.test.ts b/packages/adapters/pi-local/src/server/execute.remote.test.ts
index 9a36bad7..140ec366 100644
--- a/packages/adapters/pi-local/src/server/execute.remote.test.ts
+++ b/packages/adapters/pi-local/src/server/execute.remote.test.ts
@@ -221,6 +221,22 @@ describe("pi remote execution", () => {
     const workspaceDir = path.join(rootDir, "workspace");
     await mkdir(workspaceDir, { recursive: true });
 
+    runSshCommand.mockImplementation(async (...args: unknown[]) => {
+      const command = String(args[1] ?? "");
+      if (command.includes("head -n 1") && command.includes("session-123.jsonl")) {
+        return {
+          stdout: `${JSON.stringify({ type: "session", cwd: "/remote/workspace" })}\n`,
+          stderr: "",
+          exitCode: 0,
+        };
+      }
+      return {
+        stdout: "",
+        stderr: "",
+        exitCode: 0,
+      };
+    });
+
     await execute({
       runId: "run-ssh-resume",
       agent: {
@@ -275,4 +291,218 @@ describe("pi remote execution", () => {
     expect(call?.[2]).toContain("--session");
     expect(call?.[2]).toContain("/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl");
   });
+
+  it("starts a fresh remote Pi session when the saved session header cwd points at a different workspace", async () => {
+    const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-pi-remote-stale-session-"));
+    cleanupDirs.push(rootDir);
+    const workspaceDir = path.join(rootDir, "workspace");
+    await mkdir(workspaceDir, { recursive: true });
+
+    runSshCommand.mockImplementation(async (...args: unknown[]) => {
+      const command = String(args[1] ?? "");
+      if (command.includes("head -n 1") && command.includes("session-123.jsonl")) {
+        return {
+          stdout: `${JSON.stringify({ type: "session", cwd: "/remote/old-workspace" })}\n`,
+          stderr: "",
+          exitCode: 0,
+        };
+      }
+      return {
+        stdout: "",
+        stderr: "",
+        exitCode: 0,
+      };
+    });
+
+    await execute({
+      runId: "run-ssh-stale-session",
+      agent: {
+        id: "agent-1",
+        companyId: "company-1",
+        name: "Pi Builder",
+        adapterType: "pi_local",
+        adapterConfig: {},
+      },
+      runtime: {
+        sessionId: "/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl",
+        sessionParams: {
+          sessionId: "/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl",
+          cwd: "/remote/workspace",
+          remoteExecution: {
+            transport: "ssh",
+            host: "127.0.0.1",
+            port: 2222,
+            username: "fixture",
+            remoteCwd: "/remote/workspace",
+          },
+        },
+        sessionDisplayId: "session-123",
+        taskKey: null,
+      },
+      config: {
+        command: "pi",
+        model: "openai/gpt-5.4-mini",
+      },
+      context: {
+        paperclipWorkspace: {
+          cwd: workspaceDir,
+          source: "project_primary",
+        },
+      },
+      executionTransport: {
+        remoteExecution: {
+          host: "127.0.0.1",
+          port: 2222,
+          username: "fixture",
+          remoteWorkspacePath: "/remote/workspace",
+          remoteCwd: "/remote/workspace",
+          privateKey: "PRIVATE KEY",
+          knownHosts: "[127.0.0.1]:2222 ssh-ed25519 AAAA",
+          strictHostKeyChecking: true,
+        },
+      },
+      onLog: async () => {},
+    });
+
+    const call = runChildProcess.mock.calls[0] as unknown as [string, string, string[]] | undefined;
+    const sessionIndex = call?.[2].indexOf("--session") ?? -1;
+    expect(sessionIndex).toBeGreaterThanOrEqual(0);
+    const usedSession = sessionIndex >= 0 ? call?.[2][sessionIndex + 1] : null;
+    expect(usedSession).toContain("/remote/workspace/.paperclip-runtime/pi/sessions/");
+    expect(usedSession).not.toBe("/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl");
+  });
+
+  it("starts a fresh remote Pi session when the saved session header is empty or unreadable", async () => {
+    const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-pi-remote-empty-header-"));
+    cleanupDirs.push(rootDir);
+    const workspaceDir = path.join(rootDir, "workspace");
+    await mkdir(workspaceDir, { recursive: true });
+
+    runSshCommand.mockImplementation(async (...args: unknown[]) => {
+      const command = String(args[1] ?? "");
+      if (command.includes("head -n 1") && command.includes("session-123.jsonl")) {
+        return { stdout: "", stderr: "", exitCode: 0 };
+      }
+      return { stdout: "", stderr: "", exitCode: 0 };
+    });
+
+    await execute({
+      runId: "run-ssh-empty-header",
+      agent: {
+        id: "agent-1",
+        companyId: "company-1",
+        name: "Pi Builder",
+        adapterType: "pi_local",
+        adapterConfig: {},
+      },
+      runtime: {
+        sessionId: "/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl",
+        sessionParams: {
+          sessionId: "/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl",
+          cwd: "/remote/workspace",
+          remoteExecution: {
+            transport: "ssh",
+            host: "127.0.0.1",
+            port: 2222,
+            username: "fixture",
+            remoteCwd: "/remote/workspace",
+          },
+        },
+        sessionDisplayId: "session-123",
+        taskKey: null,
+      },
+      config: { command: "pi", model: "openai/gpt-5.4-mini" },
+      context: {
+        paperclipWorkspace: { cwd: workspaceDir, source: "project_primary" },
+      },
+      executionTransport: {
+        remoteExecution: {
+          host: "127.0.0.1",
+          port: 2222,
+          username: "fixture",
+          remoteWorkspacePath: "/remote/workspace",
+          remoteCwd: "/remote/workspace",
+          privateKey: "PRIVATE KEY",
+          knownHosts: "[127.0.0.1]:2222 ssh-ed25519 AAAA",
+          strictHostKeyChecking: true,
+        },
+      },
+      onLog: async () => {},
+    });
+
+    const call = runChildProcess.mock.calls[0] as unknown as [string, string, string[]] | undefined;
+    const sessionIndex = call?.[2].indexOf("--session") ?? -1;
+    expect(sessionIndex).toBeGreaterThanOrEqual(0);
+    const usedSession = sessionIndex >= 0 ? call?.[2][sessionIndex + 1] : null;
+    expect(usedSession).not.toBe("/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl");
+  });
+
+  it("starts a fresh remote Pi session when the remote head command fails", async () => {
+    const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-pi-remote-head-failure-"));
+    cleanupDirs.push(rootDir);
+    const workspaceDir = path.join(rootDir, "workspace");
+    await mkdir(workspaceDir, { recursive: true });
+
+    runSshCommand.mockImplementation(async (...args: unknown[]) => {
+      const command = String(args[1] ?? "");
+      if (command.includes("head -n 1") && command.includes("session-123.jsonl")) {
+        throw Object.assign(new Error("ssh: connect failed"), {
+          stdout: "",
+          stderr: "ssh: connect failed",
+          code: "ENOENT",
+        });
+      }
+      return { stdout: "", stderr: "", exitCode: 0 };
+    });
+
+    await execute({
+      runId: "run-ssh-head-failure",
+      agent: {
+        id: "agent-1",
+        companyId: "company-1",
+        name: "Pi Builder",
+        adapterType: "pi_local",
+        adapterConfig: {},
+      },
+      runtime: {
+        sessionId: "/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl",
+        sessionParams: {
+          sessionId: "/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl",
+          cwd: "/remote/workspace",
+          remoteExecution: {
+            transport: "ssh",
+            host: "127.0.0.1",
+            port: 2222,
+            username: "fixture",
+            remoteCwd: "/remote/workspace",
+          },
+        },
+        sessionDisplayId: "session-123",
+        taskKey: null,
+      },
+      config: { command: "pi", model: "openai/gpt-5.4-mini" },
+      context: {
+        paperclipWorkspace: { cwd: workspaceDir, source: "project_primary" },
+      },
+      executionTransport: {
+        remoteExecution: {
+          host: "127.0.0.1",
+          port: 2222,
+          username: "fixture",
+          remoteWorkspacePath: "/remote/workspace",
+          remoteCwd: "/remote/workspace",
+          privateKey: "PRIVATE KEY",
+          knownHosts: "[127.0.0.1]:2222 ssh-ed25519 AAAA",
+          strictHostKeyChecking: true,
+        },
+      },
+      onLog: async () => {},
+    });
+
+    const call = runChildProcess.mock.calls[0] as unknown as [string, string, string[]] | undefined;
+    const sessionIndex = call?.[2].indexOf("--session") ?? -1;
+    expect(sessionIndex).toBeGreaterThanOrEqual(0);
+    const usedSession = sessionIndex >= 0 ? call?.[2][sessionIndex + 1] : null;
+    expect(usedSession).not.toBe("/remote/workspace/.paperclip-runtime/pi/sessions/session-123.jsonl");
+  });
 });
diff --git a/packages/adapters/pi-local/src/server/execute.ts b/packages/adapters/pi-local/src/server/execute.ts
index 1d1172b7..f0b659b8 100644
--- a/packages/adapters/pi-local/src/server/execute.ts
+++ b/packages/adapters/pi-local/src/server/execute.ts
@@ -17,6 +17,7 @@ import {
   readAdapterExecutionTarget,
   resolveAdapterExecutionTargetCommandForLogs,
   runAdapterExecutionTargetProcess,
+  runAdapterExecutionTargetShellCommand,
   startAdapterExecutionTargetPaperclipBridge,
 } from "@paperclipai/adapter-utils/execution-target";
 import {
@@ -41,6 +42,7 @@ import {
   DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE,
   runChildProcess,
 } from "@paperclipai/adapter-utils/server-utils";
+import { shellQuote } from "@paperclipai/adapter-utils/ssh";
 import { isPiUnknownSessionError, parsePiJsonl } from "./parse.js";
 import { ensurePiModelConfiguredAndAvailable } from "./models.js";
 
@@ -143,6 +145,68 @@ function buildRemoteSessionPath(runtimeRootDir: string, agentId: string, timesta
   return path.posix.join(runtimeRootDir, "sessions", `${safeTimestamp}-${agentId}.jsonl`);
 }
 
+function normalizeExecutionCwd(candidate: string, remote: boolean): string {
+  return remote ? path.posix.normalize(candidate) : path.resolve(candidate);
+}
+
+function executionCwdsMatch(saved: string, current: string, remote: boolean): boolean {
+  return normalizeExecutionCwd(saved, remote) === normalizeExecutionCwd(current, remote);
+}
+
+function readSessionHeaderCwd(raw: string): string | null {
+  const headerLine = raw
+    .split(/\r?\n/)
+    .map((line) => line.trim())
+    .find(Boolean);
+  if (!headerLine) return null;
+  try {
+    const parsed = JSON.parse(headerLine) as Record<string, unknown>;
+    if (parsed.type !== "session") return null;
+    const cwd = typeof parsed.cwd === "string" ? parsed.cwd.trim() : "";
+    return cwd.length > 0 ? cwd : null;
+  } catch {
+    return null;
+  }
+}
+
+async function readSavedSessionCwd(input: {
+  runId: string;
+  sessionPath: string;
+  executionTarget: ReturnType<typeof readAdapterExecutionTarget>;
+  cwd: string;
+  env: Record<string, string>;
+  timeoutSec: number;
+  graceSec: number;
+}): Promise<string | null> {
+  if (!input.sessionPath.trim()) return null;
+
+  if (!adapterExecutionTargetIsRemote(input.executionTarget)) {
+    try {
+      return readSessionHeaderCwd(await fs.readFile(input.sessionPath, "utf8"));
+    } catch {
+      return null;
+    }
+  }
+
+  try {
+    const sessionHeader = await runAdapterExecutionTargetShellCommand(
+      input.runId,
+      input.executionTarget,
+      `if [ -f ${shellQuote(input.sessionPath)} ]; then head -n 1 ${shellQuote(input.sessionPath)}; fi`,
+      {
+        cwd: input.cwd,
+        env: input.env,
+        timeoutSec: input.timeoutSec > 0 ? Math.min(input.timeoutSec, 15) : 15,
+        graceSec: input.graceSec,
+      },
+    );
+    if (sessionHeader.timedOut || (sessionHeader.exitCode ?? 0) !== 0) return null;
+    return readSessionHeaderCwd(sessionHeader.stdout);
+  } catch {
+    return null;
+  }
+}
+
 export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExecutionResult> {
   const { runId, agent, runtime, config, context, onLog, onMeta, onSpawn, authToken } = ctx;
   const executionTarget = readAdapterExecutionTarget({
@@ -373,10 +437,31 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
   const runtimeSessionId = asString(runtimeSessionParams.sessionId, runtime.sessionId ?? "");
   const runtimeSessionCwd = asString(runtimeSessionParams.cwd, "");
   const runtimeRemoteExecution = parseObject(runtimeSessionParams.remoteExecution);
+  const sessionTargetMatches = adapterExecutionTargetSessionMatches(runtimeRemoteExecution, executionTarget);
+  const sessionParamsCwdMatches =
+    runtimeSessionCwd.length === 0 ||
+    executionCwdsMatch(runtimeSessionCwd, effectiveExecutionCwd, executionTargetIsRemote);
+  const savedSessionCwd =
+    runtimeSessionId.length > 0
+      ? await readSavedSessionCwd({
+          runId,
+          sessionPath: runtimeSessionId,
+          executionTarget,
+          cwd,
+          env,
+          timeoutSec,
+          graceSec,
+        })
+      : null;
+  const sessionHeaderCwdMatches =
+    runtimeSessionId.length === 0 ||
+    (savedSessionCwd !== null &&
+      executionCwdsMatch(savedSessionCwd, effectiveExecutionCwd, executionTargetIsRemote));
   const canResumeSession =
     runtimeSessionId.length > 0 &&
-    (runtimeSessionCwd.length === 0 || path.resolve(runtimeSessionCwd) === path.resolve(effectiveExecutionCwd)) &&
-    adapterExecutionTargetSessionMatches(runtimeRemoteExecution, executionTarget);
+    sessionTargetMatches &&
+    sessionParamsCwdMatches &&
+    sessionHeaderCwdMatches;
   const sessionPath = canResumeSession
     ? runtimeSessionId
     : executionTargetIsRemote && remoteRuntimeRootDir
@@ -384,11 +469,15 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
       : buildSessionPath(agent.id, new Date().toISOString());
 
   if (runtimeSessionId && !canResumeSession) {
+    const staleSessionCwdNote =
+      savedSessionCwd !== null && !sessionHeaderCwdMatches
+        ? ` Pi stored cwd "${savedSessionCwd}" in the session header, so Paperclip will start a fresh session for "${effectiveExecutionCwd}".`
+        : "";
     await onLog(
       "stdout",
       executionTargetIsRemote
-        ? `[paperclip] Pi session "${runtimeSessionId}" does not match the current remote execution identity and will not be resumed in "${effectiveExecutionCwd}". Starting a fresh remote session.\n`
-        : `[paperclip] Pi session "${runtimeSessionId}" was saved for cwd "${runtimeSessionCwd}" and will not be resumed in "${effectiveExecutionCwd}".\n`,
+        ? `[paperclip] Pi session "${runtimeSessionId}" does not match the current remote execution state and will not be resumed in "${effectiveExecutionCwd}".${staleSessionCwdNote} Starting a fresh remote session.\n`
+        : `[paperclip] Pi session "${runtimeSessionId}" was saved for cwd "${runtimeSessionCwd}" and will not be resumed in "${effectiveExecutionCwd}".${staleSessionCwdNote}\n`,
     );
   }