Files
paperclip/server/src/__tests__/heartbeat-workspace-session.test.ts
T
Devin Foley 70679a3321 Add sandbox environment support (#4415)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - The environment/runtime layer decides where agent work executes and
how the control plane reaches those runtimes.
> - Today Paperclip can run locally and over SSH, but sandboxed
execution needs a first-class environment model instead of one-off
adapter behavior.
> - We also want sandbox providers to be pluggable so the core does not
hardcode every provider implementation.
> - This branch adds the Sandbox environment path, the provider
contract, and a deterministic fake provider plugin.
> - That required synchronized changes across shared contracts, plugin
SDK surfaces, server runtime orchestration, and the UI
environment/workspace flows.
> - The result is that sandbox execution becomes a core control-plane
capability while keeping provider implementations extensible and
testable.

## What Changed

- Added sandbox runtime support to the environment execution path,
including runtime URL discovery, sandbox execution targeting,
orchestration, and heartbeat integration.
- Added plugin-provider support for sandbox environments so providers
can be supplied via plugins instead of hardcoded server logic.
- Added the fake sandbox provider plugin with deterministic behavior
suitable for local and automated testing.
- Updated shared types, validators, plugin protocol definitions, and SDK
helpers to carry sandbox provider and workspace-runtime contracts across
package boundaries.
- Updated server routes and services so companies can create sandbox
environments, select them for work, and execute work through the sandbox
runtime path.
- Updated the UI environment and workspace surfaces to expose sandbox
environment configuration and selection.
- Added test coverage for sandbox runtime behavior, provider seams,
environment route guards, orchestration, and the fake provider plugin.

## Verification

- Ran locally before the final fixture-only scrub:
  - `pnpm -r typecheck`
  - `pnpm test:run`
  - `pnpm build`
- Ran locally after the final scrub amend:
  - `pnpm vitest run server/src/__tests__/runtime-api.test.ts`
- Reviewer spot checks:
  - create a sandbox environment backed by the fake provider plugin
  - run work through that environment
- confirm sandbox provider execution does not inherit host secrets
implicitly

## Risks

- This touches shared contracts, plugin SDK plumbing, server runtime
orchestration, and UI environment/workspace flows, so regressions would
likely show up as cross-layer mismatches rather than isolated type
errors.
- Runtime URL discovery and sandbox callback selection are sensitive to
host/bind configuration; if that logic is wrong, sandbox-backed
callbacks may fail even when execution succeeds.
- The fake provider plugin is intentionally deterministic and
test-oriented; future providers may expose capability gaps that this
branch does not yet cover.

## Model Used

- OpenAI Codex coding agent on a GPT-5-class backend in the
Paperclip/Codex harness. Exact backend model ID is not exposed
in-session. Tool-assisted workflow with shell execution, file editing,
git history inspection, and local test execution.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-24 12:15:53 -07:00

562 lines
17 KiB
TypeScript

import { describe, expect, it } from "vitest";
import type { agents } from "@paperclipai/db";
import { sessionCodec as codexSessionCodec } from "@paperclipai/adapter-codex-local/server";
import { resolveDefaultAgentWorkspaceDir } from "../home-paths.js";
import {
applyPersistedExecutionWorkspaceConfig,
buildRealizedExecutionWorkspaceFromPersisted,
buildExplicitResumeSessionOverride,
deriveTaskKeyWithHeartbeatFallback,
extractWakeCommentIds,
formatRuntimeWorkspaceWarningLog,
mergeExecutionWorkspaceMetadataForPersistence,
mergeCoalescedContextSnapshot,
prioritizeProjectWorkspaceCandidatesForRun,
parseSessionCompactionPolicy,
resolveRuntimeSessionParamsForWorkspace,
stripWorkspaceRuntimeFromExecutionRunConfig,
shouldResetTaskSessionForWake,
type ResolvedWorkspaceForRun,
} from "../services/heartbeat.ts";
function buildResolvedWorkspace(overrides: Partial<ResolvedWorkspaceForRun> = {}): ResolvedWorkspaceForRun {
return {
cwd: "/tmp/project",
source: "project_primary",
projectId: "project-1",
workspaceId: "workspace-1",
repoUrl: null,
repoRef: null,
workspaceHints: [],
warnings: [],
...overrides,
};
}
function buildAgent(adapterType: string, runtimeConfig: Record<string, unknown> = {}) {
return {
id: "agent-1",
companyId: "company-1",
projectId: null,
goalId: null,
name: "Agent",
role: "engineer",
title: null,
icon: null,
status: "running",
reportsTo: null,
capabilities: null,
adapterType,
adapterConfig: {},
runtimeConfig,
budgetMonthlyCents: 0,
spentMonthlyCents: 0,
permissions: {},
lastHeartbeatAt: null,
metadata: null,
createdAt: new Date(),
updatedAt: new Date(),
} as unknown as typeof agents.$inferSelect;
}
describe("resolveRuntimeSessionParamsForWorkspace", () => {
it("migrates fallback workspace sessions to project workspace when project cwd becomes available", () => {
const agentId = "agent-123";
const fallbackCwd = resolveDefaultAgentWorkspaceDir(agentId);
const result = resolveRuntimeSessionParamsForWorkspace({
agentId,
previousSessionParams: {
sessionId: "session-1",
cwd: fallbackCwd,
workspaceId: "workspace-1",
},
resolvedWorkspace: buildResolvedWorkspace({ cwd: "/tmp/new-project-cwd" }),
});
expect(result.sessionParams).toMatchObject({
sessionId: "session-1",
cwd: "/tmp/new-project-cwd",
workspaceId: "workspace-1",
});
expect(result.warning).toContain("Attempting to resume session");
});
it("does not migrate when previous session cwd is not the fallback workspace", () => {
const result = resolveRuntimeSessionParamsForWorkspace({
agentId: "agent-123",
previousSessionParams: {
sessionId: "session-1",
cwd: "/tmp/some-other-cwd",
workspaceId: "workspace-1",
},
resolvedWorkspace: buildResolvedWorkspace({ cwd: "/tmp/new-project-cwd" }),
});
expect(result.sessionParams).toEqual({
sessionId: "session-1",
cwd: "/tmp/some-other-cwd",
workspaceId: "workspace-1",
});
expect(result.warning).toBeNull();
});
it("does not migrate when resolved workspace id differs from previous session workspace id", () => {
const agentId = "agent-123";
const fallbackCwd = resolveDefaultAgentWorkspaceDir(agentId);
const result = resolveRuntimeSessionParamsForWorkspace({
agentId,
previousSessionParams: {
sessionId: "session-1",
cwd: fallbackCwd,
workspaceId: "workspace-1",
},
resolvedWorkspace: buildResolvedWorkspace({
cwd: "/tmp/new-project-cwd",
workspaceId: "workspace-2",
}),
});
expect(result.sessionParams).toEqual({
sessionId: "session-1",
cwd: fallbackCwd,
workspaceId: "workspace-1",
});
expect(result.warning).toBeNull();
});
});
describe("applyPersistedExecutionWorkspaceConfig", () => {
it("does not add workspace runtime when only the project workspace had manual runtime config", () => {
const result = applyPersistedExecutionWorkspaceConfig({
config: {},
workspaceConfig: null,
mode: "isolated_workspace",
});
expect("workspaceRuntime" in result).toBe(false);
});
it("applies explicit persisted execution workspace runtime config when present", () => {
const result = applyPersistedExecutionWorkspaceConfig({
config: {},
workspaceConfig: {
provisionCommand: null,
teardownCommand: null,
cleanupCommand: null,
desiredState: null,
workspaceRuntime: {
services: [{ name: "workspace-web" }],
},
},
mode: "isolated_workspace",
});
expect(result.workspaceRuntime).toEqual({
services: [{ name: "workspace-web" }],
});
});
});
describe("mergeExecutionWorkspaceMetadataForPersistence", () => {
it("merges config snapshot for newly realized workspaces", () => {
expect(mergeExecutionWorkspaceMetadataForPersistence({
existingMetadata: null,
source: "task_session",
createdByRuntime: true,
configSnapshot: {
environmentId: "env-new",
provisionCommand: "bash ./scripts/provision.sh",
},
shouldReuseExisting: false,
})).toEqual({
source: "task_session",
createdByRuntime: true,
config: {
environmentId: "env-new",
provisionCommand: "bash ./scripts/provision.sh",
teardownCommand: null,
cleanupCommand: null,
desiredState: null,
serviceStates: null,
workspaceRuntime: null,
},
});
});
it("preserves persisted config snapshot when reusing an existing workspace", () => {
expect(mergeExecutionWorkspaceMetadataForPersistence({
existingMetadata: {
config: {
environmentId: "env-old",
provisionCommand: "bash ./scripts/existing-provision.sh",
},
},
source: "task_session",
createdByRuntime: false,
configSnapshot: {
environmentId: "env-new",
provisionCommand: "bash ./scripts/new-provision.sh",
},
shouldReuseExisting: true,
})).toEqual({
config: {
environmentId: "env-old",
provisionCommand: "bash ./scripts/existing-provision.sh",
},
source: "task_session",
createdByRuntime: false,
});
});
});
describe("buildRealizedExecutionWorkspaceFromPersisted", () => {
it("reuses the persisted execution workspace path instead of deriving a new worktree", () => {
const result = buildRealizedExecutionWorkspaceFromPersisted({
base: buildResolvedWorkspace({
cwd: "/tmp/project-primary",
repoRef: "main",
}),
workspace: {
id: "execution-workspace-1",
companyId: "company-1",
projectId: "project-1",
projectWorkspaceId: "workspace-1",
sourceIssueId: "issue-1",
mode: "isolated_workspace",
strategyType: "git_worktree",
name: "PAP-880-thumbs-capture-for-evals-feature",
status: "active",
cwd: "/tmp/reused-worktree",
repoUrl: "https://example.com/paperclip.git",
baseRef: "main",
branchName: "PAP-880-thumbs-capture-for-evals-feature",
providerType: "git_worktree",
providerRef: "/tmp/reused-worktree",
derivedFromExecutionWorkspaceId: null,
lastUsedAt: new Date(),
openedAt: new Date(),
closedAt: null,
cleanupEligibleAt: null,
cleanupReason: null,
config: null,
metadata: null,
createdAt: new Date(),
updatedAt: new Date(),
},
});
expect(result.created).toBe(false);
expect(result.strategy).toBe("git_worktree");
expect(result.cwd).toBe("/tmp/reused-worktree");
expect(result.worktreePath).toBe("/tmp/reused-worktree");
expect(result.branchName).toBe("PAP-880-thumbs-capture-for-evals-feature");
expect(result.source).toBe("task_session");
});
});
describe("stripWorkspaceRuntimeFromExecutionRunConfig", () => {
it("removes workspace runtime before heartbeat execution", () => {
const input = {
cwd: "/tmp/project",
workspaceStrategy: {
type: "git_worktree",
},
workspaceRuntime: {
services: [{ name: "web" }],
},
};
const result = stripWorkspaceRuntimeFromExecutionRunConfig(input);
expect(result).toEqual({
cwd: "/tmp/project",
workspaceStrategy: {
type: "git_worktree",
},
});
expect(input.workspaceRuntime).toEqual({
services: [{ name: "web" }],
});
});
});
describe("shouldResetTaskSessionForWake", () => {
it("resets session context on assignment wake", () => {
expect(shouldResetTaskSessionForWake({ wakeReason: "issue_assigned" })).toBe(true);
});
it("resets session context on execution review wakes", () => {
expect(shouldResetTaskSessionForWake({ wakeReason: "execution_review_requested" })).toBe(true);
});
it("resets session context on execution approval wakes", () => {
expect(shouldResetTaskSessionForWake({ wakeReason: "execution_approval_requested" })).toBe(true);
});
it("resets session context on execution changes-requested wakes", () => {
expect(shouldResetTaskSessionForWake({ wakeReason: "execution_changes_requested" })).toBe(true);
});
it("preserves session context on timer heartbeats", () => {
expect(shouldResetTaskSessionForWake({ wakeSource: "timer" })).toBe(false);
});
it("preserves session context on manual on-demand invokes by default", () => {
expect(
shouldResetTaskSessionForWake({
wakeSource: "on_demand",
wakeTriggerDetail: "manual",
}),
).toBe(false);
});
it("resets session context when a fresh session is explicitly requested", () => {
expect(
shouldResetTaskSessionForWake({
wakeSource: "on_demand",
wakeTriggerDetail: "manual",
forceFreshSession: true,
}),
).toBe(true);
});
it("does not reset session context on mention wake comment", () => {
expect(
shouldResetTaskSessionForWake({
wakeReason: "issue_comment_mentioned",
wakeCommentId: "comment-1",
}),
).toBe(false);
});
it("does not reset session context when commentId is present", () => {
expect(
shouldResetTaskSessionForWake({
wakeReason: "issue_commented",
commentId: "comment-2",
}),
).toBe(false);
});
it("does not reset for comment wakes", () => {
expect(shouldResetTaskSessionForWake({ wakeReason: "issue_commented" })).toBe(false);
});
it("does not reset when wake reason is missing", () => {
expect(shouldResetTaskSessionForWake({})).toBe(false);
});
it("does not reset session context on callback on-demand invokes", () => {
expect(
shouldResetTaskSessionForWake({
wakeSource: "on_demand",
wakeTriggerDetail: "callback",
}),
).toBe(false);
});
});
describe("deriveTaskKeyWithHeartbeatFallback", () => {
it("returns explicit taskKey when present", () => {
expect(deriveTaskKeyWithHeartbeatFallback({ taskKey: "issue-123" }, null)).toBe("issue-123");
});
it("returns explicit issueId when no taskKey", () => {
expect(deriveTaskKeyWithHeartbeatFallback({ issueId: "issue-456" }, null)).toBe("issue-456");
});
it("returns __heartbeat__ for timer wakes with no explicit key", () => {
expect(deriveTaskKeyWithHeartbeatFallback({ wakeSource: "timer" }, null)).toBe("__heartbeat__");
});
it("prefers explicit key over heartbeat fallback even on timer wakes", () => {
expect(
deriveTaskKeyWithHeartbeatFallback({ wakeSource: "timer", taskKey: "issue-789" }, null),
).toBe("issue-789");
});
it("returns null for non-timer wakes with no explicit key", () => {
expect(deriveTaskKeyWithHeartbeatFallback({ wakeSource: "on_demand" }, null)).toBeNull();
});
it("returns null for empty context", () => {
expect(deriveTaskKeyWithHeartbeatFallback({}, null)).toBeNull();
});
});
describe("comment wake batching", () => {
it("preserves ordered wake comment ids when coalescing queued follow-up wakes", () => {
const merged = mergeCoalescedContextSnapshot(
{
issueId: "issue-1",
wakeReason: "issue_commented",
wakeCommentId: "comment-1",
wakeCommentIds: ["comment-1"],
paperclipWake: {
latestCommentId: "comment-1",
},
},
{
issueId: "issue-1",
wakeReason: "issue_commented",
wakeCommentId: "comment-2",
},
);
expect(extractWakeCommentIds(merged)).toEqual(["comment-1", "comment-2"]);
expect(merged.commentId).toBe("comment-2");
expect(merged.wakeCommentId).toBe("comment-2");
expect(merged.paperclipWake).toBeUndefined();
});
});
describe("buildExplicitResumeSessionOverride", () => {
it("reuses saved task session params when they belong to the selected failed run", () => {
const result = buildExplicitResumeSessionOverride({
resumeFromRunId: "run-1",
resumeRunSessionIdBefore: "session-before",
resumeRunSessionIdAfter: "session-after",
taskSession: {
sessionParamsJson: {
sessionId: "session-after",
cwd: "/tmp/project",
},
sessionDisplayId: "session-after",
lastRunId: "run-1",
},
sessionCodec: codexSessionCodec,
});
expect(result).toEqual({
sessionDisplayId: "session-after",
sessionParams: {
sessionId: "session-after",
cwd: "/tmp/project",
},
});
});
it("falls back to the selected run session id when no matching task session params are available", () => {
const result = buildExplicitResumeSessionOverride({
resumeFromRunId: "run-1",
resumeRunSessionIdBefore: "session-before",
resumeRunSessionIdAfter: "session-after",
taskSession: {
sessionParamsJson: {
sessionId: "other-session",
cwd: "/tmp/project",
},
sessionDisplayId: "other-session",
lastRunId: "run-2",
},
sessionCodec: codexSessionCodec,
});
expect(result).toEqual({
sessionDisplayId: "session-after",
sessionParams: {
sessionId: "session-after",
},
});
});
});
describe("formatRuntimeWorkspaceWarningLog", () => {
it("emits informational workspace warnings on stdout", () => {
expect(formatRuntimeWorkspaceWarningLog("Using fallback workspace")).toEqual({
stream: "stdout",
chunk: "[paperclip] Using fallback workspace\n",
});
});
});
describe("prioritizeProjectWorkspaceCandidatesForRun", () => {
it("moves the explicitly selected workspace to the front", () => {
const rows = [
{ id: "workspace-1", cwd: "/tmp/one" },
{ id: "workspace-2", cwd: "/tmp/two" },
{ id: "workspace-3", cwd: "/tmp/three" },
];
expect(
prioritizeProjectWorkspaceCandidatesForRun(rows, "workspace-2").map((row) => row.id),
).toEqual(["workspace-2", "workspace-1", "workspace-3"]);
});
it("keeps the original order when no preferred workspace is selected", () => {
const rows = [
{ id: "workspace-1" },
{ id: "workspace-2" },
];
expect(
prioritizeProjectWorkspaceCandidatesForRun(rows, null).map((row) => row.id),
).toEqual(["workspace-1", "workspace-2"]);
});
it("keeps the original order when the selected workspace is missing", () => {
const rows = [
{ id: "workspace-1" },
{ id: "workspace-2" },
];
expect(
prioritizeProjectWorkspaceCandidatesForRun(rows, "workspace-9").map((row) => row.id),
).toEqual(["workspace-1", "workspace-2"]);
});
});
describe("parseSessionCompactionPolicy", () => {
it("disables Paperclip-managed rotation by default for codex and claude local", () => {
expect(parseSessionCompactionPolicy(buildAgent("codex_local"))).toEqual({
enabled: true,
maxSessionRuns: 0,
maxRawInputTokens: 0,
maxSessionAgeHours: 0,
});
expect(parseSessionCompactionPolicy(buildAgent("claude_local"))).toEqual({
enabled: true,
maxSessionRuns: 0,
maxRawInputTokens: 0,
maxSessionAgeHours: 0,
});
});
it("keeps conservative defaults for adapters without confirmed native compaction", () => {
expect(parseSessionCompactionPolicy(buildAgent("cursor"))).toEqual({
enabled: true,
maxSessionRuns: 200,
maxRawInputTokens: 2_000_000,
maxSessionAgeHours: 72,
});
expect(parseSessionCompactionPolicy(buildAgent("opencode_local"))).toEqual({
enabled: true,
maxSessionRuns: 200,
maxRawInputTokens: 2_000_000,
maxSessionAgeHours: 72,
});
});
it("lets explicit agent overrides win over adapter defaults", () => {
expect(
parseSessionCompactionPolicy(
buildAgent("codex_local", {
heartbeat: {
sessionCompaction: {
maxSessionRuns: 25,
maxRawInputTokens: 500_000,
},
},
}),
),
).toEqual({
enabled: true,
maxSessionRuns: 25,
maxRawInputTokens: 500_000,
maxSessionAgeHours: 0,
});
});
});