Run explicit-environment adapter tests on the requested target instead of falling back to the host (#5277)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - When a user clicks "Test" on a configured environment (SSH or
sandbox), the agent-test route exercises the adapter against that target
> - The route previously fell back to running the probe on the Paperclip
host whenever an explicit environment target couldn't be resolved, with
the test report still saying "passed"
> - That hid two real failure modes: misconfigured environments looked
green, and sandbox environments were never actually exercised
> - This pull request acquires an ad-hoc lease and realizes a workspace
for sandbox/plugin test environments, resolves a sandbox execution
target wired to the environment runtime, and returns synthesized
diagnostics instead of running a host probe when an explicit env target
can't be resolved
> - The benefit is the Test action surfaces the real environment state
and never silently exercises the wrong machine

## What Changed

- `server/routes/agents.ts`: acquire an ad-hoc lease and realize a
workspace for sandbox/plugin test environments; resolve a sandbox
execution target wired to the environment runtime
- Return synthesized diagnostics (no host fallback) when an explicit env
target can't be resolved
- `server/services/environment-runtime.ts`: small adjustments to support
the explicit-env-target case
- Clarify test-route messages so they no longer claim a host fallback in
explicit env flows
- New `agent-test-environment-routes.test.ts` covers the guard and
missing-environment path

## Verification

- `pnpm vitest run --no-coverage
server/src/__tests__/agent-test-environment-routes.test.ts`
- `pnpm typecheck` clean
- Manual: a deliberately misconfigured sandbox environment now reports
diagnostics instead of a misleading host-pass

## Risks

Medium — Test route behavior change. Explicit environments that
previously appeared to pass via host fallback will now report their real
state. This is the desired behavior, but operators should expect to see
new failures for environments that were never actually working.

## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — new tests cover
guard + missing-env paths
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
Devin Foley
2026-05-05 08:00:32 -07:00
committed by GitHub
parent 9042b8d042
commit 5c2f9aba9d
3 changed files with 578 additions and 56 deletions
+47 -15
View File
@@ -1,3 +1,4 @@
import { randomUUID } from "node:crypto";
import { and, eq, inArray } from "drizzle-orm";
import type { Db } from "@paperclipai/db";
import { environmentLeases } from "@paperclipai/db";
@@ -102,7 +103,13 @@ export interface EnvironmentDriverAcquireInput {
companyId: string;
environment: Environment;
issueId: string | null;
heartbeatRunId: string;
/**
* UUID of the owning heartbeat run, or null for ad-hoc invocations
* (e.g. operator-initiated `Test` probes) that are not tied to a run.
* Null leases must be released by id via `getDriver(...).releaseRunLease`
* since `releaseRunLeases(heartbeatRunId)` cannot find them.
*/
heartbeatRunId: string | null;
executionWorkspaceId: string | null;
executionWorkspaceMode: ExecutionWorkspace["mode"] | null;
}
@@ -407,14 +414,21 @@ function createSandboxEnvironmentDriver(
const workerConfig = stripSandboxProviderEnvelope(parsed.config);
const storedConfig = storedParsed.config;
const existingLeases = parsed.config.reuseLease
? await environmentsSvc.listLeases(input.environment.id)
// Ad-hoc tests (heartbeatRunId === null) must never resume an existing
// provider lease. If they did, releasing the test lease at the end of
// the probe would tear down the live heartbeat run that owns it.
// We also filter out leases whose policy is not reuse_by_environment
// so any non-reusable lease (including ad-hoc test leases that
// landed in the table from older code paths) cannot be matched.
const reusableExistingLeases = parsed.config.reuseLease && input.heartbeatRunId !== null
? (await environmentsSvc.listLeases(input.environment.id))
.filter((lease) => lease.leasePolicy === "reuse_by_environment")
: [];
const reusableProviderLeaseId = parsed.config.reuseLease
? findReusableSandboxLeaseId({ config: storedConfig, leases: existingLeases })
const reusableProviderLeaseId = parsed.config.reuseLease && input.heartbeatRunId !== null
? findReusableSandboxLeaseId({ config: storedConfig, leases: reusableExistingLeases })
: null;
const reusableLease = reusableProviderLeaseId
? existingLeases.find((lease) => lease.providerLeaseId === reusableProviderLeaseId)
? reusableExistingLeases.find((lease) => lease.providerLeaseId === reusableProviderLeaseId)
: null;
const providerLease = reusableLease?.providerLeaseId
@@ -443,12 +457,18 @@ function createSandboxEnvironmentDriver(
companyId: input.companyId,
environmentId: input.environment.id,
config: workerConfig,
runId: input.heartbeatRunId,
// Plugin SDK requires a string; ad-hoc test leases use a fresh
// UUID so providers that validate or persist the runId still see
// a well-formed identifier.
runId: input.heartbeatRunId ?? randomUUID(),
workspaceMode: input.executionWorkspaceMode ?? undefined,
},
);
const resolvedLeasePolicy = parsed.config.reuseLease
// Ad-hoc test leases are never publishable for reuse: storing them
// as `reuse_by_environment` would let a concurrent heartbeat resume
// the test's provider lease and lose its sandbox when the test ends.
const resolvedLeasePolicy = parsed.config.reuseLease && input.heartbeatRunId !== null
? "reuse_by_environment"
: "ephemeral";
@@ -477,22 +497,33 @@ function createSandboxEnvironmentDriver(
});
}
// Built-in sandbox provider path.
const reusableProviderLeaseId = parsed.config.reuseLease
// Built-in sandbox provider path. Same guard as the plugin-backed path:
// ad-hoc tests (heartbeatRunId === null) must never resume an existing
// provider lease, or releasing the test lease will terminate the live
// heartbeat run that shares it. Filter to leases whose policy is
// reuse_by_environment so non-reusable rows can never be matched.
const reusableProviderLeaseId = parsed.config.reuseLease && input.heartbeatRunId !== null
? (await environmentsSvc
.listLeases(input.environment.id)
.then((leases) => findReusableSandboxLeaseId({ config: parsed.config, leases })))
.then((leases) =>
findReusableSandboxLeaseId({
config: parsed.config,
leases: leases.filter((lease) => lease.leasePolicy === "reuse_by_environment"),
}),
))
: null;
const providerLease = await acquireSandboxProviderLease({
config: parsed.config,
environmentId: input.environment.id,
heartbeatRunId: input.heartbeatRunId,
heartbeatRunId: input.heartbeatRunId ?? randomUUID(),
issueId: input.issueId,
reusableProviderLeaseId,
});
const resolvedLeasePolicy = parsed.config.reuseLease
// Same ephemeral-policy-for-tests guard as the plugin-backed path:
// ad-hoc test leases must not be publishable for reuse.
const resolvedLeasePolicy = parsed.config.reuseLease && input.heartbeatRunId !== null
? "reuse_by_environment"
: "ephemeral";
@@ -831,7 +862,7 @@ function createPluginEnvironmentDriver(
companyId: input.companyId,
environmentId: input.environment.id,
config: parsed.config.driverConfig,
runId: input.heartbeatRunId,
runId: input.heartbeatRunId ?? randomUUID(),
workspaceMode: input.executionWorkspaceMode ?? undefined,
});
@@ -1040,7 +1071,8 @@ export function environmentRuntimeService(
companyId: string;
environment: Environment;
issueId: string | null;
heartbeatRunId: string;
/** Null for ad-hoc invocations (e.g. operator-initiated `Test` probes). */
heartbeatRunId: string | null;
persistedExecutionWorkspace: Pick<ExecutionWorkspace, "id" | "mode"> | null;
}): Promise<EnvironmentRuntimeLeaseRecord> {
if (input.environment.status !== "active") {