forked from farhoodlabs/paperclip
Run explicit-environment adapter tests on the requested target instead of falling back to the host (#5277)
## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies > - When a user clicks "Test" on a configured environment (SSH or sandbox), the agent-test route exercises the adapter against that target > - The route previously fell back to running the probe on the Paperclip host whenever an explicit environment target couldn't be resolved, with the test report still saying "passed" > - That hid two real failure modes: misconfigured environments looked green, and sandbox environments were never actually exercised > - This pull request acquires an ad-hoc lease and realizes a workspace for sandbox/plugin test environments, resolves a sandbox execution target wired to the environment runtime, and returns synthesized diagnostics instead of running a host probe when an explicit env target can't be resolved > - The benefit is the Test action surfaces the real environment state and never silently exercises the wrong machine ## What Changed - `server/routes/agents.ts`: acquire an ad-hoc lease and realize a workspace for sandbox/plugin test environments; resolve a sandbox execution target wired to the environment runtime - Return synthesized diagnostics (no host fallback) when an explicit env target can't be resolved - `server/services/environment-runtime.ts`: small adjustments to support the explicit-env-target case - Clarify test-route messages so they no longer claim a host fallback in explicit env flows - New `agent-test-environment-routes.test.ts` covers the guard and missing-environment path ## Verification - `pnpm vitest run --no-coverage server/src/__tests__/agent-test-environment-routes.test.ts` - `pnpm typecheck` clean - Manual: a deliberately misconfigured sandbox environment now reports diagnostics instead of a misleading host-pass ## Risks Medium — Test route behavior change. Explicit environments that previously appeared to pass via host fallback will now report their real state. This is the desired behavior, but operators should expect to see new failures for environments that were never actually working. ## Model Used Claude Opus 4.7 (1M context) ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable — new tests cover guard + missing-env paths - [x] If this change affects the UI, I have included before/after screenshots — N/A (no UI) - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { and, eq, inArray } from "drizzle-orm";
|
||||
import type { Db } from "@paperclipai/db";
|
||||
import { environmentLeases } from "@paperclipai/db";
|
||||
@@ -102,7 +103,13 @@ export interface EnvironmentDriverAcquireInput {
|
||||
companyId: string;
|
||||
environment: Environment;
|
||||
issueId: string | null;
|
||||
heartbeatRunId: string;
|
||||
/**
|
||||
* UUID of the owning heartbeat run, or null for ad-hoc invocations
|
||||
* (e.g. operator-initiated `Test` probes) that are not tied to a run.
|
||||
* Null leases must be released by id via `getDriver(...).releaseRunLease`
|
||||
* since `releaseRunLeases(heartbeatRunId)` cannot find them.
|
||||
*/
|
||||
heartbeatRunId: string | null;
|
||||
executionWorkspaceId: string | null;
|
||||
executionWorkspaceMode: ExecutionWorkspace["mode"] | null;
|
||||
}
|
||||
@@ -407,14 +414,21 @@ function createSandboxEnvironmentDriver(
|
||||
|
||||
const workerConfig = stripSandboxProviderEnvelope(parsed.config);
|
||||
const storedConfig = storedParsed.config;
|
||||
const existingLeases = parsed.config.reuseLease
|
||||
? await environmentsSvc.listLeases(input.environment.id)
|
||||
// Ad-hoc tests (heartbeatRunId === null) must never resume an existing
|
||||
// provider lease. If they did, releasing the test lease at the end of
|
||||
// the probe would tear down the live heartbeat run that owns it.
|
||||
// We also filter out leases whose policy is not reuse_by_environment
|
||||
// so any non-reusable lease (including ad-hoc test leases that
|
||||
// landed in the table from older code paths) cannot be matched.
|
||||
const reusableExistingLeases = parsed.config.reuseLease && input.heartbeatRunId !== null
|
||||
? (await environmentsSvc.listLeases(input.environment.id))
|
||||
.filter((lease) => lease.leasePolicy === "reuse_by_environment")
|
||||
: [];
|
||||
const reusableProviderLeaseId = parsed.config.reuseLease
|
||||
? findReusableSandboxLeaseId({ config: storedConfig, leases: existingLeases })
|
||||
const reusableProviderLeaseId = parsed.config.reuseLease && input.heartbeatRunId !== null
|
||||
? findReusableSandboxLeaseId({ config: storedConfig, leases: reusableExistingLeases })
|
||||
: null;
|
||||
const reusableLease = reusableProviderLeaseId
|
||||
? existingLeases.find((lease) => lease.providerLeaseId === reusableProviderLeaseId)
|
||||
? reusableExistingLeases.find((lease) => lease.providerLeaseId === reusableProviderLeaseId)
|
||||
: null;
|
||||
|
||||
const providerLease = reusableLease?.providerLeaseId
|
||||
@@ -443,12 +457,18 @@ function createSandboxEnvironmentDriver(
|
||||
companyId: input.companyId,
|
||||
environmentId: input.environment.id,
|
||||
config: workerConfig,
|
||||
runId: input.heartbeatRunId,
|
||||
// Plugin SDK requires a string; ad-hoc test leases use a fresh
|
||||
// UUID so providers that validate or persist the runId still see
|
||||
// a well-formed identifier.
|
||||
runId: input.heartbeatRunId ?? randomUUID(),
|
||||
workspaceMode: input.executionWorkspaceMode ?? undefined,
|
||||
},
|
||||
);
|
||||
|
||||
const resolvedLeasePolicy = parsed.config.reuseLease
|
||||
// Ad-hoc test leases are never publishable for reuse: storing them
|
||||
// as `reuse_by_environment` would let a concurrent heartbeat resume
|
||||
// the test's provider lease and lose its sandbox when the test ends.
|
||||
const resolvedLeasePolicy = parsed.config.reuseLease && input.heartbeatRunId !== null
|
||||
? "reuse_by_environment"
|
||||
: "ephemeral";
|
||||
|
||||
@@ -477,22 +497,33 @@ function createSandboxEnvironmentDriver(
|
||||
});
|
||||
}
|
||||
|
||||
// Built-in sandbox provider path.
|
||||
const reusableProviderLeaseId = parsed.config.reuseLease
|
||||
// Built-in sandbox provider path. Same guard as the plugin-backed path:
|
||||
// ad-hoc tests (heartbeatRunId === null) must never resume an existing
|
||||
// provider lease, or releasing the test lease will terminate the live
|
||||
// heartbeat run that shares it. Filter to leases whose policy is
|
||||
// reuse_by_environment so non-reusable rows can never be matched.
|
||||
const reusableProviderLeaseId = parsed.config.reuseLease && input.heartbeatRunId !== null
|
||||
? (await environmentsSvc
|
||||
.listLeases(input.environment.id)
|
||||
.then((leases) => findReusableSandboxLeaseId({ config: parsed.config, leases })))
|
||||
.then((leases) =>
|
||||
findReusableSandboxLeaseId({
|
||||
config: parsed.config,
|
||||
leases: leases.filter((lease) => lease.leasePolicy === "reuse_by_environment"),
|
||||
}),
|
||||
))
|
||||
: null;
|
||||
|
||||
const providerLease = await acquireSandboxProviderLease({
|
||||
config: parsed.config,
|
||||
environmentId: input.environment.id,
|
||||
heartbeatRunId: input.heartbeatRunId,
|
||||
heartbeatRunId: input.heartbeatRunId ?? randomUUID(),
|
||||
issueId: input.issueId,
|
||||
reusableProviderLeaseId,
|
||||
});
|
||||
|
||||
const resolvedLeasePolicy = parsed.config.reuseLease
|
||||
// Same ephemeral-policy-for-tests guard as the plugin-backed path:
|
||||
// ad-hoc test leases must not be publishable for reuse.
|
||||
const resolvedLeasePolicy = parsed.config.reuseLease && input.heartbeatRunId !== null
|
||||
? "reuse_by_environment"
|
||||
: "ephemeral";
|
||||
|
||||
@@ -831,7 +862,7 @@ function createPluginEnvironmentDriver(
|
||||
companyId: input.companyId,
|
||||
environmentId: input.environment.id,
|
||||
config: parsed.config.driverConfig,
|
||||
runId: input.heartbeatRunId,
|
||||
runId: input.heartbeatRunId ?? randomUUID(),
|
||||
workspaceMode: input.executionWorkspaceMode ?? undefined,
|
||||
});
|
||||
|
||||
@@ -1040,7 +1071,8 @@ export function environmentRuntimeService(
|
||||
companyId: string;
|
||||
environment: Environment;
|
||||
issueId: string | null;
|
||||
heartbeatRunId: string;
|
||||
/** Null for ad-hoc invocations (e.g. operator-initiated `Test` probes). */
|
||||
heartbeatRunId: string | null;
|
||||
persistedExecutionWorkspace: Pick<ExecutionWorkspace, "id" | "mode"> | null;
|
||||
}): Promise<EnvironmentRuntimeLeaseRecord> {
|
||||
if (input.environment.status !== "active") {
|
||||
|
||||
Reference in New Issue
Block a user