forked from farhoodlabs/paperclip
1bd44c8a0d
## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies. > - Remote-managed adapters need sandbox/environment execution to behave like real agent runs, not just local host probes. > - The Cloudflare sandbox path was the weakest leg in the SSH + Cloudflare QA matrix because bridge execution could truncate output, time out long-running installs, and under-provision the worker instance. > - That made several adapters fail for reasons unrelated to their actual business logic, which blocks confidence in Paperclip's non-local environment model. > - This pull request hardens the Cloudflare bridge/runtime path and adjusts sandbox probe budgets so adapter verification matches the measured behavior of the fixed environment. > - It also corrects the Pi sandbox install command so the QA matrix exercises a real, supported install path. > - The benefit is a materially more reliable SSH + Cloudflare adapter matrix with fewer false negatives and clearer failure boundaries. ## What Changed - Switched the Cloudflare bridge worker instance type to `standard-2` for the QA-matrix execution path. - Raised Cloudflare bridge/plugin-worker timeout budgets and added SSE keepalives so long-running install/exec calls can complete instead of dying at the transport layer. - Fixed Cloudflare bridge-channel command handling to avoid dropped final stdout chunks on short-lived execs. - Made Claude, OpenCode, and Cursor sandbox probe timeouts configurable/sandbox-aware, then tightened the defaults to the measured post-fix range. - Updated the Pi sandbox install command to use the package currently installed by the official `pi.dev` installer, pinned to a specific npm version. - Added/updated tests around Cloudflare bridge behavior and adapter sandbox probe paths. ## Verification - `pnpm --filter @paperclipai/adapter-claude-local typecheck` - `pnpm --filter @paperclipai/adapter-opencode-local typecheck` - `pnpm --filter @paperclipai/adapter-cursor-local typecheck` - `pnpm vitest run packages/adapters/cursor-local packages/adapters/claude-local packages/adapters/opencode-local packages/adapters/pi-local packages/plugins/sandbox-providers/cloudflare server/src/services/__tests__/plugin-worker-manager.test.ts` - Manual QA on the dedicated dev instance using the SSH + Cloudflare environment matrix (`ENV-29` through `ENV-40`). Clean end-to-end passes: SSH `claude_local`, `codex_local`, `cursor`, `gemini_local`; Cloudflare `claude_local`, `codex_local`, `cursor`, `gemini_local`. ## Risks - Cloudflare sandbox cost increases because the bridge worker now runs on `standard-2` instead of `lite`. - Higher timeout ceilings can delay surfacing truly hung Cloudflare bridge calls, even though they remove transport-level false negatives. - The manual heartbeat matrix still exposed follow-on execution/sync/disposition bugs in `opencode_local` and `pi_local`; those are not fixed by this PR. ## Model Used - OpenAI `gpt-5.4` via Paperclip `codex_local`, reasoning effort `high`, tool use enabled, repo search enabled. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots (not applicable) - [x] I have updated relevant documentation to reflect my changes (not applicable) - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --------- Co-authored-by: Paperclip <noreply@paperclip.ing>
442 lines
17 KiB
TypeScript
442 lines
17 KiB
TypeScript
import fs from "node:fs/promises";
|
||
import os from "node:os";
|
||
import path from "node:path";
|
||
import type {
|
||
AdapterEnvironmentCheck,
|
||
AdapterEnvironmentTestContext,
|
||
AdapterEnvironmentTestResult,
|
||
} from "@paperclipai/adapter-utils";
|
||
import type { AdapterExecutionTarget } from "@paperclipai/adapter-utils/execution-target";
|
||
import {
|
||
asBoolean,
|
||
asNumber,
|
||
asString,
|
||
asStringArray,
|
||
parseObject,
|
||
ensurePathInEnv,
|
||
} from "@paperclipai/adapter-utils/server-utils";
|
||
import {
|
||
ensureAdapterExecutionTargetCommandResolvable,
|
||
maybeRunSandboxInstallCommand,
|
||
ensureAdapterExecutionTargetDirectory,
|
||
runAdapterExecutionTargetProcess,
|
||
describeAdapterExecutionTarget,
|
||
resolveAdapterExecutionTargetCwd,
|
||
prepareAdapterExecutionTargetRuntime,
|
||
overrideAdapterExecutionTargetRemoteCwd,
|
||
} from "@paperclipai/adapter-utils/execution-target";
|
||
import { discoverOpenCodeModels, ensureOpenCodeModelConfiguredAndAvailable } from "./models.js";
|
||
import { parseOpenCodeJsonl } from "./parse.js";
|
||
import { SANDBOX_INSTALL_COMMAND } from "../index.js";
|
||
import { prepareOpenCodeRuntimeConfig } from "./runtime-config.js";
|
||
|
||
function summarizeStatus(checks: AdapterEnvironmentCheck[]): AdapterEnvironmentTestResult["status"] {
|
||
if (checks.some((check) => check.level === "error")) return "fail";
|
||
if (checks.some((check) => check.level === "warn")) return "warn";
|
||
return "pass";
|
||
}
|
||
|
||
function firstNonEmptyLine(text: string): string {
|
||
return (
|
||
text
|
||
.split(/\r?\n/)
|
||
.map((line) => line.trim())
|
||
.find(Boolean) ?? ""
|
||
);
|
||
}
|
||
|
||
function summarizeProbeDetail(stdout: string, stderr: string, parsedError: string | null): string | null {
|
||
const raw = parsedError?.trim() || firstNonEmptyLine(stderr) || firstNonEmptyLine(stdout);
|
||
if (!raw) return null;
|
||
const clean = raw.replace(/\s+/g, " ").trim();
|
||
const max = 240;
|
||
return clean.length > max ? `${clean.slice(0, max - 1)}...` : clean;
|
||
}
|
||
|
||
function normalizeEnv(input: unknown): Record<string, string> {
|
||
if (typeof input !== "object" || input === null || Array.isArray(input)) return {};
|
||
const env: Record<string, string> = {};
|
||
for (const [key, value] of Object.entries(input as Record<string, unknown>)) {
|
||
if (typeof value === "string") env[key] = value;
|
||
}
|
||
return env;
|
||
}
|
||
|
||
const OPENCODE_AUTH_REQUIRED_RE =
|
||
/(?:auth(?:entication)?\s+required|api\s*key|invalid\s*api\s*key|not\s+logged\s+in|opencode\s+auth\s+login|free\s+usage\s+exceeded)/i;
|
||
|
||
export async function testEnvironment(
|
||
ctx: AdapterEnvironmentTestContext,
|
||
): Promise<AdapterEnvironmentTestResult> {
|
||
const checks: AdapterEnvironmentCheck[] = [];
|
||
const config = parseObject(ctx.config);
|
||
const command = asString(config.command, "opencode");
|
||
const target = ctx.executionTarget ?? null;
|
||
const targetIsRemote = target?.kind === "remote";
|
||
const targetIsSandbox = target?.kind === "remote" && target.transport === "sandbox";
|
||
const cwd = resolveAdapterExecutionTargetCwd(target, asString(config.cwd, ""), process.cwd());
|
||
const targetLabel = targetIsRemote
|
||
? ctx.environmentName ?? describeAdapterExecutionTarget(target)
|
||
: null;
|
||
const runId = `opencode-envtest-${Date.now()}-${Math.random().toString(16).slice(2)}`;
|
||
|
||
if (targetLabel) {
|
||
checks.push({
|
||
code: "opencode_environment_target",
|
||
level: "info",
|
||
message: `Probing inside environment: ${targetLabel}`,
|
||
});
|
||
}
|
||
|
||
try {
|
||
await ensureAdapterExecutionTargetDirectory(runId, target, cwd, {
|
||
cwd,
|
||
env: {},
|
||
createIfMissing: false,
|
||
});
|
||
checks.push({
|
||
code: "opencode_cwd_valid",
|
||
level: "info",
|
||
message: `Working directory is valid: ${cwd}`,
|
||
});
|
||
} catch (err) {
|
||
checks.push({
|
||
code: "opencode_cwd_invalid",
|
||
level: "error",
|
||
message: err instanceof Error ? err.message : "Invalid working directory",
|
||
detail: cwd,
|
||
});
|
||
}
|
||
|
||
const envConfig = parseObject(config.env);
|
||
const env: Record<string, string> = {};
|
||
for (const [key, value] of Object.entries(envConfig)) {
|
||
if (typeof value === "string") env[key] = value;
|
||
}
|
||
|
||
const openaiKeyOverride = "OPENAI_API_KEY" in envConfig ? asString(envConfig.OPENAI_API_KEY, "") : null;
|
||
if (openaiKeyOverride !== null && openaiKeyOverride.trim() === "") {
|
||
checks.push({
|
||
code: "opencode_openai_api_key_missing",
|
||
level: "warn",
|
||
message: "OPENAI_API_KEY override is empty.",
|
||
hint: "The OPENAI_API_KEY override is empty. Set a valid key or remove the override.",
|
||
});
|
||
}
|
||
|
||
// Prevent OpenCode from writing an opencode.json into the working directory.
|
||
env.OPENCODE_DISABLE_PROJECT_CONFIG = "true";
|
||
const preparedRuntimeConfig = await prepareOpenCodeRuntimeConfig({ env, config });
|
||
const localRuntimeConfigHome =
|
||
preparedRuntimeConfig.notes.length > 0 ? preparedRuntimeConfig.env.XDG_CONFIG_HOME : "";
|
||
if (asBoolean(config.dangerouslySkipPermissions, true)) {
|
||
checks.push({
|
||
code: "opencode_headless_permissions_enabled",
|
||
level: "info",
|
||
message: "Headless OpenCode external-directory permissions are auto-approved for unattended runs.",
|
||
});
|
||
}
|
||
let restoreWorkspace: (() => Promise<void>) | null = null;
|
||
// Declared outside `try` so a failure inside `prepareAdapterExecutionTargetRuntime`
|
||
// still has the path available for cleanup in `finally` — otherwise the
|
||
// `fs.mkdtemp` directory leaks on the early-throw path.
|
||
let preparedRuntimeWorkspaceLocalDir: string | null = null;
|
||
try {
|
||
let runtimeTarget: AdapterExecutionTarget | null = target ?? null;
|
||
let runtimeCwd = cwd;
|
||
if (targetIsRemote) {
|
||
preparedRuntimeWorkspaceLocalDir = await fs.mkdtemp(path.join(os.tmpdir(), `paperclip-opencode-envtest-${runId}-`));
|
||
const preparedExecutionTargetRuntime = await prepareAdapterExecutionTargetRuntime({
|
||
runId,
|
||
target,
|
||
adapterKey: "opencode",
|
||
workspaceLocalDir: preparedRuntimeWorkspaceLocalDir,
|
||
workspaceRemoteDir: cwd,
|
||
installCommand: SANDBOX_INSTALL_COMMAND,
|
||
detectCommand: command,
|
||
assets: localRuntimeConfigHome
|
||
? [{
|
||
key: "xdgConfig",
|
||
localDir: localRuntimeConfigHome,
|
||
}]
|
||
: [],
|
||
});
|
||
restoreWorkspace = async () => {
|
||
await preparedExecutionTargetRuntime.restoreWorkspace().catch(() => {});
|
||
if (preparedRuntimeWorkspaceLocalDir) {
|
||
await fs.rm(preparedRuntimeWorkspaceLocalDir, { recursive: true, force: true }).catch(() => {});
|
||
}
|
||
};
|
||
runtimeCwd = preparedExecutionTargetRuntime.workspaceRemoteDir ?? runtimeCwd;
|
||
runtimeTarget = overrideAdapterExecutionTargetRemoteCwd(target ?? null, runtimeCwd) ?? null;
|
||
if (localRuntimeConfigHome && preparedExecutionTargetRuntime.assetDirs.xdgConfig) {
|
||
preparedRuntimeConfig.env.XDG_CONFIG_HOME = preparedExecutionTargetRuntime.assetDirs.xdgConfig;
|
||
}
|
||
}
|
||
const runtimeEnv = normalizeEnv(ensurePathInEnv({ ...process.env, ...preparedRuntimeConfig.env }));
|
||
|
||
const cwdInvalid = checks.some((check) => check.code === "opencode_cwd_invalid");
|
||
if (cwdInvalid) {
|
||
checks.push({
|
||
code: "opencode_command_skipped",
|
||
level: "warn",
|
||
message: "Skipped command check because working directory validation failed.",
|
||
detail: command,
|
||
});
|
||
} else {
|
||
const installCheck = await maybeRunSandboxInstallCommand({
|
||
runId,
|
||
target,
|
||
adapterKey: "opencode",
|
||
installCommand: SANDBOX_INSTALL_COMMAND,
|
||
detectCommand: command,
|
||
env,
|
||
});
|
||
if (installCheck) checks.push(installCheck);
|
||
try {
|
||
await ensureAdapterExecutionTargetCommandResolvable(command, runtimeTarget, runtimeCwd, runtimeEnv);
|
||
checks.push({
|
||
code: "opencode_command_resolvable",
|
||
level: "info",
|
||
message: `Command is executable: ${command}`,
|
||
});
|
||
} catch (err) {
|
||
checks.push({
|
||
code: "opencode_command_unresolvable",
|
||
level: "error",
|
||
message: err instanceof Error ? err.message : "Command is not executable",
|
||
detail: command,
|
||
});
|
||
}
|
||
}
|
||
|
||
const canRunProbe =
|
||
checks.every((check) => check.code !== "opencode_cwd_invalid" && check.code !== "opencode_command_unresolvable");
|
||
|
||
let modelValidationPassed = false;
|
||
const configuredModel = asString(config.model, "").trim();
|
||
|
||
// Model discovery and validation use local child processes against
|
||
// OpenCode's `models` subcommand and JSON config; these are not yet
|
||
// wired through the execution target. When probing a remote env, skip
|
||
// discovery/validation and rely on the remote hello probe to surface
|
||
// model/auth issues directly.
|
||
if (targetIsRemote && configuredModel) {
|
||
checks.push({
|
||
code: "opencode_model_validation_skipped_remote",
|
||
level: "info",
|
||
message: `Skipped local model validation; will be validated by the hello probe inside ${targetLabel}.`,
|
||
});
|
||
modelValidationPassed = true;
|
||
} else if (canRunProbe && configuredModel) {
|
||
try {
|
||
const discovered = await discoverOpenCodeModels({ command, cwd, env: runtimeEnv });
|
||
if (discovered.length > 0) {
|
||
checks.push({
|
||
code: "opencode_models_discovered",
|
||
level: "info",
|
||
message: `Discovered ${discovered.length} model(s) from OpenCode providers.`,
|
||
});
|
||
} else {
|
||
checks.push({
|
||
code: "opencode_models_empty",
|
||
level: "error",
|
||
message: "OpenCode returned no models.",
|
||
hint: "Run `opencode models` and verify provider authentication.",
|
||
});
|
||
}
|
||
} catch (err) {
|
||
const errMsg = err instanceof Error ? err.message : String(err);
|
||
if (/ProviderModelNotFoundError/i.test(errMsg)) {
|
||
checks.push({
|
||
code: "opencode_hello_probe_model_unavailable",
|
||
level: "warn",
|
||
message: "The configured model was not found by the provider.",
|
||
detail: errMsg,
|
||
hint: "Run `opencode models` and choose an available provider/model ID.",
|
||
});
|
||
} else {
|
||
checks.push({
|
||
code: "opencode_models_discovery_failed",
|
||
level: "error",
|
||
message: errMsg || "OpenCode model discovery failed.",
|
||
hint: "Run `opencode models` manually to verify provider auth and config.",
|
||
});
|
||
}
|
||
}
|
||
} else if (!targetIsRemote && canRunProbe && !configuredModel) {
|
||
try {
|
||
const discovered = await discoverOpenCodeModels({ command, cwd, env: runtimeEnv });
|
||
if (discovered.length > 0) {
|
||
checks.push({
|
||
code: "opencode_models_discovered",
|
||
level: "info",
|
||
message: `Discovered ${discovered.length} model(s) from OpenCode providers.`,
|
||
});
|
||
}
|
||
} catch (err) {
|
||
const errMsg = err instanceof Error ? err.message : String(err);
|
||
if (/ProviderModelNotFoundError/i.test(errMsg)) {
|
||
checks.push({
|
||
code: "opencode_hello_probe_model_unavailable",
|
||
level: "warn",
|
||
message: "The configured model was not found by the provider.",
|
||
detail: errMsg,
|
||
hint: "Run `opencode models` and choose an available provider/model ID.",
|
||
});
|
||
} else {
|
||
checks.push({
|
||
code: "opencode_models_discovery_failed",
|
||
level: "warn",
|
||
message: errMsg || "OpenCode model discovery failed (best-effort, no model configured).",
|
||
hint: "Run `opencode models` manually to verify provider auth and config.",
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
const modelUnavailable = checks.some((check) => check.code === "opencode_hello_probe_model_unavailable");
|
||
if (!configuredModel && !modelUnavailable) {
|
||
// No model configured – skip model requirement if no model-related checks exist
|
||
} else if (!targetIsRemote && configuredModel && canRunProbe) {
|
||
try {
|
||
await ensureOpenCodeModelConfiguredAndAvailable({
|
||
model: configuredModel,
|
||
command,
|
||
cwd,
|
||
env: runtimeEnv,
|
||
});
|
||
checks.push({
|
||
code: "opencode_model_configured",
|
||
level: "info",
|
||
message: `Configured model: ${configuredModel}`,
|
||
});
|
||
modelValidationPassed = true;
|
||
} catch (err) {
|
||
checks.push({
|
||
code: "opencode_model_invalid",
|
||
level: "error",
|
||
message: err instanceof Error ? err.message : "Configured model is unavailable.",
|
||
hint: "Run `opencode models` and choose a currently available provider/model ID.",
|
||
});
|
||
}
|
||
}
|
||
|
||
if (canRunProbe && modelValidationPassed) {
|
||
const extraArgs = (() => {
|
||
const fromExtraArgs = asStringArray(config.extraArgs);
|
||
if (fromExtraArgs.length > 0) return fromExtraArgs;
|
||
return asStringArray(config.args);
|
||
})();
|
||
const variant = asString(config.variant, "").trim();
|
||
const probeModel = configuredModel;
|
||
|
||
const args = ["run", "--format", "json"];
|
||
args.push("--model", probeModel);
|
||
if (variant) args.push("--variant", variant);
|
||
if (extraArgs.length > 0) args.push(...extraArgs);
|
||
|
||
// Sandbox bridges still add cold-start and transport overhead, but the
|
||
// standard-2 Cloudflare tier now probes quickly enough that 90s keeps
|
||
// useful headroom without letting slow hangs linger.
|
||
const helloProbeTimeoutSec = Math.max(
|
||
1,
|
||
asNumber(config.helloProbeTimeoutSec, targetIsSandbox ? 90 : 60),
|
||
);
|
||
|
||
try {
|
||
const probe = await runAdapterExecutionTargetProcess(
|
||
runId,
|
||
runtimeTarget,
|
||
command,
|
||
args,
|
||
{
|
||
cwd: runtimeCwd,
|
||
env: runtimeEnv,
|
||
timeoutSec: helloProbeTimeoutSec,
|
||
graceSec: 5,
|
||
stdin: "Respond with hello.",
|
||
onLog: async () => {},
|
||
},
|
||
);
|
||
|
||
const parsed = parseOpenCodeJsonl(probe.stdout);
|
||
const detail = summarizeProbeDetail(probe.stdout, probe.stderr, parsed.errorMessage);
|
||
const authEvidence = `${parsed.errorMessage ?? ""}\n${probe.stdout}\n${probe.stderr}`.trim();
|
||
|
||
if (probe.timedOut) {
|
||
checks.push({
|
||
code: "opencode_hello_probe_timed_out",
|
||
level: "warn",
|
||
message: "OpenCode hello probe timed out.",
|
||
hint: "Retry the probe. If this persists, run OpenCode manually in this working directory.",
|
||
});
|
||
} else if ((probe.exitCode ?? 1) === 0 && !parsed.errorMessage) {
|
||
const summary = parsed.summary.trim();
|
||
const hasHello = /\bhello\b/i.test(summary);
|
||
checks.push({
|
||
code: hasHello ? "opencode_hello_probe_passed" : "opencode_hello_probe_unexpected_output",
|
||
level: hasHello ? "info" : "warn",
|
||
message: hasHello
|
||
? "OpenCode hello probe succeeded."
|
||
: "OpenCode probe ran but did not return `hello` as expected.",
|
||
...(summary ? { detail: summary.replace(/\s+/g, " ").trim().slice(0, 240) } : {}),
|
||
...(hasHello
|
||
? {}
|
||
: {
|
||
hint: "Run `opencode run --format json` manually and prompt `Respond with hello` to inspect output.",
|
||
}),
|
||
});
|
||
} else if (/ProviderModelNotFoundError/i.test(authEvidence)) {
|
||
checks.push({
|
||
code: "opencode_hello_probe_model_unavailable",
|
||
level: "warn",
|
||
message: "The configured model was not found by the provider.",
|
||
...(detail ? { detail } : {}),
|
||
hint: "Run `opencode models` and choose an available provider/model ID.",
|
||
});
|
||
} else if (OPENCODE_AUTH_REQUIRED_RE.test(authEvidence)) {
|
||
checks.push({
|
||
code: "opencode_hello_probe_auth_required",
|
||
level: "warn",
|
||
message: "OpenCode is installed, but provider authentication is not ready.",
|
||
...(detail ? { detail } : {}),
|
||
hint: "Run `opencode auth login` or set provider credentials, then retry the probe.",
|
||
});
|
||
} else {
|
||
checks.push({
|
||
code: "opencode_hello_probe_failed",
|
||
level: "error",
|
||
message: "OpenCode hello probe failed.",
|
||
...(detail ? { detail } : {}),
|
||
hint: "Run `opencode run --format json` manually in this working directory to debug.",
|
||
});
|
||
}
|
||
} catch (err) {
|
||
checks.push({
|
||
code: "opencode_hello_probe_failed",
|
||
level: "error",
|
||
message: "OpenCode hello probe failed.",
|
||
detail: err instanceof Error ? err.message : String(err),
|
||
hint: "Run `opencode run --format json` manually in this working directory to debug.",
|
||
});
|
||
}
|
||
}
|
||
} finally {
|
||
await restoreWorkspace?.();
|
||
if (!restoreWorkspace && preparedRuntimeWorkspaceLocalDir) {
|
||
// Reached when `prepareAdapterExecutionTargetRuntime` threw before
|
||
// assigning `restoreWorkspace`: clean up the temp dir directly.
|
||
await fs.rm(preparedRuntimeWorkspaceLocalDir, { recursive: true, force: true }).catch(() => {});
|
||
}
|
||
await preparedRuntimeConfig.cleanup();
|
||
}
|
||
|
||
return {
|
||
adapterType: ctx.adapterType,
|
||
status: summarizeStatus(checks),
|
||
checks,
|
||
testedAt: new Date().toISOString(),
|
||
};
|
||
}
|