Files
paperclip/packages/adapters/grok-local/src/server/test.ts
T
Devin Foley ab8b471685 Add built-in grok_local adapter (#6087)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies, so
adapter quality directly affects what runtimes the control plane can
supervise.
> - Local CLI adapters are one of the core execution surfaces because
they turn real coding tools into Paperclip-managed employees with
heartbeats, transcripts, and reviewability.
> - Grok Build was installed on the Paperclip host, but Paperclip had no
built-in `grok_local` adapter, so the runtime could not be configured
through the normal server/UI/CLI adapter path.
> - That gap needed to be closed with the same built-in registry,
environment diagnostics, transcript parsing, and skill/instructions
behavior that the other local adapters already rely on.
> - After the initial adapter landed, a real follow-up run showed that
Grok streaming text was being rendered one fragment per line, which made
transcripts harder to read even though the runtime itself was working.
> - This pull request adds the built-in `grok_local` adapter end-to-end
and then fixes the transcript parser so streamed Grok output is
coalesced into readable assistant/thinking blocks.
> - The benefit is that Grok Build becomes a first-class Paperclip
runtime with a usable operator experience instead of a partially wired
runtime with noisy transcript output.

## What Changed

- Added a new built-in `@paperclipai/adapter-grok-local` package with
server, UI, and CLI entrypoints.
- Implemented Grok execution, session handling, environment diagnostics,
config building, skill syncing, and parser coverage inside the new
adapter package.
- Registered `grok_local` across the built-in adapter inventories and
capability/display metadata in server, UI, CLI, and shared constants.
- Added adapter route coverage for the new built-in type.
- Fixed Grok transcript readability by emitting streamed `text` and
`thought` fragments as deltas so the shared transcript builder coalesces
them into readable message blocks.
- Added regression tests for the Grok parser and transcript coalescing
behavior.

## Verification

- `pnpm vitest run
packages/adapters/grok-local/src/ui/parse-stdout.test.ts
ui/src/adapters/transcript.test.ts`
- `pnpm --filter @paperclipai/adapter-grok-local build`
- Manual runtime verification on the Paperclip host during
implementation and follow-up review:
  - confirmed the Grok CLI was installed and authenticated
  - confirmed the worktree dev server could be restarted cleanly and
    health-checked after the parser follow-up
- No screenshots attached. This change is primarily adapter plumbing
plus transcript formatting behavior; reviewers can verify via the
Grok-backed run surfaces directly.

## Risks

- This adds a new built-in adapter, so any missed registration surface
could create inconsistencies between server, UI, and CLI behavior.
- The adapter depends on Grok Build's current event/output shape; if
upstream Grok streaming JSON changes, transcript parsing or session
extraction may need follow-up updates.
- The transcript readability fix intentionally changes how Grok
fragments are grouped, so any downstream code that implicitly expected
one entry per fragment would behave differently.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex via Paperclip `codex_local` agent runtime.
- GPT-5-class coding model with tool use, shell execution, file editing,
and repo inspection enabled.
- Exact backend model ID/context window were not surfaced to the agent
in this Paperclip session.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [ ] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-05-16 09:51:09 -07:00

314 lines
10 KiB
TypeScript

import type {
AdapterEnvironmentCheck,
AdapterEnvironmentTestContext,
AdapterEnvironmentTestResult,
} from "@paperclipai/adapter-utils";
import {
asNumber,
asString,
asStringArray,
ensurePathInEnv,
parseObject,
} from "@paperclipai/adapter-utils/server-utils";
import {
describeAdapterExecutionTarget,
ensureAdapterExecutionTargetCommandResolvable,
ensureAdapterExecutionTargetDirectory,
resolveAdapterExecutionTargetCwd,
runAdapterExecutionTargetProcess,
} from "@paperclipai/adapter-utils/execution-target";
import { DEFAULT_GROK_LOCAL_MODEL } from "../index.js";
import { parseGrokJsonl } from "./parse.js";
/**
 * Structured result produced by `parseGrokModelsOutput` from the text
 * output of the Grok CLI `models` command.
 */
export interface GrokModelsProbe {
/** Whether the parsed output indicated a logged-in session. */
authenticated: boolean;
/** Value taken from a `Default model:` line, or null when no such line was found. */
defaultModel: string | null;
/** De-duplicated model names collected after the `Available models:` header. */
models: string[];
}
/**
 * Reduce a list of checks to one overall status.
 *
 * Any `error`-level check yields `"fail"`; otherwise any `warn`-level check
 * yields `"warn"`; otherwise the result is `"pass"` (including for an empty list).
 */
function summarizeStatus(checks: AdapterEnvironmentCheck[]): AdapterEnvironmentTestResult["status"] {
  let sawWarning = false;
  for (const { level } of checks) {
    // An error dominates everything else, so stop scanning immediately.
    if (level === "error") return "fail";
    if (level === "warn") sawWarning = true;
  }
  return sawWarning ? "warn" : "pass";
}
/**
 * Return the first line of `text` that still has content after trimming.
 *
 * Lines are split on `\n` or `\r\n`. The returned line is trimmed. When every
 * line is blank (or `text` is empty) the result is the empty string.
 */
function firstNonEmptyLine(text: string): string {
  for (const rawLine of text.split(/\r?\n/)) {
    const candidate = rawLine.trim();
    if (candidate) return candidate;
  }
  return "";
}
/**
 * Build a short, single-line detail string for a probe result.
 *
 * The source text is chosen in priority order: the trimmed `parsedError`,
 * then the first non-empty line of `stderr`, then the first non-empty line of
 * `stdout`. Runs of whitespace are collapsed to single spaces, and results
 * longer than 240 characters are cut to 237 characters plus `...`.
 *
 * @returns The summarized text, or null when none of the inputs has content.
 */
function summarizeProbeDetail(stdout: string, stderr: string, parsedError: string | null): string | null {
  const limit = 240;

  let source = parsedError?.trim();
  if (!source) source = firstNonEmptyLine(stderr);
  if (!source) source = firstNonEmptyLine(stdout);
  if (!source) return null;

  const collapsed = source.replace(/\s+/g, " ").trim();
  if (collapsed.length <= limit) return collapsed;
  return `${collapsed.slice(0, limit - 3)}...`;
}
/**
 * Coerce an arbitrary config value into a string-to-string environment map.
 *
 * Non-objects, null and arrays produce an empty map. For objects, only
 * entries whose value is a string are copied; everything else is dropped.
 */
function normalizeEnv(input: unknown): Record<string, string> {
  const result: Record<string, string> = {};
  const isPlainRecord = typeof input === "object" && input !== null && !Array.isArray(input);
  if (!isPlainRecord) return result;

  const source = input as Record<string, unknown>;
  for (const name of Object.keys(source)) {
    const candidate = source[name];
    if (typeof candidate === "string") result[name] = candidate;
  }
  return result;
}
/**
 * Case-insensitive pattern for CLI output phrases that are treated as
 * "authentication required" (e.g. "not logged in", "login required",
 * "run `grok login`", "unauthorized"). `testEnvironment` applies it to the
 * combined stdout/stderr of a probe to decide whether a non-zero exit is
 * reported as an auth warning or as a generic failure.
 */
const GROK_AUTH_REQUIRED_RE =
/(?:not\s+logged\s+in|login\s+required|run\s+`?grok\s+login`?|authentication\s+required|unauthorized|invalid\s+credentials)/i;
/**
 * Parse the plain-text output of the Grok CLI `models` command.
 *
 * Recognized lines (after trimming; blank lines are ignored):
 * - any line containing "logged in" (case-insensitive) marks the session as
 *   authenticated, unless the line says "not logged in";
 * - `Default model: <name>` sets `defaultModel` (the last such line wins);
 * - `Available models:` starts the model list; every later line is taken as a
 *   model name, with an optional leading `-`/`*` bullet and an optional
 *   trailing ` (default)` marker removed.
 *
 * @param stdout Raw standard output of the command.
 * @returns The authentication flag, default model (or null) and the
 *   de-duplicated model names in first-seen order.
 */
export function parseGrokModelsOutput(stdout: string): GrokModelsProbe {
  // Patterns are stateless (no `g` flag), so they can be shared across lines.
  const loggedInRe = /logged in/i;
  const notLoggedInRe = /not\s+logged\s+in/i;
  const defaultModelRe = /^Default model:\s*(.+)$/i;
  const modelsHeaderRe = /^Available models:/i;
  const bulletRe = /^[*-]\s*(.+?)(?:\s+\(default\))?$/;
  const defaultSuffixRe = /\s+\(default\)$/;

  // A Set keeps insertion order, which gives first-seen de-duplication.
  const models = new Set<string>();
  let defaultModel: string | null = null;
  let authenticated = false;
  let inModelsBlock = false;

  for (const rawLine of stdout.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line) continue;

    // "not logged in" contains "logged in"; only affirmative phrasing counts,
    // otherwise a logged-out message would be reported as authenticated.
    if (loggedInRe.test(line) && !notLoggedInRe.test(line)) authenticated = true;

    const defaultMatch = defaultModelRe.exec(line);
    if (defaultMatch?.[1]) {
      defaultModel = defaultMatch[1].trim();
      continue;
    }

    if (modelsHeaderRe.test(line)) {
      inModelsBlock = true;
      continue;
    }
    if (!inModelsBlock) continue;

    // Bulleted entry ("- name" / "* name"), otherwise treat the bare line as a name.
    const bulletMatch = bulletRe.exec(line);
    const name = bulletMatch?.[1]
      ? bulletMatch[1].trim()
      : line.replace(defaultSuffixRe, "").trim();
    if (name) models.add(name);
  }

  return {
    authenticated,
    defaultModel,
    models: Array.from(models),
  };
}
/**
 * Run the environment diagnostics for the `grok_local` adapter.
 *
 * Steps, in order:
 * 1. For a remote execution target, record which environment is being probed.
 * 2. Ensure the working directory exists (creating it if missing).
 * 3. Check that the configured command (default `grok`) can be resolved.
 * 4. If both of the above succeeded, run `<command> models` and report
 *    authentication state, discovered models and whether the configured model
 *    is among them.
 * 5. Then run a one-shot "hello" prompt and check the parsed summary for the
 *    word "hello".
 *
 * Both probes use `config.helloProbeTimeoutSec` (default 45, minimum 1) as
 * their timeout.
 *
 * @param ctx Adapter config plus the optional execution target and environment name.
 * @returns The collected checks, an overall status derived from their levels,
 *   and the ISO timestamp at which the result was assembled.
 */
export async function testEnvironment(
  ctx: AdapterEnvironmentTestContext,
): Promise<AdapterEnvironmentTestResult> {
  const checks: AdapterEnvironmentCheck[] = [];
  const settings = parseObject(ctx.config);
  const command = asString(settings.command, "grok");
  const target = ctx.executionTarget ?? null;
  const cwd = resolveAdapterExecutionTargetCwd(target, asString(settings.cwd, ""), process.cwd());
  // Only remote targets get a label; local runs skip the "probing inside" note.
  const targetLabel =
    target?.kind === "remote" ? ctx.environmentName ?? describeAdapterExecutionTarget(target) : null;
  const runId = `grok-envtest-${Date.now()}-${Math.random().toString(16).slice(2)}`;

  const messageOf = (err: unknown, fallback: string): string =>
    err instanceof Error ? err.message : fallback;

  const buildResult = (): AdapterEnvironmentTestResult => ({
    adapterType: "grok_local",
    status: summarizeStatus(checks),
    checks,
    testedAt: new Date().toISOString(),
  });

  if (targetLabel) {
    checks.push({
      code: "grok_environment_target",
      level: "info",
      message: `Probing inside environment: ${targetLabel}`,
    });
  }

  // --- Working directory ---------------------------------------------------
  try {
    await ensureAdapterExecutionTargetDirectory(runId, target, cwd, {
      cwd,
      env: {},
      createIfMissing: true,
    });
    checks.push({
      code: "grok_cwd_valid",
      level: "info",
      message: `Working directory is valid: ${cwd}`,
    });
  } catch (err) {
    checks.push({
      code: "grok_cwd_invalid",
      level: "error",
      message: messageOf(err, "Invalid working directory"),
      detail: cwd,
    });
  }

  // --- Command resolution --------------------------------------------------
  const env = normalizeEnv(settings.env);
  const runtimeEnv = ensurePathInEnv({ ...process.env, ...env });
  try {
    await ensureAdapterExecutionTargetCommandResolvable(command, target, cwd, runtimeEnv);
    checks.push({
      code: "grok_command_resolvable",
      level: "info",
      message: `Command is executable: ${command}`,
    });
  } catch (err) {
    checks.push({
      code: "grok_command_unresolvable",
      level: "error",
      message: messageOf(err, "Command is not executable"),
      detail: command,
    });
  }

  const probeBlocked = checks.some(
    (check) => check.code === "grok_cwd_invalid" || check.code === "grok_command_unresolvable",
  );
  const configuredModel = asString(settings.model, DEFAULT_GROK_LOCAL_MODEL).trim();

  // Without a usable cwd and command there is nothing meaningful to execute.
  if (probeBlocked) return buildResult();

  // Both probes share the same execution options.
  const runProbe = (args: string[]) =>
    runAdapterExecutionTargetProcess(runId, target, command, args, {
      cwd,
      env,
      timeoutSec: Math.max(1, asNumber(settings.helloProbeTimeoutSec, 45)),
      graceSec: 5,
      onLog: async () => {},
    });

  // --- `models` probe ------------------------------------------------------
  const modelsRun = await runProbe(["models"]);
  const listing = parseGrokModelsOutput(modelsRun.stdout);
  const modelsNeedLogin = GROK_AUTH_REQUIRED_RE.test(`${modelsRun.stdout}\n${modelsRun.stderr}`);

  if (modelsRun.timedOut) {
    checks.push({
      code: "grok_models_probe_timed_out",
      level: "warn",
      message: "`grok models` timed out.",
      hint: "Retry the probe. If this persists, run `grok models` manually from the target environment.",
    });
  } else if ((modelsRun.exitCode ?? 1) !== 0) {
    const modelsDetail = summarizeProbeDetail(modelsRun.stdout, modelsRun.stderr, null);
    if (modelsNeedLogin) {
      checks.push({
        code: "grok_auth_required",
        level: "warn",
        message: "Grok CLI is not authenticated.",
        detail: modelsDetail,
        hint: "Run `grok login` on the target host, then retry.",
      });
    } else {
      checks.push({
        code: "grok_models_probe_failed",
        level: "error",
        message: "`grok models` failed.",
        detail: modelsDetail,
        hint: undefined,
      });
    }
  } else {
    checks.push({
      code: "grok_models_probe_passed",
      level: "info",
      message: listing.authenticated
        ? "Grok CLI authentication is configured."
        : "`grok models` completed.",
      detail: listing.defaultModel ? `Default model: ${listing.defaultModel}` : undefined,
    });

    if (listing.models.length > 0) {
      checks.push({
        code: "grok_models_discovered",
        level: "info",
        message: `Discovered ${listing.models.length} Grok model(s).`,
      });
    } else {
      checks.push({
        code: "grok_models_empty",
        level: "warn",
        message: "Grok returned no available models.",
        hint: "Run `grok models` manually and verify the account has access to a model.",
      });
    }

    if (configuredModel) {
      if (listing.models.includes(configuredModel)) {
        checks.push({
          code: "grok_model_configured",
          level: "info",
          message: `Configured model: ${configuredModel}`,
          hint: undefined,
        });
      } else {
        checks.push({
          code: "grok_model_not_found",
          level: "warn",
          message: `Configured model "${configuredModel}" not found in available models.`,
          hint: "Run `grok models` and choose an available model id.",
        });
      }
    }
  }

  // --- Hello probe ---------------------------------------------------------
  // The model flag is only passed when it differs from the adapter default.
  const helloArgs = [
    "--output-format",
    "streaming-json",
    "--always-approve",
    "--permission-mode",
    "dontAsk",
    "--disable-web-search",
    ...(configuredModel && configuredModel !== DEFAULT_GROK_LOCAL_MODEL
      ? ["--model", configuredModel]
      : []),
    "--single",
    "Respond with exactly hello.",
  ];
  const helloRun = await runProbe(helloArgs);
  const parsed = parseGrokJsonl(helloRun.stdout);
  const detail = summarizeProbeDetail(helloRun.stdout, helloRun.stderr, parsed.errorMessage);
  const helloNeedsLogin = GROK_AUTH_REQUIRED_RE.test(`${helloRun.stdout}\n${helloRun.stderr}`);

  if (helloRun.timedOut) {
    checks.push({
      code: "grok_hello_probe_timed_out",
      level: "warn",
      message: "Grok hello probe timed out.",
      hint: "Retry the probe. If this persists, verify Grok can run a simple `--single` prompt manually.",
    });
  } else if ((helloRun.exitCode ?? 1) !== 0) {
    if (helloNeedsLogin) {
      checks.push({
        code: "grok_hello_probe_auth_required",
        level: "warn",
        message: "Grok CLI could not answer the hello probe because authentication is missing.",
        ...(detail ? { detail } : {}),
        hint: "Run `grok login` on the target host, then retry.",
      });
    } else {
      checks.push({
        code: "grok_hello_probe_failed",
        level: "error",
        message: "Grok hello probe failed.",
        ...(detail ? { detail } : {}),
        hint: undefined,
      });
    }
  } else if (/\bhello\b/i.test(parsed.summary)) {
    checks.push({
      code: "grok_hello_probe_passed",
      level: "info",
      message: "Grok hello probe succeeded.",
    });
  } else {
    checks.push({
      code: "grok_hello_probe_unexpected_output",
      level: "warn",
      message: "Grok hello probe succeeded but returned unexpected output.",
      ...(detail ? { detail } : {}),
    });
  }

  return buildResult();
}