Files
paperclip/packages/adapters/grok-local/src/server/test.test.ts
T
Devin Foley ab8b471685 Add built-in grok_local adapter (#6087)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies, so
adapter quality directly affects what runtimes the control plane can
supervise.
> - Local CLI adapters are one of the core execution surfaces because
they turn real coding tools into Paperclip-managed employees with
heartbeats, transcripts, and reviewability.
> - Grok Build was installed on the Paperclip host, but Paperclip had no
built-in `grok_local` adapter, so the runtime could not be configured
through the normal server/UI/CLI adapter path.
> - That gap needed to be closed with the same built-in registry,
environment diagnostics, transcript parsing, and skill/instructions
behavior that the other local adapters already rely on.
> - After the initial adapter landed, a real follow-up run showed that
Grok streaming text was being rendered one fragment per line, which made
transcripts harder to read even though the runtime itself was working.
> - This pull request adds the built-in `grok_local` adapter end-to-end
and then fixes the transcript parser so streamed Grok output is
coalesced into readable assistant/thinking blocks.
> - The benefit is that Grok Build becomes a first-class Paperclip
runtime with a usable operator experience instead of a partially wired
runtime with noisy transcript output.

## What Changed

- Added a new built-in `@paperclipai/adapter-grok-local` package with
server, UI, and CLI entrypoints.
- Implemented Grok execution, session handling, environment diagnostics,
config building, skill syncing, and parser coverage inside the new
adapter package.
- Registered `grok_local` across the built-in adapter inventories and
capability/display metadata in server, UI, CLI, and shared constants.
- Added adapter route coverage for the new built-in type.
- Fixed Grok transcript readability by emitting streamed `text` and
`thought` fragments as deltas so the shared transcript builder coalesces
them into readable message blocks.
- Added regression tests for the Grok parser and transcript coalescing
behavior.

## Verification

- `pnpm vitest run
packages/adapters/grok-local/src/ui/parse-stdout.test.ts
ui/src/adapters/transcript.test.ts`
- `pnpm --filter @paperclipai/adapter-grok-local build`
- Manual runtime verification on the Paperclip host during
implementation and follow-up review:
  - confirmed the Grok CLI was installed and authenticated
  - confirmed the worktree dev server could be restarted cleanly and
    health-checked after the parser follow-up
- No screenshots attached. This change is primarily adapter plumbing
plus transcript formatting behavior; reviewers can verify via the
Grok-backed run surfaces directly.

## Risks

- This adds a new built-in adapter, so any missed registration surface
could create inconsistencies between server, UI, and CLI behavior.
- The adapter depends on Grok Build's current event/output shape; if
upstream Grok streaming JSON changes, transcript parsing or session
extraction may need follow-up updates.
- The transcript readability fix intentionally changes how Grok
fragments are grouped, so any downstream code that implicitly expected
one entry per fragment would behave differently.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex via Paperclip `codex_local` agent runtime.
- GPT-5-class coding model with tool use, shell execution, file editing,
and repo inspection enabled.
- Exact backend model ID/context window were not surfaced to the agent
in this Paperclip session.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [ ] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-05-16 09:51:09 -07:00

143 lines
3.9 KiB
TypeScript

import { describe, expect, it, vi, beforeEach } from "vitest";
// Mock handles shared by the module mock below and by the tests. They are
// created through vi.hoisted so they can be referenced from the vi.mock factory.
const { ensureDirectoryMock, ensureCommandMock, runProcessMock } = vi.hoisted(() => ({
  ensureDirectoryMock: vi.fn(async () => {}),
  ensureCommandMock: vi.fn(async () => {}),
  runProcessMock: vi.fn(),
}));
// Replace the execution-target helpers with local stand-ins so the tests never
// touch a real filesystem or spawn a real process.
vi.mock("@paperclipai/adapter-utils/execution-target", () => {
  // Use the configured cwd when it is truthy; otherwise fall back.
  function pickCwd(_target: unknown, configuredCwd: string, fallbackCwd: string) {
    return configuredCwd ? configuredCwd : fallbackCwd;
  }

  return {
    describeAdapterExecutionTarget: () => "local",
    ensureAdapterExecutionTargetCommandResolvable: ensureCommandMock,
    ensureAdapterExecutionTargetDirectory: ensureDirectoryMock,
    resolveAdapterExecutionTargetCwd: pickCwd,
    runAdapterExecutionTargetProcess: runProcessMock,
  };
});
import { parseGrokModelsOutput, testEnvironment } from "./test.js";
describe("parseGrokModelsOutput", () => {
  it("extracts auth state and models from `grok models` output", () => {
    // Sample output: login banner, default model line, then a bulleted model list.
    const outputLines = [
      "You are logged in with grok.com.",
      "",
      "Default model: grok-build",
      "",
      "Available models:",
      " * grok-build (default)",
      " * grok-code",
    ];

    const parsed = parseGrokModelsOutput(outputLines.join("\n"));

    // The "(default)" suffix must not leak into the parsed model names.
    const expected = {
      authenticated: true,
      defaultModel: "grok-build",
      models: ["grok-build", "grok-code"],
    };
    expect(parsed).toEqual(expected);
  });
});
describe("grok_local testEnvironment", () => {
  // Builds the result object the mocked process runner resolves with: a process
  // that exited on its own (no signal, no timeout) with the given streams.
  const finishedProcess = (exitCode: number, stdout: string, stderr: string) => ({
    exitCode,
    signal: null,
    timedOut: false,
    stdout,
    stderr,
  });

  beforeEach(() => {
    // Clear recorded calls on the no-op mocks; fully reset the process runner
    // so queued one-shot results from a previous test cannot carry over.
    ensureDirectoryMock.mockClear();
    ensureCommandMock.mockClear();
    runProcessMock.mockReset();
  });

  it("reports a healthy authenticated host with a working hello probe", async () => {
    const modelsStdout = [
      "You are logged in with grok.com.",
      "",
      "Default model: grok-build",
      "",
      "Available models:",
      " * grok-build (default)",
    ].join("\n");
    const helloStdout = [
      JSON.stringify({ type: "text", data: "hello" }),
      JSON.stringify({ type: "end", stopReason: "EndTurn", sessionId: "sess-1", requestId: "req-1" }),
    ].join("\n");

    // First process call gets the model listing, second gets the streamed hello reply.
    runProcessMock.mockResolvedValueOnce(finishedProcess(0, modelsStdout, ""));
    runProcessMock.mockResolvedValueOnce(finishedProcess(0, helloStdout, ""));

    const result = await testEnvironment({
      companyId: "company-1",
      adapterType: "grok_local",
      config: {
        command: "grok",
        cwd: "/tmp/project",
        model: "grok-build",
      },
    });

    expect(result.status).toBe("pass");

    const reportedCodes = result.checks.map((check: { code: string }) => check.code);
    const requiredCodes = [
      "grok_command_resolvable",
      "grok_models_probe_passed",
      "grok_model_configured",
      "grok_hello_probe_passed",
    ];
    expect(reportedCodes).toEqual(expect.arrayContaining(requiredCodes));

    // The second invocation must be the hello probe with these CLI arguments.
    const helloProbeArgs = [
      "--output-format",
      "streaming-json",
      "--always-approve",
      "--permission-mode",
      "dontAsk",
      "--disable-web-search",
      "--single",
      "Respond with exactly hello.",
    ];
    expect(runProcessMock).toHaveBeenNthCalledWith(
      2,
      expect.any(String),
      null,
      "grok",
      expect.arrayContaining(helloProbeArgs),
      expect.any(Object),
    );
  });

  it("downgrades auth failures to warnings", async () => {
    const notLoggedIn = "Not logged in. Run `grok login`.";

    // Both process calls fail with the same "not logged in" message on stderr.
    runProcessMock.mockResolvedValueOnce(finishedProcess(1, "", notLoggedIn));
    runProcessMock.mockResolvedValueOnce(finishedProcess(1, "", notLoggedIn));

    const result = await testEnvironment({
      companyId: "company-1",
      adapterType: "grok_local",
      config: {
        command: "grok",
        cwd: "/tmp/project",
      },
    });

    expect(result.status).toBe("warn");

    const reportedCodes = result.checks.map((check: { code: string }) => check.code);
    const requiredCodes = ["grok_auth_required", "grok_hello_probe_auth_required"];
    expect(reportedCodes).toEqual(expect.arrayContaining(requiredCodes));
  });
});