Files
paperclip/packages/adapters/grok-local/src/server/execute.test.ts
T
Devin Foley ab8b471685 Add built-in grok_local adapter (#6087)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies, so
adapter quality directly affects what runtimes the control plane can
supervise.
> - Local CLI adapters are one of the core execution surfaces because
they turn real coding tools into Paperclip-managed employees with
heartbeats, transcripts, and reviewability.
> - Grok Build was installed on the Paperclip host, but Paperclip had no
built-in `grok_local` adapter, so the runtime could not be configured
through the normal server/UI/CLI adapter path.
> - That gap needed to be closed with the same built-in registry,
environment diagnostics, transcript parsing, and skill/instructions
behavior that the other local adapters already rely on.
> - After the initial adapter landed, a real follow-up run showed that
Grok streaming text was being rendered one fragment per line, which made
transcripts harder to read even though the runtime itself was working.
> - This pull request adds the built-in `grok_local` adapter end-to-end
and then fixes the transcript parser so streamed Grok output is
coalesced into readable assistant/thinking blocks.
> - The benefit is that Grok Build becomes a first-class Paperclip
runtime with a usable operator experience instead of a partially wired
runtime with noisy transcript output.

## What Changed

- Added a new built-in `@paperclipai/adapter-grok-local` package with
server, UI, and CLI entrypoints.
- Implemented Grok execution, session handling, environment diagnostics,
config building, skill syncing, and parser coverage inside the new
adapter package.
- Registered `grok_local` across the built-in adapter inventories and
capability/display metadata in server, UI, CLI, and shared constants.
- Added adapter route coverage for the new built-in type.
- Fixed Grok transcript readability by emitting streamed `text` and
`thought` fragments as deltas so the shared transcript builder coalesces
them into readable message blocks.
- Added regression tests for the Grok parser and transcript coalescing
behavior.

## Verification

- `pnpm vitest run
packages/adapters/grok-local/src/ui/parse-stdout.test.ts
ui/src/adapters/transcript.test.ts`
- `pnpm --filter @paperclipai/adapter-grok-local build`
- Manual runtime verification on the Paperclip host during
implementation and follow-up review:
  - confirmed the Grok CLI was installed and authenticated
  - confirmed the worktree dev server could be restarted cleanly and
    health-checked after the parser follow-up
- No screenshots attached. This change is primarily adapter plumbing
plus transcript formatting behavior; reviewers can verify via the
Grok-backed run surfaces directly.

## Risks

- This adds a new built-in adapter, so any missed registration surface
could create inconsistencies between server, UI, and CLI behavior.
- The adapter depends on Grok Build's current event/output shape; if
upstream Grok streaming JSON changes, transcript parsing or session
extraction may need follow-up updates.
- The transcript readability fix intentionally changes how Grok
fragments are grouped, so any downstream code that implicitly expected
one entry per fragment would behave differently.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex via Paperclip `codex_local` agent runtime.
- GPT-5-class coding model with tool use, shell execution, file editing,
and repo inspection enabled.
- Exact backend model ID/context window were not surfaced to the agent
in this Paperclip session.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [ ] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-05-16 09:51:09 -07:00

188 lines
6.9 KiB
TypeScript

import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
// Mock handles shared between the `vi.mock` factory and the tests. They are
// built inside `vi.hoisted` so they can be referenced from the mock factory,
// which vitest evaluates ahead of the regular module-level statements.
const {
  ensureRuntimeInstalledMock,
  ensureCommandMock,
  prepareRuntimeMock,
  resolveCommandForLogsMock,
  runProcessMock,
} = vi.hoisted(() => {
  // Default: the runtime-install check resolves without doing anything.
  const runtimeInstalled = vi.fn(async () => {});
  // Default: the command-resolvable check resolves without doing anything.
  const commandResolvable = vi.fn(async () => {});
  // Default: no remote workspace directory and a no-op restore callback.
  const runtimePrepared = vi.fn(async () => ({
    workspaceRemoteDir: null,
    restoreWorkspace: async () => {},
  }));
  // Default: the command shown in logs is always "grok".
  const commandForLogs = vi.fn(async () => "grok");
  // No default behaviour; each test installs its own implementation.
  const processRunner = vi.fn();

  return {
    ensureRuntimeInstalledMock: runtimeInstalled,
    ensureCommandMock: commandResolvable,
    prepareRuntimeMock: runtimePrepared,
    resolveCommandForLogsMock: commandForLogs,
    runProcessMock: processRunner,
  };
});
// Swap the execution-target module for stand-ins. Every export listed here is
// either a fixed-answer stub describing a local target or one of the hoisted
// mocks, which individual tests script and inspect.
vi.mock("@paperclipai/adapter-utils/execution-target", () => {
  // Stubs whose answer never changes between tests.
  const fixedAnswers = {
    adapterExecutionTargetIsRemote: () => {
      return false;
    },
    adapterExecutionTargetRemoteCwd: (_executionTarget: unknown, cwd: string) => {
      return cwd;
    },
    overrideAdapterExecutionTargetRemoteCwd: (target: unknown, _remoteCwd: string) => {
      return target;
    },
    adapterExecutionTargetSessionIdentity: () => {
      return { kind: "local" };
    },
    adapterExecutionTargetSessionMatches: () => {
      return true;
    },
    describeAdapterExecutionTarget: () => {
      return "local";
    },
    // Falls back to a fresh local target when the input carries none.
    readAdapterExecutionTarget: (input: { executionTarget?: unknown }) => {
      return input.executionTarget ?? { kind: "local" };
    },
    resolveAdapterExecutionTargetTimeoutSec: (_executionTarget: unknown, timeoutSec: number) => {
      return timeoutSec;
    },
  };

  // Exports backed by the hoisted mocks so tests can control them.
  const scriptable = {
    ensureAdapterExecutionTargetCommandResolvable: ensureCommandMock,
    ensureAdapterExecutionTargetRuntimeCommandInstalled: ensureRuntimeInstalledMock,
    prepareAdapterExecutionTargetRuntime: prepareRuntimeMock,
    resolveAdapterExecutionTargetCommandForLogs: resolveCommandForLogsMock,
    runAdapterExecutionTargetProcess: runProcessMock,
  };

  return { ...fixedAnswers, ...scriptable };
});
import { execute } from "./execute.js";
/** Directories handed out by `makeTempRoot`; the `afterEach` hook drains and deletes them. */
const tempRoots: string[] = [];

/**
 * Creates a uniquely named directory under the OS temp dir and records it in
 * `tempRoots`.
 *
 * @returns Path of the newly created directory.
 */
async function makeTempRoot(): Promise<string> {
  const prefix = path.join(os.tmpdir(), "paperclip-grok-local-");
  const createdDir = await fs.mkdtemp(prefix);
  tempRoots.push(createdDir);
  return createdDir;
}
/**
 * Reports whether `candidate` can be accessed on disk.
 *
 * @param candidate Path to probe.
 * @returns `true` when `fs.access` succeeds, `false` when it rejects for any reason.
 */
async function pathExists(candidate: string): Promise<boolean> {
  try {
    await fs.access(candidate);
    return true;
  } catch {
    return false;
  }
}
/**
 * Writes the on-disk fixture both tests start from: a managed instructions
 * file and one `paperclip` runtime skill, both located under `root`.
 *
 * @param root Temp directory that doubles as the agent's working directory.
 * @returns Paths of the instructions file and of the skill source directory.
 */
async function writeGrokFixture(root: string): Promise<{ instructionsPath: string; skillSource: string }> {
  const instructionsPath = path.join(root, "managed", "AGENTS.md");
  const skillSource = path.join(root, "runtime-skills", "paperclip");
  await fs.mkdir(path.dirname(instructionsPath), { recursive: true });
  await fs.writeFile(instructionsPath, "You are Grok.\n", "utf8");
  await fs.mkdir(skillSource, { recursive: true });
  await fs.writeFile(path.join(skillSource, "SKILL.md"), "---\nname: paperclip\ndescription: test\n---\n", "utf8");
  return { instructionsPath, skillSource };
}

/**
 * Builds the execution context passed to `execute`. Only the run id, the
 * fixture paths and the log sink differ between tests; everything else is
 * fixed.
 *
 * @param params.runId Identifier of the simulated run.
 * @param params.root Working directory for the run (`config.cwd`).
 * @param params.instructionsPath Managed instructions file to stage.
 * @param params.skillSource Source directory of the `paperclip` skill.
 * @param params.onLog Sink that receives log chunks emitted during the run.
 */
function buildGrokContext(params: {
  runId: string;
  root: string;
  instructionsPath: string;
  skillSource: string;
  onLog: AdapterExecutionContext["onLog"];
}): AdapterExecutionContext {
  return {
    runId: params.runId,
    agent: {
      id: "agent-1",
      companyId: "company-1",
      name: "Grok Agent",
      adapterType: "grok_local",
      adapterConfig: {},
    },
    runtime: {
      sessionId: null,
      sessionParams: null,
      sessionDisplayId: null,
      taskKey: null,
    },
    config: {
      cwd: params.root,
      instructionsFilePath: params.instructionsPath,
      paperclipRuntimeSkills: [{
        key: "paperclip",
        runtimeName: "paperclip",
        source: params.skillSource,
        required: false,
      }],
      paperclipSkillSync: { desiredSkills: ["paperclip"] },
    },
    context: {},
    authToken: "run-token",
    onLog: params.onLog,
  };
}

/**
 * Asserts that neither the staged instructions file nor the staged skill
 * directory is left behind in the workspace.
 *
 * @param root Workspace directory that was used as `config.cwd`.
 */
async function expectStagedAssetsRemoved(root: string): Promise<void> {
  expect(await pathExists(path.join(root, "Agents.md"))).toBe(false);
  expect(await pathExists(path.join(root, ".claude", "skills", "paperclip"))).toBe(false);
}

describe("grok_local execute", () => {
  beforeEach(() => {
    ensureRuntimeInstalledMock.mockClear();
    ensureCommandMock.mockClear();
    prepareRuntimeMock.mockClear();
    resolveCommandForLogsMock.mockClear();
    // Reset rather than clear: the process mock gets a fresh implementation per test.
    runProcessMock.mockReset();
  });

  afterEach(async () => {
    // splice(0) empties the registry while handing back what has to be removed.
    await Promise.all(tempRoots.splice(0).map((root) => fs.rm(root, { recursive: true, force: true })));
  });

  it("stages Grok-native instructions and skills into the workspace for the run and cleans them up afterward", async () => {
    const root = await makeTempRoot();
    const { instructionsPath, skillSource } = await writeGrokFixture(root);

    // These assertions run inside the process mock so they observe the
    // workspace at the moment the Grok process would be running.
    runProcessMock.mockImplementation(async (_runId, _target, _command, args, options) => {
      expect(args).toEqual(
        expect.arrayContaining([
          "--output-format",
          "streaming-json",
          "--always-approve",
          "--permission-mode",
          "dontAsk",
        ]),
      );
      expect(await fs.readFile(path.join(root, "Agents.md"), "utf8")).toContain("You are Grok.");
      expect(await pathExists(path.join(root, ".claude", "skills", "paperclip", "SKILL.md"))).toBe(true);
      await options.onLog?.("stdout", '{"type":"text","data":"done"}\n');
      return {
        exitCode: 0,
        signal: null,
        timedOut: false,
        stdout: [
          JSON.stringify({ type: "text", data: "done" }),
          JSON.stringify({ type: "end", stopReason: "EndTurn", sessionId: "sess-1", requestId: "req-1" }),
        ].join("\n"),
        stderr: "",
      };
    });

    const logs: Array<{ stream: "stdout" | "stderr"; chunk: string }> = [];
    const ctx = buildGrokContext({
      runId: "run-1",
      root,
      instructionsPath,
      skillSource,
      onLog: async (stream: "stdout" | "stderr", chunk: string) => {
        logs.push({ stream, chunk });
      },
    });

    const result = await execute(ctx);

    expect(result).toMatchObject({
      exitCode: 0,
      errorMessage: null,
      summary: "done",
      sessionId: "sess-1",
      sessionDisplayId: "sess-1",
    });
    await expectStagedAssetsRemoved(root);
    expect(logs.map((entry) => entry.chunk)).not.toEqual([]);
  });

  it("cleans up staged assets when setup fails before the Grok process starts", async () => {
    const root = await makeTempRoot();
    const { instructionsPath, skillSource } = await writeGrokFixture(root);

    // Fail the command-resolvable check so the run aborts during setup.
    ensureCommandMock.mockRejectedValueOnce(new Error("grok not installed"));

    const ctx = buildGrokContext({
      runId: "run-setup-fail",
      root,
      instructionsPath,
      skillSource,
      onLog: async () => {},
    });

    await expect(execute(ctx)).rejects.toThrow("grok not installed");
    expect(runProcessMock).not.toHaveBeenCalled();
    await expectStagedAssetsRemoved(root);
  });
});