forked from farhoodlabs/paperclip
ab8b471685
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies, so adapter quality directly affects what runtimes the control plane can supervise.
> - Local CLI adapters are one of the core execution surfaces because they turn real coding tools into Paperclip-managed employees with heartbeats, transcripts, and reviewability.
> - Grok Build was installed on the Paperclip host, but Paperclip had no built-in `grok_local` adapter, so the runtime could not be configured through the normal server/UI/CLI adapter path.
> - That gap needed to be closed with the same built-in registry, environment diagnostics, transcript parsing, and skill/instructions behavior that the other local adapters already rely on.
> - After the initial adapter landed, a real follow-up run showed that Grok streaming text was being rendered one fragment per line, which made transcripts harder to read even though the runtime itself was working.
> - This pull request adds the built-in `grok_local` adapter end-to-end and then fixes the transcript parser so streamed Grok output is coalesced into readable assistant/thinking blocks.
> - The benefit is that Grok Build becomes a first-class Paperclip runtime with a usable operator experience instead of a partially wired runtime with noisy transcript output.

## What Changed

- Added a new built-in `@paperclipai/adapter-grok-local` package with server, UI, and CLI entrypoints.
- Implemented Grok execution, session handling, environment diagnostics, config building, skill syncing, and parser coverage inside the new adapter package.
- Registered `grok_local` across the built-in adapter inventories and capability/display metadata in server, UI, CLI, and shared constants.
- Added adapter route coverage for the new built-in type.
- Fixed Grok transcript readability by emitting streamed `text` and `thought` fragments as deltas so the shared transcript builder coalesces them into readable message blocks.
- Added regression tests for the Grok parser and transcript coalescing behavior.

## Verification

- `pnpm vitest run packages/adapters/grok-local/src/ui/parse-stdout.test.ts ui/src/adapters/transcript.test.ts`
- `pnpm --filter @paperclipai/adapter-grok-local build`
- Manual runtime verification on the Paperclip host during implementation and follow-up review:
  - confirmed the Grok CLI was installed and authenticated
  - confirmed the worktree dev server could be restarted cleanly and health-checked after the parser follow-up
- No screenshots attached. This change is primarily adapter plumbing plus transcript formatting behavior; reviewers can verify via the Grok-backed run surfaces directly.

## Risks

- This adds a new built-in adapter, so any missed registration surface could create inconsistencies between server, UI, and CLI behavior.
- The adapter depends on Grok Build's current event/output shape; if upstream Grok streaming JSON changes, transcript parsing or session extraction may need follow-up updates.
- The transcript readability fix intentionally changes how Grok fragments are grouped, so any downstream code that implicitly expected one entry per fragment would behave differently.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex via Paperclip `codex_local` agent runtime.
- GPT-5-class coding model with tool use, shell execution, file editing, and repo inspection enabled.
- Exact backend model ID/context window were not surfaced to the agent in this Paperclip session.
## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after screenshots
- [ ] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge
188 lines
6.9 KiB
TypeScript
188 lines
6.9 KiB
TypeScript
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
import type { AdapterExecutionContext } from "@paperclipai/adapter-utils";
|
|
|
|
// Spies shared between the `vi.mock` factory below and the test bodies. They are created through
// `vi.hoisted` so that they already exist when Vitest evaluates the hoisted mock factory.
const {
  ensureRuntimeInstalledMock,
  ensureCommandMock,
  prepareRuntimeMock,
  resolveCommandForLogsMock,
  runProcessMock,
} = vi.hoisted(() => ({
  // Resolves without doing anything: the runtime command is treated as installed.
  ensureRuntimeInstalledMock: vi.fn(async () => {}),
  // Resolves by default; a test can make it reject to simulate an unresolvable command.
  ensureCommandMock: vi.fn(async () => {}),
  // Reports no remote workspace directory and a no-op restore step.
  prepareRuntimeMock: vi.fn(async () => ({
    workspaceRemoteDir: null,
    restoreWorkspace: async () => {},
  })),
  // Command name reported for log output.
  resolveCommandForLogsMock: vi.fn(async () => "grok"),
  // No default behaviour: each test installs its own implementation.
  runProcessMock: vi.fn(),
}));
|
|
|
|
// Swap the execution-target helpers for local-only stand-ins so `execute` can run without a real
// target. Command resolution, runtime preparation, and process launching are routed to the hoisted
// spies so individual tests can observe or override them.
vi.mock("@paperclipai/adapter-utils/execution-target", () => {
  // Builds a fresh descriptor on every call, exactly like a literal written at each call site.
  const localTarget = () => ({ kind: "local" });

  return {
    // Target classification: always local.
    adapterExecutionTargetIsRemote: () => false,
    adapterExecutionTargetRemoteCwd: (_executionTarget: unknown, cwd: string) => cwd,
    overrideAdapterExecutionTargetRemoteCwd: (target: unknown, _remoteCwd: string) => target,
    adapterExecutionTargetSessionIdentity: () => localTarget(),
    adapterExecutionTargetSessionMatches: () => true,
    describeAdapterExecutionTarget: () => "local",

    // Entry points delegated to the hoisted spies.
    ensureAdapterExecutionTargetCommandResolvable: ensureCommandMock,
    ensureAdapterExecutionTargetRuntimeCommandInstalled: ensureRuntimeInstalledMock,
    prepareAdapterExecutionTargetRuntime: prepareRuntimeMock,

    // Pass through an explicitly supplied target, otherwise fall back to a local one.
    readAdapterExecutionTarget: (input: { executionTarget?: unknown }) =>
      input.executionTarget ?? localTarget(),

    resolveAdapterExecutionTargetCommandForLogs: resolveCommandForLogsMock,
    // The requested timeout is returned unchanged.
    resolveAdapterExecutionTargetTimeoutSec: (_executionTarget: unknown, timeoutSec: number) => timeoutSec,
    runAdapterExecutionTargetProcess: runProcessMock,
  };
});
|
|
|
|
import { execute } from "./execute.js";
|
|
|
|
// Every temp directory handed out by makeTempRoot() is recorded here so it can be removed later.
const tempRoots: string[] = [];

/**
 * Creates a uniquely named scratch directory under the OS temp dir and records it in `tempRoots`.
 *
 * @returns the path of the newly created directory
 */
async function makeTempRoot() {
  const prefix = path.join(os.tmpdir(), "paperclip-grok-local-");
  const created = await fs.mkdtemp(prefix);
  tempRoots.push(created);
  return created;
}
|
|
|
|
/**
 * Reports whether `candidate` can be accessed on disk.
 *
 * @param candidate - filesystem path to probe
 * @returns `true` when `fs.access` succeeds, `false` when it rejects for any reason
 */
async function pathExists(candidate: string): Promise<boolean> {
  try {
    await fs.access(candidate);
    return true;
  } catch {
    return false;
  }
}
|
|
|
|
/**
 * Covers `execute` for the `grok_local` adapter against the mocked execution target.
 *
 * Both tests stage a managed instructions file and one runtime skill under a temp root, run
 * `execute` with that root as the working directory, and then check that the files observed in
 * the workspace during the run (`Agents.md` and `.claude/skills/paperclip`) are gone afterwards.
 */
describe("grok_local execute", () => {
  /** Locations of the inputs a test stages under its temp root. */
  type StagedFixture = {
    root: string;
    instructionsPath: string;
    skillSource: string;
  };

  /** Creates a temp root containing a managed instructions file and a single `paperclip` skill. */
  async function stageFixture(): Promise<StagedFixture> {
    const root = await makeTempRoot();
    const instructionsPath = path.join(root, "managed", "AGENTS.md");
    const skillSource = path.join(root, "runtime-skills", "paperclip");
    await fs.mkdir(path.dirname(instructionsPath), { recursive: true });
    await fs.writeFile(instructionsPath, "You are Grok.\n", "utf8");
    await fs.mkdir(skillSource, { recursive: true });
    await fs.writeFile(path.join(skillSource, "SKILL.md"), "---\nname: paperclip\ndescription: test\n---\n", "utf8");
    return { root, instructionsPath, skillSource };
  }

  /** Builds the execution context shared by both tests; only the run id and log sink vary. */
  function buildContext(
    fixture: StagedFixture,
    runId: string,
    onLog: AdapterExecutionContext["onLog"],
  ): AdapterExecutionContext {
    return {
      runId,
      agent: {
        id: "agent-1",
        companyId: "company-1",
        name: "Grok Agent",
        adapterType: "grok_local",
        adapterConfig: {},
      },
      runtime: {
        sessionId: null,
        sessionParams: null,
        sessionDisplayId: null,
        taskKey: null,
      },
      config: {
        cwd: fixture.root,
        instructionsFilePath: fixture.instructionsPath,
        paperclipRuntimeSkills: [{
          key: "paperclip",
          runtimeName: "paperclip",
          source: fixture.skillSource,
          required: false,
        }],
        paperclipSkillSync: { desiredSkills: ["paperclip"] },
      },
      context: {},
      authToken: "run-token",
      onLog,
    };
  }

  /** Asserts that neither the staged instructions file nor the staged skill remains in `root`. */
  async function expectStagedAssetsRemoved(root: string): Promise<void> {
    expect(await pathExists(path.join(root, "Agents.md"))).toBe(false);
    expect(await pathExists(path.join(root, ".claude", "skills", "paperclip"))).toBe(false);
  }

  beforeEach(() => {
    // mockClear keeps the default implementations supplied when the spies were created.
    ensureRuntimeInstalledMock.mockClear();
    ensureCommandMock.mockClear();
    prepareRuntimeMock.mockClear();
    resolveCommandForLogsMock.mockClear();
    // The process runner has no default; drop whatever implementation the previous test installed.
    runProcessMock.mockReset();
  });

  afterEach(async () => {
    // splice(0) empties the registry while handing back the paths that need deleting.
    await Promise.all(tempRoots.splice(0).map((root) => fs.rm(root, { recursive: true, force: true })));
  });

  it("stages Grok-native instructions and skills into the workspace for the run and cleans them up afterward", async () => {
    const fixture = await stageFixture();
    const { root } = fixture;

    // The fake process checks, while the "run" is in flight, that the CLI flags are present and
    // that the staged assets are visible inside the workspace.
    runProcessMock.mockImplementation(async (_runId, _target, _command, args, options) => {
      expect(args).toEqual(
        expect.arrayContaining([
          "--output-format",
          "streaming-json",
          "--always-approve",
          "--permission-mode",
          "dontAsk",
        ]),
      );
      expect(await fs.readFile(path.join(root, "Agents.md"), "utf8")).toContain("You are Grok.");
      expect(await pathExists(path.join(root, ".claude", "skills", "paperclip", "SKILL.md"))).toBe(true);
      await options.onLog?.("stdout", '{"type":"text","data":"done"}\n');
      return {
        exitCode: 0,
        signal: null,
        timedOut: false,
        stdout: [
          JSON.stringify({ type: "text", data: "done" }),
          JSON.stringify({ type: "end", stopReason: "EndTurn", sessionId: "sess-1", requestId: "req-1" }),
        ].join("\n"),
        stderr: "",
      };
    });

    const logs: Array<{ stream: "stdout" | "stderr"; chunk: string }> = [];
    const ctx = buildContext(fixture, "run-1", async (stream: "stdout" | "stderr", chunk: string) => {
      logs.push({ stream, chunk });
    });

    const result = await execute(ctx);

    // Guards the in-flight assertions above: they only count if the fake process actually ran.
    expect(runProcessMock).toHaveBeenCalled();
    expect(result).toMatchObject({
      exitCode: 0,
      errorMessage: null,
      summary: "done",
      sessionId: "sess-1",
      sessionDisplayId: "sess-1",
    });
    await expectStagedAssetsRemoved(root);
    expect(logs.map((entry) => entry.chunk)).not.toEqual([]);
  });

  it("cleans up staged assets when setup fails before the Grok process starts", async () => {
    const fixture = await stageFixture();
    // Simulate a missing CLI: command resolution rejects once.
    ensureCommandMock.mockRejectedValueOnce(new Error("grok not installed"));

    const ctx = buildContext(fixture, "run-setup-fail", async () => {});

    await expect(execute(ctx)).rejects.toThrow("grok not installed");
    expect(runProcessMock).not.toHaveBeenCalled();
    await expectStagedAssetsRemoved(fixture.root);
  });
});
|