Files
paperclip/ui/src/adapters/transcript.test.ts
T
Devin Foley ab8b471685 Add built-in grok_local adapter (#6087)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies, so
adapter quality directly affects what runtimes the control plane can
supervise.
> - Local CLI adapters are one of the core execution surfaces because
they turn real coding tools into Paperclip-managed employees with
heartbeats, transcripts, and reviewability.
> - Grok Build was installed on the Paperclip host, but Paperclip had no
built-in `grok_local` adapter, so the runtime could not be configured
through the normal server/UI/CLI adapter path.
> - That gap needed to be closed with the same built-in registry,
environment diagnostics, transcript parsing, and skill/instructions
behavior that the other local adapters already rely on.
> - After the initial adapter landed, a real follow-up run showed that
Grok streaming text was being rendered one fragment per line, which made
transcripts harder to read even though the runtime itself was working.
> - This pull request adds the built-in `grok_local` adapter end-to-end
and then fixes the transcript parser so streamed Grok output is
coalesced into readable assistant/thinking blocks.
> - The benefit is that Grok Build becomes a first-class Paperclip
runtime with a usable operator experience instead of a partially wired
runtime with noisy transcript output.

## What Changed

- Added a new built-in `@paperclipai/adapter-grok-local` package with
server, UI, and CLI entrypoints.
- Implemented Grok execution, session handling, environment diagnostics,
config building, skill syncing, and parser coverage inside the new
adapter package.
- Registered `grok_local` across the built-in adapter inventories and
capability/display metadata in server, UI, CLI, and shared constants.
- Added adapter route coverage for the new built-in type.
- Fixed Grok transcript readability by emitting streamed `text` and
`thought` fragments as deltas so the shared transcript builder coalesces
them into readable message blocks.
- Added regression tests for the Grok parser and transcript coalescing
behavior.

## Verification

- `pnpm vitest run
packages/adapters/grok-local/src/ui/parse-stdout.test.ts
ui/src/adapters/transcript.test.ts`
- `pnpm --filter @paperclipai/adapter-grok-local build`
- Manual runtime verification on the Paperclip host during
implementation and follow-up review:
  - confirmed the Grok CLI was installed and authenticated
  - confirmed the worktree dev server could be restarted cleanly and
    health-checked after the parser follow-up
- No screenshots attached. This change is primarily adapter plumbing
plus transcript formatting behavior; reviewers can verify via the
Grok-backed run surfaces directly.

## Risks

- This adds a new built-in adapter, so any missed registration surface
could create inconsistencies between server, UI, and CLI behavior.
- The adapter depends on Grok Build's current event/output shape; if
upstream Grok streaming JSON changes, transcript parsing or session
extraction may need follow-up updates.
- The transcript readability fix intentionally changes how Grok
fragments are grouped, so any downstream code that implicitly expected
one entry per fragment would behave differently.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex via Paperclip `codex_local` agent runtime.
- GPT-5-class coding model with tool use, shell execution, file editing,
and repo inspection enabled.
- Exact backend model ID/context window were not surfaced to the agent
in this Paperclip session.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [ ] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-05-16 09:51:09 -07:00

203 lines
6.4 KiB
TypeScript

import { describe, expect, it } from "vitest";
import { buildTranscript, type RunLogChunk } from "./transcript";
import { grokLocalUIAdapter } from "./grok-local";
import type { UIAdapterModule } from "./types";
describe("buildTranscript", () => {
// Shared fixtures: a single timestamp plus one stdout and one stderr chunk,
// each carrying a /Users/<name> path so the censoring tests have something to redact.
const ts = "2026-03-20T13:00:00.000Z";
const stdoutChunk: RunLogChunk = { ts, stream: "stdout", chunk: "opened /Users/dotta/project\n" };
const stderrChunk: RunLogChunk = { ts, stream: "stderr", chunk: "stderr /Users/dotta/project" };
const chunks: RunLogChunk[] = [stdoutChunk, stderrChunk];
it("defaults username censoring to off when options are omitted", () => {
  // No options argument is passed, so the paths in the fixtures must come back verbatim.
  const transcript = buildTranscript(chunks, (rawLine, stamp) => {
    return [{ kind: "stdout", ts: stamp, text: rawLine }];
  });

  const expected = [
    { kind: "stdout", ts, text: "opened /Users/dotta/project" },
    { kind: "stderr", ts, text: "stderr /Users/dotta/project" },
  ];
  expect(transcript).toEqual(expected);
});
it("still redacts usernames when explicitly enabled", () => {
  // Same fixtures as the default case, but with censoring switched on explicitly.
  const transcript = buildTranscript(
    chunks,
    (rawLine, stamp) => {
      return [{ kind: "stdout", ts: stamp, text: rawLine }];
    },
    { censorUsernameInLogs: true },
  );

  // The username segment of each path is expected to be masked after its first character.
  const expected = [
    { kind: "stdout", ts, text: "opened /Users/d****/project" },
    { kind: "stderr", ts, text: "stderr /Users/d****/project" },
  ];
  expect(transcript).toEqual(expected);
});
it("creates a fresh stateful parser for each transcript build", () => {
  const beginPrefix = "begin:";

  // Adapter whose parser remembers a "begin:<task>" line until a later "finish" line.
  const statefulAdapter: UIAdapterModule = {
    type: "stateful_test",
    label: "Stateful Test",
    parseStdoutLine: (rawLine, stamp) => [{ kind: "stdout", ts: stamp, text: rawLine }],
    createStdoutParser: () => {
      // Task name captured from the most recent "begin:" line, or null when none is open.
      let openTask: string | null = null;
      return {
        parseLine: (rawLine, stamp) => {
          if (rawLine.startsWith(beginPrefix)) {
            openTask = rawLine.slice(beginPrefix.length);
            return [];
          }
          // Anything other than a "finish" that closes an open task is echoed literally.
          if (rawLine !== "finish" || !openTask) {
            return [{ kind: "stdout", ts: stamp, text: `literal:${rawLine}` }];
          }
          const text = `completed:${openTask}`;
          openTask = null;
          return [{ kind: "stdout", ts: stamp, text }];
        },
        reset: () => {
          openTask = null;
        },
      };
    },
    ConfigFields: () => null,
    buildAdapterConfig: () => ({}),
  };

  // Each call is a separate transcript build over a single stdout chunk.
  const buildFrom = (chunk: string) =>
    buildTranscript([{ ts, stream: "stdout", chunk }], statefulAdapter);

  const first = buildFrom("begin:task-a\n");
  const second = buildFrom("finish\n");

  expect(first).toEqual([]);
  // If parser state leaked between builds this would be "completed:task-a" instead.
  expect(second).toEqual([{ kind: "stdout", ts, text: "literal:finish" }]);
});
it("converts parser failures into transcript error entries and keeps going", () => {
  // Three lines in one chunk; the middle one makes the parser throw.
  const input: RunLogChunk[] = [{ ts, stream: "stdout", chunk: "ok\nexplode\nlater\n" }];

  const transcript = buildTranscript(input, (rawLine, stamp) => {
    if (rawLine === "explode") {
      throw new Error("boom");
    }
    return [{ kind: "stdout", ts: stamp, text: rawLine }];
  });

  // Entry expected in place of the line whose parse threw.
  const parseFailure = {
    kind: "result",
    ts,
    text: "Chat transcript error: boom. Falling back for line: explode",
    inputTokens: 0,
    outputTokens: 0,
    cachedTokens: 0,
    costUsd: 0,
    subtype: "transcript_parse_error",
    isError: true,
    errors: [],
  };

  expect(transcript).toEqual([
    { kind: "stdout", ts, text: "ok" },
    parseFailure,
    { kind: "stdout", ts, text: "later" },
  ]);
});
it("resets stateful parsers after a failure before parsing later lines", () => {
  const beginPrefix = "begin:";

  // Same begin/finish parser as the fresh-parser test, plus an "explode" line that
  // throws and reports whatever task was open at the time.
  const statefulAdapter: UIAdapterModule = {
    type: "stateful_test",
    label: "Stateful Test",
    parseStdoutLine: (rawLine, stamp) => [{ kind: "stdout", ts: stamp, text: rawLine }],
    createStdoutParser: () => {
      let openTask: string | null = null;
      return {
        parseLine: (rawLine, stamp) => {
          if (rawLine.startsWith(beginPrefix)) {
            openTask = rawLine.slice(beginPrefix.length);
            return [];
          }
          if (rawLine === "explode") {
            throw new Error(`bad state:${openTask ?? "none"}`);
          }
          // Only a "finish" that closes an open task is reported as completed.
          if (rawLine !== "finish" || !openTask) {
            return [{ kind: "stdout", ts: stamp, text: `literal:${rawLine}` }];
          }
          const text = `completed:${openTask}`;
          openTask = null;
          return [{ kind: "stdout", ts: stamp, text }];
        },
        reset: () => {
          openTask = null;
        },
      };
    },
    ConfigFields: () => null,
    buildAdapterConfig: () => ({}),
  };

  const input: RunLogChunk[] = [{ ts, stream: "stdout", chunk: "begin:task-a\nexplode\nfinish\n" }];
  const transcript = buildTranscript(input, statefulAdapter);

  // The error text shows task-a was open when the parser threw.
  const parseFailure = {
    kind: "result",
    ts,
    text: "Chat transcript error: bad state:task-a. Falling back for line: explode",
    inputTokens: 0,
    outputTokens: 0,
    cachedTokens: 0,
    costUsd: 0,
    subtype: "transcript_parse_error",
    isError: true,
    errors: [],
  };

  // "finish" is expected to be echoed literally, i.e. the open task was cleared after the failure.
  expect(transcript).toEqual([
    parseFailure,
    { kind: "stdout", ts, text: "literal:finish" },
  ]);
});
it("handles trailing buffered parser failures without throwing", () => {
  // The chunk has no terminating newline, so "explode" is the trailing remainder of the input.
  const input: RunLogChunk[] = [{ ts, stream: "stdout", chunk: "explode" }];

  const transcript = buildTranscript(input, (rawLine, stamp) => {
    if (rawLine === "explode") {
      throw new Error("trailing boom");
    }
    return [{ kind: "stdout", ts: stamp, text: rawLine }];
  });

  const parseFailure = {
    kind: "result",
    ts,
    text: "Chat transcript error: trailing boom. Falling back for line: explode",
    inputTokens: 0,
    outputTokens: 0,
    cachedTokens: 0,
    costUsd: 0,
    subtype: "transcript_parse_error",
    isError: true,
    errors: [],
  };

  expect(transcript).toEqual([parseFailure]);
});
it("coalesces grok_local streaming text fragments into one assistant entry", () => {
  // Two streamed text fragments followed by an end-of-turn event.
  const events = [
    { type: "text", data: "Hello " },
    { type: "text", data: "world" },
    { type: "end", stopReason: "EndTurn", sessionId: "sess-1" },
  ];

  // Each event becomes its own stdout chunk: one JSON document per newline-terminated line.
  const input = events.map(
    (event): RunLogChunk => ({ ts, stream: "stdout", chunk: `${JSON.stringify(event)}\n` }),
  );

  const transcript = buildTranscript(input, grokLocalUIAdapter);

  // The two fragments are expected to merge into a single assistant entry.
  const expected = [
    { kind: "assistant", ts, text: "Hello world", delta: true },
    { kind: "system", ts, text: "stop_reason=EndTurn session=sess-1" },
  ];
  expect(transcript).toEqual(expected);
});
});