Handle Gemini CLI v0.38 stream-json wire format across parser, UI, and CLI formatter (#5273)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Each agent uses an adapter that drives a CLI (Claude, Gemini, Codex,
etc.)
> - The Gemini adapter parses a JSONL transcript stream the CLI emits to
learn what the model said
> - Gemini CLI v0.38 changed the transcript shape: assistant text now
comes through `type=message` with `role`/`content`, and terminal status
and token usage come through the `status` / `stats` fields of `type=result`
> - The existing parser was written against the older `type=assistant` /
`type=result` shape, so post-v0.38 outputs left the parsed summary empty
and downgraded the SSH hello probe to "unexpected output"
> - This pull request updates every Gemini consumer (server parser, UI
parser, CLI formatter) to accept the v0.38 shape while keeping the
legacy shape working
> - The benefit is the Gemini adapter handles current upstream output
without losing backward compatibility, with explicit test coverage for
both shapes

## What Changed

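- `packages/adapters/gemini-local/src/server/parse.ts` recognizes
`type=message` events with role/content and stops downgrading them; it
also reads token usage from `result.stats` (including the `cached` count)
and treats `status=error` / `status=failed` as a terminal error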
- `packages/adapters/gemini-local/src/ui/parse-stdout.ts` mirrors the
parser changes for the live UI transcript
- `packages/adapters/gemini-local/src/cli/format-event.ts` formats the
new event shape correctly for CLI output
- `parse.test.ts` and `parse-stdout.test.ts` add v0.38 coverage;
`gemini-local-adapter.test.ts` and `execute.remote.test.ts` switch
happy-path fixtures to the current real wire format and keep dedicated
tests for the older schema

## Verification

- `pnpm vitest run --no-coverage --project
@paperclipai/adapter-gemini-local` — full suite passes including new
v0.38 cases and preserved legacy cases
- `pnpm typecheck` clean

## Risks

Low risk — additive event handling. Legacy event shape path is preserved
with its own tests, so existing fixtures continue to parse identically.

## Model Used

Claude Opus 4.7 (1M context)

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no visual change; only the transcript parser under
`src/ui` is touched)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
Devin Foley
2026-05-05 08:00:14 -07:00
committed by GitHub
parent 3c73ed26b5
commit ea7f53fd7d
7 changed files with 253 additions and 60 deletions
@@ -93,14 +93,14 @@ function printTextMessage(prefix: string, colorize: (text: string) => string, me
}
function printUsage(parsed: Record<string, unknown>) {
const usage = asRecord(parsed.usage) ?? asRecord(parsed.usageMetadata);
const usage = asRecord(parsed.usage) ?? asRecord(parsed.usageMetadata) ?? asRecord(parsed.stats);
const usageMetadata = asRecord(usage?.usageMetadata);
const source = usageMetadata ?? usage ?? {};
const input = asNumber(source.input_tokens, asNumber(source.inputTokens, asNumber(source.promptTokenCount)));
const output = asNumber(source.output_tokens, asNumber(source.outputTokens, asNumber(source.candidatesTokenCount)));
const cached = asNumber(
source.cached_input_tokens,
asNumber(source.cachedInputTokens, asNumber(source.cachedContentTokenCount)),
asNumber(source.cachedInputTokens, asNumber(source.cachedContentTokenCount, asNumber(source.cached))),
);
const cost = asNumber(parsed.total_cost_usd, asNumber(parsed.cost_usd, asNumber(parsed.cost)));
console.log(pc.blue(`tokens: in=${input} out=${output} cached=${cached} cost=$${cost.toFixed(6)}`));
@@ -154,6 +154,21 @@ export function printGeminiStreamEvent(raw: string, _debug: boolean): void {
return;
}
// Gemini CLI v0.38+ stream-json schema:
// {"type":"message","role":"assistant"|"user","content":"...","delta":?true}
if (type === "message") {
const role = asString(parsed.role).trim().toLowerCase();
if (role === "assistant") {
printTextMessage("assistant", pc.green, parsed.content);
return;
}
if (role === "user") {
printTextMessage("user", pc.gray, parsed.content);
return;
}
return;
}
if (type === "thinking") {
const text = asString(parsed.text).trim() || asString(asRecord(parsed.delta)?.text).trim();
if (text) console.log(pc.gray(`thinking: ${text}`));
@@ -190,11 +205,17 @@ export function printGeminiStreamEvent(raw: string, _debug: boolean): void {
if (type === "result") {
printUsage(parsed);
const subtype = asString(parsed.subtype, "result");
const isError = parsed.is_error === true;
const status = asString(parsed.status).toLowerCase();
const isError =
parsed.is_error === true || status === "error" || status === "failed";
const subtype = asString(parsed.subtype, status || "result");
if (subtype || isError) {
console.log((isError ? pc.red : pc.blue)(`result: subtype=${subtype} is_error=${isError ? "true" : "false"}`));
}
if (isError) {
const text = errorText(parsed.error ?? parsed.message ?? parsed.result);
if (text) console.log(pc.red(`error: ${text}`));
}
return;
}
@@ -19,13 +19,12 @@ const {
timedOut: false,
stdout: [
JSON.stringify({ type: "system", subtype: "init", session_id: "gemini-session-1", model: "gemini-2.5-pro" }),
JSON.stringify({ type: "assistant", message: { content: [{ type: "output_text", text: "hello" }] } }),
JSON.stringify({ type: "message", role: "assistant", content: "hello" }),
JSON.stringify({
type: "result",
subtype: "success",
status: "success",
session_id: "gemini-session-1",
usage: { promptTokenCount: 1, cachedContentTokenCount: 0, candidatesTokenCount: 1 },
result: "hello",
stats: { input_tokens: 1, cached_input_tokens: 0, output_tokens: 1 },
}),
].join("\n"),
stderr: "",
@@ -43,4 +43,91 @@ describe("parseGeminiJsonl", () => {
expect(parsed.summary).toBe("visible response");
});
// End-to-end v0.38 transcript: an `init` event, a user `message`, an assistant
// `message` flagged `delta: true`, and a terminal `result` that carries
// `status` and `stats` instead of `subtype` and `usage`.
it("captures assistant text from gemini CLI v0.38 stream-json schema", () => {
const stdout = [
JSON.stringify({
type: "init",
timestamp: "2026-05-04T05:43:41.203Z",
session_id: "session-abc",
model: "auto-gemini-3",
}),
JSON.stringify({
type: "message",
timestamp: "2026-05-04T05:43:41.205Z",
role: "user",
content: "Respond with hello.",
}),
JSON.stringify({
type: "message",
timestamp: "2026-05-04T05:43:45.198Z",
role: "assistant",
content: "hello.",
delta: true,
}),
JSON.stringify({
type: "result",
timestamp: "2026-05-04T05:43:45.819Z",
status: "success",
stats: {
total_tokens: 9468,
input_tokens: 9095,
output_tokens: 29,
cached: 8132,
duration_ms: 4616,
},
}),
].join("\n");
const result = parseGeminiJsonl(stdout);
// Only the assistant content is expected in the summary; the user prompt is not.
expect(result.summary).toBe("hello.");
expect(result.sessionId).toBe("session-abc");
expect(result.errorMessage).toBeNull();
// Usage is expected to come from `stats`; the cached count is supplied under
// the bare `cached` key in this fixture.
expect(result.usage.inputTokens).toBe(9095);
expect(result.usage.outputTokens).toBe(29);
expect(result.usage.cachedInputTokens).toBe(8132);
});
// Role filtering: one user message followed by two assistant messages. The
// expected summary contains only the assistant texts, separated by a blank line.
it("ignores user messages and only collects assistant content", () => {
const stdout = [
JSON.stringify({ type: "message", role: "user", content: "ignore me" }),
JSON.stringify({ type: "message", role: "assistant", content: "first" }),
JSON.stringify({ type: "message", role: "assistant", content: "second" }),
].join("\n");
const result = parseGeminiJsonl(stdout);
expect(result.summary).toBe("first\n\nsecond");
});
// Backward-compatibility check: the older transcript shape (`system`/`init`,
// `assistant` with a nested `message.content` array, and `result` with
// `subtype`/`result`) must still yield the summary and session id.
it("preserves the legacy claude-style `assistant` event handler", () => {
const stdout = [
JSON.stringify({
type: "system",
subtype: "init",
session_id: "legacy-session",
}),
JSON.stringify({
type: "assistant",
message: { content: [{ type: "output_text", text: "legacy hello" }] },
}),
JSON.stringify({ type: "result", subtype: "success", result: "legacy hello" }),
].join("\n");
const result = parseGeminiJsonl(stdout);
// The result event repeats the assistant text; the summary is expected to
// contain it exactly once.
expect(result.summary).toBe("legacy hello");
expect(result.sessionId).toBe("legacy-session");
});
// A lone `result` event with `status: "error"` and no `is_error` or `subtype`
// field: its `error` string is expected to surface as `errorMessage`.
it("flags result events with status=error", () => {
const stdout = [
JSON.stringify({
type: "result",
status: "error",
error: "boom",
}),
].join("\n");
const result = parseGeminiJsonl(stdout);
expect(result.errorMessage).toBe("boom");
});
});
@@ -64,7 +64,10 @@ function accumulateUsage(
);
target.cachedInputTokens += asNumber(
source.cached_input_tokens,
asNumber(source.cachedInputTokens, asNumber(source.cachedContentTokenCount, 0)),
asNumber(
source.cachedInputTokens,
asNumber(source.cachedContentTokenCount, asNumber(source.cached, 0)),
),
);
target.outputTokens += asNumber(
source.output_tokens,
@@ -121,14 +124,14 @@ export function parseGeminiJsonl(stdout: string) {
continue;
}
// Gemini CLI v0.38+ stream-json schema emits assistant turns as:
// {"type":"message","role":"assistant","content":"...","delta":true}
// These are discrete final messages (one per assistant turn), not
// cumulative streaming tokens, so collecting all of them produces the
// expected concatenated turn-by-turn summary rather than duplicated text.
if (type === "message") {
const role = asString(event.role, "").trim().toLowerCase();
if (role === "assistant") {
// Mirror the assistant-event handling above: collect every assistant
// message including deltas. Gemini CLI emits these as discrete final
// messages (one per assistant turn), not as cumulative streaming
// tokens, so collecting all of them produces the expected concatenated
// turn-by-turn summary rather than duplicated text.
messages.push(...collectMessageText(event.content));
}
continue;
@@ -136,14 +139,19 @@ export function parseGeminiJsonl(stdout: string) {
if (type === "result") {
resultEvent = event;
accumulateUsage(usage, event.usage ?? event.usageMetadata);
accumulateUsage(usage, event.usage ?? event.usageMetadata ?? event.stats);
const resultText =
asString(event.result, "").trim() ||
asString(event.text, "").trim() ||
asString(event.response, "").trim();
if (resultText && messages.length === 0) messages.push(resultText);
costUsd = asNumber(event.total_cost_usd, asNumber(event.cost_usd, asNumber(event.cost, costUsd ?? 0))) || costUsd;
const isError = event.is_error === true || asString(event.subtype, "").toLowerCase() === "error";
const status = asString(event.status, "").toLowerCase();
const isError =
event.is_error === true ||
asString(event.subtype, "").toLowerCase() === "error" ||
status === "error" ||
status === "failed";
if (isError) {
const text = asErrorText(event.error ?? event.message ?? event.result).trim();
if (text) errorMessage = text;
@@ -0,0 +1,73 @@
import { describe, expect, it } from "vitest";
import { parseGeminiStdoutLine } from "./parse-stdout.js";
const ts = "2026-05-04T05:43:45.198Z";
// Unit tests for the per-line UI transcript parser. Each case serializes one
// event to JSON, passes it to `parseGeminiStdoutLine` with the fixed timestamp
// `ts`, and checks the transcript entries returned for that single line.
describe("parseGeminiStdoutLine", () => {
// v0.38 assistant message: string `content` plus `delta: true` maps to one
// assistant entry stamped with the supplied `ts`.
it("renders v0.38 message+role:assistant as an assistant transcript entry", () => {
const line = JSON.stringify({
type: "message",
role: "assistant",
content: "hello.",
delta: true,
});
const entries = parseGeminiStdoutLine(line, ts);
expect(entries).toEqual([{ kind: "assistant", ts, text: "hello." }]);
});
// v0.38 user message: maps to one user entry.
it("renders v0.38 message+role:user as a user transcript entry", () => {
const line = JSON.stringify({
type: "message",
role: "user",
content: "Respond with hello.",
});
const entries = parseGeminiStdoutLine(line, ts);
expect(entries).toEqual([{ kind: "user", ts, text: "Respond with hello." }]);
});
// Legacy shape: `type: "assistant"` with a nested `message.content` array of
// `output_text` parts must still produce an assistant entry.
it("preserves the legacy claude-style assistant event handler", () => {
const line = JSON.stringify({
type: "assistant",
message: { content: [{ type: "output_text", text: "legacy hello" }] },
});
const entries = parseGeminiStdoutLine(line, ts);
expect(entries).toEqual([{ kind: "assistant", ts, text: "legacy hello" }]);
});
// v0.38 result: token counts live under `stats`, with the cached count under
// the bare `cached` key. The event has no `subtype`, so the expected subtype
// is the `status` value.
it("reads token usage from v0.38 result.stats", () => {
const line = JSON.stringify({
type: "result",
status: "success",
stats: {
total_tokens: 9468,
input_tokens: 9095,
output_tokens: 29,
cached: 8132,
},
});
const [entry] = parseGeminiStdoutLine(line, ts);
expect(entry).toMatchObject({
kind: "result",
inputTokens: 9095,
outputTokens: 29,
cachedTokens: 8132,
isError: false,
subtype: "success",
});
});
// v0.38 error result: `status: "error"` alone (no `is_error` flag) must mark
// the entry as an error and carry the `error` string in `errors`.
it("flags v0.38 result.status=error as an error", () => {
const line = JSON.stringify({
type: "result",
status: "error",
error: "boom",
});
const [entry] = parseGeminiStdoutLine(line, ts);
expect(entry).toMatchObject({ kind: "result", isError: true, errors: ["boom"] });
});
// A `message` whose role is neither assistant nor user yields no entries.
it("ignores message events without an actionable role", () => {
const line = JSON.stringify({ type: "message", role: "system", content: "ignored" });
expect(parseGeminiStdoutLine(line, ts)).toEqual([]);
});
});
@@ -195,7 +195,7 @@ function readSessionId(parsed: Record<string, unknown>): string {
}
function readUsage(parsed: Record<string, unknown>) {
const usage = asRecord(parsed.usage) ?? asRecord(parsed.usageMetadata);
const usage = asRecord(parsed.usage) ?? asRecord(parsed.usageMetadata) ?? asRecord(parsed.stats);
const usageMetadata = asRecord(usage?.usageMetadata);
const source = usageMetadata ?? usage ?? {};
return {
@@ -203,7 +203,7 @@ function readUsage(parsed: Record<string, unknown>) {
outputTokens: asNumber(source.output_tokens, asNumber(source.outputTokens, asNumber(source.candidatesTokenCount))),
cachedTokens: asNumber(
source.cached_input_tokens,
asNumber(source.cachedInputTokens, asNumber(source.cachedContentTokenCount)),
asNumber(source.cachedInputTokens, asNumber(source.cachedContentTokenCount, asNumber(source.cached))),
),
};
}
@@ -237,6 +237,19 @@ export function parseGeminiStdoutLine(line: string, ts: string): TranscriptEntry
return collectTextEntries(parsed.message, ts, "user");
}
// Gemini CLI v0.38+ stream-json schema:
// {"type":"message","role":"assistant"|"user","content":"...","delta":?true}
if (type === "message") {
const role = asString(parsed.role).trim().toLowerCase();
if (role === "assistant") {
return parseAssistantMessage(parsed.content, ts);
}
if (role === "user") {
return collectTextEntries(parsed.content, ts, "user");
}
return [];
}
if (type === "thinking") {
const text = asString(parsed.text).trim() || asString(asRecord(parsed.delta)?.text).trim();
return text ? [{ kind: "thinking", ts, text }] : [];
@@ -248,7 +261,10 @@ export function parseGeminiStdoutLine(line: string, ts: string): TranscriptEntry
if (type === "result") {
const usage = readUsage(parsed);
const errors = parsed.is_error === true
const status = asString(parsed.status).toLowerCase();
const isError =
parsed.is_error === true || status === "error" || status === "failed";
const errors = isError
? [errorText(parsed.error ?? parsed.message ?? parsed.result)].filter(Boolean)
: [];
return [{
@@ -259,8 +275,8 @@ export function parseGeminiStdoutLine(line: string, ts: string): TranscriptEntry
outputTokens: usage.outputTokens,
cachedTokens: usage.cachedTokens,
costUsd: asNumber(parsed.total_cost_usd, asNumber(parsed.cost_usd, asNumber(parsed.cost))),
subtype: asString(parsed.subtype, "result"),
isError: parsed.is_error === true,
subtype: asString(parsed.subtype, status || "result"),
isError,
errors,
}];
}
@@ -8,26 +8,24 @@ import { parseGeminiStdoutLine } from "@paperclipai/adapter-gemini-local/ui";
import { printGeminiStreamEvent } from "@paperclipai/adapter-gemini-local/cli";
describe("gemini_local parser", () => {
it("extracts session, summary, usage, cost, and terminal error message", () => {
it("extracts session, summary, usage, cost, and terminal error message from v0.38 stream-json output", () => {
const stdout = [
JSON.stringify({ type: "system", subtype: "init", session_id: "gemini-session-1", model: "gemini-2.5-pro" }),
JSON.stringify({
type: "assistant",
message: {
content: [{ type: "output_text", text: "hello" }],
},
type: "message",
role: "assistant",
content: "hello",
}),
JSON.stringify({
type: "result",
subtype: "success",
status: "success",
session_id: "gemini-session-1",
usage: {
promptTokenCount: 12,
cachedContentTokenCount: 3,
candidatesTokenCount: 7,
stats: {
input_tokens: 12,
cached_input_tokens: 3,
output_tokens: 7,
},
total_cost_usd: 0.00123,
result: "done",
}),
JSON.stringify({ type: "error", message: "model access denied" }),
].join("\n");
@@ -105,44 +103,34 @@ describe("gemini_local turn-limit detection", () => {
});
describe("gemini_local ui stdout parser", () => {
it("parses assistant, thinking, and result events", () => {
it("parses v0.38 assistant message and result events", () => {
const ts = "2026-03-08T00:00:00.000Z";
expect(
parseGeminiStdoutLine(
JSON.stringify({
type: "assistant",
message: {
content: [
{ type: "output_text", text: "I checked the repo." },
{ type: "thinking", text: "Reviewing adapter registry" },
{ type: "tool_call", name: "shell", input: { command: "ls -1" } },
{ type: "tool_result", tool_use_id: "tool_1", output: "AGENTS.md\n", status: "ok" },
],
},
type: "message",
role: "assistant",
content: "I checked the repo.",
}),
ts,
),
).toEqual([
{ kind: "assistant", ts, text: "I checked the repo." },
{ kind: "thinking", ts, text: "Reviewing adapter registry" },
{ kind: "tool_call", ts, name: "shell", input: { command: "ls -1" } },
{ kind: "tool_result", ts, toolUseId: "tool_1", content: "AGENTS.md\n", isError: false },
]);
expect(
parseGeminiStdoutLine(
JSON.stringify({
type: "result",
subtype: "success",
result: "Done",
usage: {
promptTokenCount: 10,
candidatesTokenCount: 5,
cachedContentTokenCount: 2,
status: "success",
text: "Done",
stats: {
input_tokens: 10,
output_tokens: 5,
cached_input_tokens: 2,
},
total_cost_usd: 0.00042,
is_error: false,
}),
ts,
),
@@ -168,7 +156,7 @@ function stripAnsi(value: string): string {
}
describe("gemini_local cli formatter", () => {
it("prints init, assistant, result, and error events", () => {
it("prints init, v0.38 assistant, result, and error events", () => {
const spy = vi.spyOn(console, "log").mockImplementation(() => {});
let joined = "";
@@ -179,19 +167,20 @@ describe("gemini_local cli formatter", () => {
);
printGeminiStreamEvent(
JSON.stringify({
type: "assistant",
message: { content: [{ type: "output_text", text: "hello" }] },
type: "message",
role: "assistant",
content: "hello",
}),
false,
);
printGeminiStreamEvent(
JSON.stringify({
type: "result",
subtype: "success",
usage: {
promptTokenCount: 10,
candidatesTokenCount: 5,
cachedContentTokenCount: 2,
status: "success",
stats: {
input_tokens: 10,
output_tokens: 5,
cached_input_tokens: 2,
},
total_cost_usd: 0.00042,
}),