forked from farhoodlabs/paperclip
b74d94ba1e
## Thinking Path Paperclip orchestrates AI agent runs and reports their success or failure. The Pi adapter spawns a local Pi process and interprets its JSONL output to determine the run outcome. When Pi hits a quota limit (429 RESOURCE_EXHAUSTED), it retries internally and emits an `auto_retry_end` event with `success: false` — but still exits with code 0. The current adapter trusts the exit code, so Paperclip marks the run as succeeded even though it produced no useful work. This PR teaches the parser to detect quota exhaustion and synthesize a failure. Closes #2234 ## Changes - Parse `auto_retry_end` events with `success: false` into `result.errors` - Parse standalone `error` events into `result.errors` - Synthesize exit code 1 when Pi exits 0 but parsed errors exist - Use the parsed error as `errorMessage` so the failure reason is visible in the UI ## Verification ```bash pnpm vitest run pi-local-execute pnpm vitest run --reporter=verbose 2>&1 | grep pi-local ``` - `parse.test.ts`: covers failed retry, successful retry (no error), standalone error events, and empty error messages - `pi-local-execute.test.ts`: end-to-end test with a fake Pi binary that emits `auto_retry_end` + exits 0, asserts the run is marked failed ## Risks - **Low**: Only affects runs where Pi exits 0 with a parsed error — no change to normal successful or already-failing runs - If Pi emits `auto_retry_end { success: false }` but the run actually produced valid output, this would incorrectly mark it as failed. This seems unlikely given the semantics of the event. ## Model Used - Claude Opus 4.6 (Anthropic) — assisted with test additions and PR template ## Checklist - [x] Thinking path documented - [x] Model specified - [x] Tests pass locally - [x] Test coverage for new parse branches (success path, error events, empty messages) - [x] No UI changes - [x] Risk analysis included --------- Co-authored-by: Dawid Piaskowski <dawid@MacBook-Pro.local> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
274 lines
7.4 KiB
TypeScript
274 lines
7.4 KiB
TypeScript
import { describe, expect, it } from "vitest";
|
|
import { parsePiJsonl, isPiUnknownSessionError } from "./parse.js";
|
|
|
|
describe("parsePiJsonl", () => {
|
|
it("parses agent lifecycle and messages", () => {
|
|
const stdout = [
|
|
JSON.stringify({ type: "agent_start" }),
|
|
JSON.stringify({
|
|
type: "turn_end",
|
|
message: {
|
|
role: "assistant",
|
|
content: [{ type: "text", text: "Hello from Pi" }],
|
|
},
|
|
}),
|
|
JSON.stringify({ type: "agent_end", messages: [] }),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.messages).toContain("Hello from Pi");
|
|
expect(parsed.finalMessage).toBe("Hello from Pi");
|
|
});
|
|
|
|
it("parses streaming text deltas", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "message_update",
|
|
assistantMessageEvent: { type: "text_delta", delta: "Hello " },
|
|
}),
|
|
JSON.stringify({
|
|
type: "message_update",
|
|
assistantMessageEvent: { type: "text_delta", delta: "World" },
|
|
}),
|
|
JSON.stringify({
|
|
type: "turn_end",
|
|
message: {
|
|
role: "assistant",
|
|
content: "Hello World",
|
|
},
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.messages).toContain("Hello World");
|
|
});
|
|
|
|
it("parses tool execution", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "tool_execution_start",
|
|
toolCallId: "tool_1",
|
|
toolName: "read",
|
|
args: { path: "/tmp/test.txt" },
|
|
}),
|
|
JSON.stringify({
|
|
type: "tool_execution_end",
|
|
toolCallId: "tool_1",
|
|
toolName: "read",
|
|
result: "file contents",
|
|
isError: false,
|
|
}),
|
|
JSON.stringify({
|
|
type: "turn_end",
|
|
message: { role: "assistant", content: "Done" },
|
|
toolResults: [
|
|
{
|
|
toolCallId: "tool_1",
|
|
content: "file contents",
|
|
isError: false,
|
|
},
|
|
],
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.toolCalls).toHaveLength(1);
|
|
expect(parsed.toolCalls[0].toolName).toBe("read");
|
|
expect(parsed.toolCalls[0].result).toBe("file contents");
|
|
expect(parsed.toolCalls[0].isError).toBe(false);
|
|
});
|
|
|
|
it("handles errors in tool execution", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "tool_execution_start",
|
|
toolCallId: "tool_1",
|
|
toolName: "read",
|
|
args: { path: "/missing.txt" },
|
|
}),
|
|
JSON.stringify({
|
|
type: "tool_execution_end",
|
|
toolCallId: "tool_1",
|
|
toolName: "read",
|
|
result: "File not found",
|
|
isError: true,
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.toolCalls).toHaveLength(1);
|
|
expect(parsed.toolCalls[0].isError).toBe(true);
|
|
expect(parsed.toolCalls[0].result).toBe("File not found");
|
|
});
|
|
|
|
it("extracts usage and cost from turn_end events", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "turn_end",
|
|
message: {
|
|
role: "assistant",
|
|
content: "Response with usage",
|
|
usage: {
|
|
input: 100,
|
|
output: 50,
|
|
cacheRead: 20,
|
|
totalTokens: 170,
|
|
cost: {
|
|
input: 0.001,
|
|
output: 0.0015,
|
|
cacheRead: 0.0001,
|
|
cacheWrite: 0,
|
|
total: 0.0026,
|
|
},
|
|
},
|
|
},
|
|
toolResults: [],
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.usage.inputTokens).toBe(100);
|
|
expect(parsed.usage.outputTokens).toBe(50);
|
|
expect(parsed.usage.cachedInputTokens).toBe(20);
|
|
expect(parsed.usage.costUsd).toBeCloseTo(0.0026, 4);
|
|
});
|
|
|
|
it("accumulates usage from multiple turns", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "turn_end",
|
|
message: {
|
|
role: "assistant",
|
|
content: "First response",
|
|
usage: {
|
|
input: 50,
|
|
output: 25,
|
|
cacheRead: 0,
|
|
cost: { total: 0.001 },
|
|
},
|
|
},
|
|
}),
|
|
JSON.stringify({
|
|
type: "turn_end",
|
|
message: {
|
|
role: "assistant",
|
|
content: "Second response",
|
|
usage: {
|
|
input: 30,
|
|
output: 20,
|
|
cacheRead: 10,
|
|
cost: { total: 0.0015 },
|
|
},
|
|
},
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.usage.inputTokens).toBe(80);
|
|
expect(parsed.usage.outputTokens).toBe(45);
|
|
expect(parsed.usage.cachedInputTokens).toBe(10);
|
|
expect(parsed.usage.costUsd).toBeCloseTo(0.0025, 4);
|
|
});
|
|
|
|
it("handles standalone usage events with Pi format", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "usage",
|
|
usage: {
|
|
input: 200,
|
|
output: 100,
|
|
cacheRead: 50,
|
|
cost: { total: 0.005 },
|
|
},
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.usage.inputTokens).toBe(200);
|
|
expect(parsed.usage.outputTokens).toBe(100);
|
|
expect(parsed.usage.cachedInputTokens).toBe(50);
|
|
expect(parsed.usage.costUsd).toBe(0.005);
|
|
});
|
|
|
|
it("handles standalone usage events with generic format", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "usage",
|
|
usage: {
|
|
inputTokens: 150,
|
|
outputTokens: 75,
|
|
cachedInputTokens: 25,
|
|
costUsd: 0.003,
|
|
},
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.usage.inputTokens).toBe(150);
|
|
expect(parsed.usage.outputTokens).toBe(75);
|
|
expect(parsed.usage.cachedInputTokens).toBe(25);
|
|
expect(parsed.usage.costUsd).toBe(0.003);
|
|
});
|
|
|
|
it("surfaces failed auto-retry exhaustion as an error", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "auto_retry_end",
|
|
success: false,
|
|
attempt: 3,
|
|
finalError: "Cloud Code Assist API error (429): RESOURCE_EXHAUSTED",
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.errors).toEqual(["Cloud Code Assist API error (429): RESOURCE_EXHAUSTED"]);
|
|
});
|
|
|
|
it("does not treat successful auto-retry as an error", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "auto_retry_end",
|
|
success: true,
|
|
attempt: 2,
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.errors).toEqual([]);
|
|
});
|
|
|
|
it("surfaces standalone error events", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "error",
|
|
message: "Connection to model provider lost",
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.errors).toEqual(["Connection to model provider lost"]);
|
|
});
|
|
|
|
it("ignores error events with empty messages", () => {
|
|
const stdout = [
|
|
JSON.stringify({
|
|
type: "error",
|
|
message: "",
|
|
}),
|
|
].join("\n");
|
|
|
|
const parsed = parsePiJsonl(stdout);
|
|
expect(parsed.errors).toEqual([]);
|
|
});
|
|
});
|
|
|
|
describe("isPiUnknownSessionError", () => {
|
|
it("detects unknown session errors", () => {
|
|
expect(isPiUnknownSessionError("session not found: s_123", "")).toBe(true);
|
|
expect(isPiUnknownSessionError("", "unknown session id")).toBe(true);
|
|
expect(isPiUnknownSessionError("", "no session available")).toBe(true);
|
|
expect(isPiUnknownSessionError("all good", "")).toBe(false);
|
|
expect(isPiUnknownSessionError("working fine", "no errors")).toBe(false);
|
|
});
|
|
});
|