diff --git a/src/server/execute.test.ts b/src/server/execute.test.ts index d28b87a..2a2e4ee 100644 --- a/src/server/execute.test.ts +++ b/src/server/execute.test.ts @@ -170,12 +170,35 @@ describe("buildPartialRunError", () => { expect(msg).toBe("Claude exited with code 1: Error: no API key configured"); }); - it("uses first non-system JSON event as content", () => { + it("skips result events (structured protocol artefact — not surfaced verbatim)", () => { + // In production, buildPartialRunError is only called when parseClaudeStreamJson + // returns null (no result event). If somehow a result event appears here, the + // raw JSON blob must not be shown — the "did not produce a result" message is + // cleaner and avoids leaking protocol internals to the UI. const resultLike = JSON.stringify({ type: "result", subtype: "error", result: "rate limit" }); const stdout = [initLine, resultLike].join("\n"); const msg = buildPartialRunError(2, "claude-sonnet-4-6", stdout); - expect(msg).toContain("rate limit"); - expect(msg).toContain("code 2"); + expect(msg).toContain("did not produce a result"); + expect(msg).toContain("claude-sonnet-4-6"); + expect(msg).not.toMatch(/\{.*type.*result/); + }); + + it("skips rate_limit_event and surfaces model hint (FAR-32 Anthropic/Nancy repro)", () => { + // Derived from the second FAR-32 variant (init, rate_limit_event, then a + // thinking-only assistant event; no result). Only the init and rate_limit_event + // lines are fed in here; the rate_limit_event JSON must not appear in the error. 
+ const rateLimitEvent = JSON.stringify({ + type: "rate_limit_event", + rate_limit_info: { status: "allowed", resetsAt: 1777056000, rateLimitType: "five_hour" }, + uuid: "3ab8f9eb-b9d6-4bf6-9c39-4608427717fc", + session_id: "ad5f3e11-3c0c-4144-b53d-d4b959e57cee", + }); + const stdout = [initLine, rateLimitEvent].join("\n"); + const msg = buildPartialRunError(null, "claude-opus-4-7", stdout); + expect(msg).toContain("claude-opus-4-7"); + expect(msg).toContain("did not produce a result"); + expect(msg).not.toContain("rate_limit_event"); + expect(msg).not.toContain("rateLimitType"); }); it("skips assistant events and surfaces model hint (FAR-32: MiniMax-M2.7 output_tokens=0)", () => { diff --git a/src/server/execute.ts b/src/server/execute.ts index f8609c5..55ce0db 100644 --- a/src/server/execute.ts +++ b/src/server/execute.ts @@ -117,11 +117,12 @@ export function buildPartialRunError( ): string { if (exitCode === 0) return "Failed to parse Claude JSON output"; - // Walk stdout lines, skip system and intermediate streaming events, return - // the first human-readable content line. assistant/user events are - // intermediate and contain raw JSON blobs that make poor error messages; - // result events are retained because they may carry useful error details - // (e.g. rate-limit messages). + // Walk stdout lines and skip every structured streaming event (any JSON + // object whose "type" field is a non-empty string: system, assistant, user, + // rate_limit_event, result, …). All of these are protocol artefacts and + // produce confusing raw-JSON blobs when surfaced verbatim as an error + // message. Only the remaining lines (non-JSON text, or JSON without a string + // "type") are treated as human-readable content worth including in the error. 
const firstContentLine = stdout.split(/\r?\n/) .map((l) => l.trim()) .find((l) => { @@ -130,7 +131,7 @@ export function buildPartialRunError( const obj = JSON.parse(l); if (typeof obj === "object" && obj !== null) { const t = (obj as Record<string, unknown>).type; - if (t === "system" || t === "assistant" || t === "user") return false; + if (typeof t === "string" && t) return false; } } catch { // not JSON — treat as content @@ -138,9 +139,9 @@ export function buildPartialRunError( return true; }) ?? ""; - // If the stream contains only system/init and intermediate events with no - // plain-text or result output, surface the model name so the operator can - // diagnose missing credentials or unsupported model. + // If the stream contained only structured events with no plain-text output, + // surface the model name so the operator can diagnose missing credentials + // or unsupported/misconfigured model. const initOnlyOutput = stdout.trim() !== "" && model !== "" && !firstContentLine; if (initOnlyOutput) { const modelHint = model ? ` (model: ${model})` : "";