fix: detect mid-stream truncation and emit claude_truncated error code (FAR-95)

When Claude produces assistant content (output_tokens > 0) but the stream ends without a result event, classify the run as truncated mid-stream rather than falling through to the generic "did not produce a result — check API credentials" message. The misleading hint pointed operators at auth/model config when the real cause was pod termination, OOMKill, or CLI crash.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-26 01:54:35 +00:00
parent 818aa0f1d6
commit a2874c0426
4 changed files with 104 additions and 0 deletions
@@ -994,6 +994,44 @@ describe("execute: happy path", () => {
    expect(result.errorMessage).toContain("output_tokens: 0");
  });

+  it("returns claude_truncated when assistant produced content but no result event arrived (FAR-95)", async () => {
+    const truncatedOutput = [
+      JSON.stringify({ type: "system", subtype: "init", model: "claude-opus-4-7", session_id: "sess_trunc" }),
+      JSON.stringify({
+        type: "assistant",
+        session_id: "sess_trunc",
+        message: {
+          id: "msg_trunc",
+          stop_reason: null,
+          usage: { input_tokens: 1, output_tokens: 35, cache_creation_input_tokens: 523, cache_read_input_tokens: 46295 },
+          content: [{ type: "tool_use", id: "tool_1", name: "Bash", input: { command: "echo hi" } }],
+        },
+      }),
+      JSON.stringify({
+        type: "user",
+        message: { role: "user", content: [{ tool_use_id: "tool_1", type: "tool_result", content: "hi", is_error: false }] },
+      }),
+    ].join("\n") + "\n";
+
+    mockLogFn.mockImplementation(
+      async (_ns: string, _pod: string, _ctr: string, writable: Writable) => {
+        writable.write(truncatedOutput);
+      },
+    );
+    mockCoreListPods.mockResolvedValue({
+      items: [{ metadata: { name: "pod-abc" }, status: { containerStatuses: [{ name: "claude", state: { terminated: { exitCode: 137 } } }] } }],
+    });
+
+    const executePromise = execute(makeCtx());
+    await vi.advanceTimersByTimeAsync(3_100);
+    const result = await executePromise;
+
+    expect(result.errorCode).toBe("claude_truncated");
+    expect(result.errorMessage).toContain("truncated mid-stream");
+    expect(result.errorMessage).toContain("claude-opus-4-7");
+    expect(result.errorMessage).toContain("exit code 137");
+  });
+
  it("reconnects log stream and logs status when job completion takes > 3s", async () => {
    // Make waitForJobCompletion take 4s so the 3s stream reconnect fires first.
    // timeoutSec=4, graceSec=0 → completionTimeoutMs=4000.
@@ -1367,6 +1367,18 @@ export async function execute(ctx: AdapterExecutionContext): Promise<AdapterExec
        resultJson: { stdout },
      };
    }
+    if (parsedStream.truncatedMidStream) {
+      const exitHint = exitCode === null ? "no exit code" : `exit code ${exitCode}`;
+      const modelHint = parsedStream.model ? ` (model: ${parsedStream.model})` : "";
+      return {
+        exitCode,
+        signal: null,
+        timedOut: false,
+        errorMessage: `Claude run was truncated mid-stream${modelHint} — assistant produced content but no result event arrived (${exitHint}); pod may have been terminated, OOMKilled, or the CLI crashed`,
+        errorCode: "claude_truncated",
+        resultJson: { stdout },
+      };
+    }
    return {
      exitCode,
      signal: null,
@@ -224,6 +224,50 @@ more raw output`;
    expect(result.resultJson).not.toBeNull();
  });

+  it("sets truncatedMidStream=true when assistant event with output_tokens>0 has no result (FAR-95)", () => {
+    const initLine = JSON.stringify({ type: "system", subtype: "init", model: "claude-opus-4-7", session_id: "sess_1" });
+    const assistantEvent = JSON.stringify({
+      type: "assistant",
+      session_id: "sess_1",
+      message: {
+        id: "msg_abc",
+        stop_reason: null,
+        usage: { input_tokens: 1, output_tokens: 35, cache_creation_input_tokens: 523, cache_read_input_tokens: 46295 },
+        content: [{ type: "tool_use", id: "tool_1", name: "Bash", input: { command: "echo hi" } }],
+      },
+    });
+    const result = parseClaudeStreamJson([initLine, assistantEvent].join("\n"));
+    expect(result.truncatedMidStream).toBe(true);
+    expect(result.llmApiEmptyResponse).toBe(false);
+    expect(result.resultJson).toBeNull();
+  });
+
+  it("clears truncatedMidStream when a result event follows assistant content", () => {
+    const assistantEvent = JSON.stringify({
+      type: "assistant",
+      message: { stop_reason: null, usage: { output_tokens: 35 }, content: [] },
+    });
+    const resultEvent = JSON.stringify({
+      type: "result",
+      result: "Done",
+      subtype: "stop",
+      total_cost_usd: 0.001,
+      usage: { input_tokens: 10, output_tokens: 5, cache_read_input_tokens: 0 },
+    });
+    const result = parseClaudeStreamJson([assistantEvent, resultEvent].join("\n"));
+    expect(result.truncatedMidStream).toBe(false);
+    expect(result.resultJson).not.toBeNull();
+  });
+
+  it("does not set truncatedMidStream when assistant has output_tokens=0", () => {
+    const assistantEvent = JSON.stringify({
+      type: "assistant",
+      message: { stop_reason: null, usage: { output_tokens: 0 }, content: [] },
+    });
+    const result = parseClaudeStreamJson(assistantEvent);
+    expect(result.truncatedMidStream).toBe(false);
+  });
+
  it("sets llmApiEmptyResponse=false for normal result", () => {
    const resultEvent = JSON.stringify({
      type: "result",
@@ -19,6 +19,10 @@ export function parseClaudeStreamJson(stdout: string) {
  // with no subsequent result event — indicates the upstream LLM API returned
  // an empty/malformed response (e.g. MiniMax degraded performance).
  let llmApiEmptyResponse = false;
+  // True while an assistant event with output_tokens > 0 has been seen with no
+  // later result event. Still true at end of stream means the run was truncated
+  // mid-stream (pod terminated, OOMKill, or claude CLI crash after producing content).
+  let assistantContentSeen = false;

  for (const rawLine of stdout.split(/\r?\n/)) {
    const line = rawLine.trim();
@@ -49,6 +53,9 @@ export function parseClaudeStreamJson(stdout: string) {
      if (stopReason === null && outputTokens === 0) {
        llmApiEmptyResponse = true;
      }
+      if (outputTokens > 0) {
+        assistantContentSeen = true;
+      }

      for (let i = 0; i < content.length; i++) {
        const entry = content[i];
@@ -72,6 +79,7 @@ export function parseClaudeStreamJson(stdout: string) {
    if (type === "result") {
      finalResult = event;
      llmApiEmptyResponse = false; // result event means Claude completed normally
+      assistantContentSeen = false; // result event means stream was not truncated
      sessionId = asString(event.session_id, sessionId ?? "") || sessionId;
    }
  }
@@ -85,6 +93,7 @@ export function parseClaudeStreamJson(stdout: string) {
      summary: assistantTexts.join("\n\n").trim(),
      resultJson: null as Record<string, unknown> | null,
      llmApiEmptyResponse,
+      truncatedMidStream: assistantContentSeen,
    };
  }

@@ -106,6 +115,7 @@ export function parseClaudeStreamJson(stdout: string) {
    summary,
    resultJson: finalResult,
    llmApiEmptyResponse: false,
+    truncatedMidStream: false,
  };
 }