feat: use structured outputs for vuln agent exploitation queues (#267)

* feat: add structured outputs for vuln agent exploitation queues

  Use Claude Agent SDK's native outputFormat to get schema-validated JSON queue data from vulnerability analysis agents instead of relying on save-deliverable tool calls for queue files.

  - Add Zod schemas for all 5 vuln types (injection, xss, auth, ssrf, authz)
  - Thread outputFormat through SDK call chain (executor → message handlers)
  - Write structured_output to disk as queue JSON before validation
  - Handle error_max_structured_output_retries as retryable failure
  - Update vuln prompts to use structured output for queues
  - Keep save-deliverable for markdown deliverables (unchanged)

* fix: correct structured output schema conversion for Claude Agent SDK

  Use draft-07 target for z.toJSONSchema() instead of the default draft-2020-12, which the SDK's AJV validator doesn't support. Update pipeline-testing prompts to use structured output instead of raw JSON responses.

* refactor: remove save-deliverable references for queues in vuln prompts

  Queues are now captured via structured outputs, so vuln agents no longer need to use save-deliverable for queue JSON. Removes references to "structured response/output" phrasing and aligns all prompts to use consistent "exploitation queue" terminology.

* refactor: remove queue support from save-deliverable

  Queues are now produced via structured outputs, so save-deliverable no longer needs queue-related code. Removes queue enum values, filename mappings, JSON validation, and updates all prompt tool descriptions to match the simplified CLI interface.

* fix: instruct vuln agents to save deliverable before exploitation queue

  The structured output tool terminates the agent session when called. Agents were calling it before saving their deliverable markdown, causing output validation failures and unnecessary retries.

* refactor: remove explicit exploitation queue output instructions from vuln prompts

  The Claude Agent SDK automatically captures structured output on the last turn when outputFormat is set. Prompts explicitly telling agents to produce the queue caused them to call StructuredOutput mid-session, conflicting with the SDK mechanism and silently dropping the output. Removed exploitation_queue_requirements sections and queue references from conclusion triggers. Added note that the queue is captured automatically. Updated Your Output to point to the deliverable markdown.
2026-04-02 01:12:00 +05:30
parent 6a0c8ce710
commit 2a433f090f
28 changed files with 273 additions and 236 deletions
@@ -6,7 +6,7 @@

 // Production Claude agent execution with retry, git checkpoints, and audit logging

-import { query } from '@anthropic-ai/claude-agent-sdk';
+import { type JsonSchemaOutputFormat, query } from '@anthropic-ai/claude-agent-sdk';
 import { fs, path } from 'zx';
 import type { AuditSession } from '../audit/index.js';
 import { isRetryableError, PentestError } from '../services/error-handling.js';
@@ -39,6 +39,7 @@ export interface ClaudePromptResult {
  errorType?: string | undefined;
  prompt?: string | undefined;
  retryable?: boolean | undefined;
+  structuredOutput?: unknown;
 }

 function outputLines(lines: string[]): void {
@@ -132,6 +133,7 @@ export async function runClaudePrompt(
  auditSession: AuditSession | null = null,
  logger: ActivityLogger,
  modelTier: ModelTier = 'medium',
+  outputFormat?: JsonSchemaOutputFormat,
 ): Promise<ClaudePromptResult> {
  // 1. Initialize timing and prompt
  const timer = new Timer(`agent-${description.toLowerCase().replace(/\s+/g, '-')}`);
@@ -186,6 +188,7 @@ export async function runClaudePrompt(
    allowDangerouslySkipPermissions: true,
    settingSources: ['user'] as ('user' | 'project' | 'local')[],
    env: sdkEnv,
+    ...(outputFormat && { outputFormat }),
  };

  if (!execContext.useCleanOutput) {
@@ -243,6 +246,9 @@ export async function runClaudePrompt(
      model,
      partialCost: totalCost,
      apiErrorDetected,
+      ...(messageLoopResult.structuredOutput !== undefined && {
+        structuredOutput: messageLoopResult.structuredOutput,
+      }),
    };
  } catch (error) {
    // 9. Handle errors — log, write error file, return failure
@@ -273,6 +279,7 @@ interface MessageLoopResult {
  apiErrorDetected: boolean;
  cost: number;
  model?: string | undefined;
+  structuredOutput?: unknown;
 }

 interface MessageLoopDeps {
@@ -297,6 +304,7 @@ async function processMessageStream(
  let apiErrorDetected = false;
  let cost = 0;
  let model: string | undefined;
+  let structuredOutput: unknown | undefined;
  let lastHeartbeat = Date.now();

  for await (const message of query({ prompt: fullPrompt, options })) {
@@ -327,6 +335,9 @@ async function processMessageStream(
    if (dispatchResult.type === 'complete') {
      result = dispatchResult.result;
      cost = dispatchResult.cost;
+      if (dispatchResult.structuredOutput !== undefined) {
+        structuredOutput = dispatchResult.structuredOutput;
+      }
      break;
    }

@@ -341,5 +352,12 @@ async function processMessageStream(
    }
  }

-  return { turnCount, result, apiErrorDetected, cost, model };
+  return {
+    turnCount,
+    result,
+    apiErrorDetected,
+    cost,
+    model,
+    ...(structuredOutput !== undefined && { structuredOutput }),
+  };
 }
@@ -223,6 +223,10 @@ function handleResultMessage(message: ResultMessage): ResultData {
    }
  }

+  if (message.structured_output !== undefined) {
+    result.structuredOutput = message.structured_output;
+  }
+
  return result;
 }

@@ -259,7 +263,7 @@ function outputLines(lines: string[]): void {

 export type MessageDispatchAction =
  | { type: 'continue'; apiErrorDetected?: boolean | undefined; model?: string | undefined }
-  | { type: 'complete'; result: string | null; cost: number }
+  | { type: 'complete'; result: string | null; cost: number; structuredOutput?: unknown }
  | { type: 'throw'; error: Error };

 export interface MessageDispatchDeps {
@@ -338,7 +342,26 @@ export async function dispatchMessage(
    case 'result': {
      const resultData = handleResultMessage(message as ResultMessage);
      outputLines(formatResultOutput(resultData, !execContext.useCleanOutput));
-      return { type: 'complete', result: resultData.result, cost: resultData.cost };
+
+      if (resultData.subtype === 'error_max_structured_output_retries') {
+        return {
+          type: 'throw',
+          error: new PentestError(
+            'Structured output validation failed after max retries',
+            'validation',
+            true,
+            {},
+            ErrorCode.OUTPUT_VALIDATION_FAILED,
+          ),
+        };
+      }
+
+      return {
+        type: 'complete' as const,
+        result: resultData.result,
+        cost: resultData.cost,
+        ...(resultData.structuredOutput !== undefined && { structuredOutput: resultData.structuredOutput }),
+      };
    }

    default:
@@ -0,0 +1,124 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Zod schema definitions for vulnerability exploitation queue structured outputs.
+ *
+ * Each vuln agent returns a structured JSON response matching its schema.
+ * The SDK validates the output against the JSON Schema generated from these Zod definitions.
+ */
+
+import type { JsonSchemaOutputFormat } from '@anthropic-ai/claude-agent-sdk';
+import { z } from 'zod';
+import type { AgentName } from '../types/agents.js';
+
+// === Common Fields ===
+
+const baseVulnerability = z.object({ // fields common to every vuln type; extended by the five schemas below
+  ID: z.string(),
+  vulnerability_type: z.string(),
+  externally_exploitable: z.boolean(),
+  confidence: z.string(), // plain string — this schema does not restrict it to an enum
+  notes: z.string().optional(), // the only optional common field
+});
+
+// === Per-Vuln-Type Schemas ===
+
+const InjectionVulnerability = baseVulnerability.extend({ // injection entry: every added field is an optional string
+  source: z.string().optional(),
+  combined_sources: z.string().optional(),
+  path: z.string().optional(),
+  sink_call: z.string().optional(),
+  slot_type: z.string().optional(),
+  sanitization_observed: z.string().optional(),
+  concat_occurrences: z.string().optional(), // typed as a string here, not a number
+  verdict: z.string().optional(),
+  mismatch_reason: z.string().optional(),
+  witness_payload: z.string().optional(),
+});
+
+const XssVulnerability = baseVulnerability.extend({ // XSS entry: every added field is an optional string
+  source: z.string().optional(),
+  source_detail: z.string().optional(),
+  path: z.string().optional(),
+  sink_function: z.string().optional(),
+  render_context: z.string().optional(),
+  encoding_observed: z.string().optional(),
+  verdict: z.string().optional(),
+  mismatch_reason: z.string().optional(),
+  witness_payload: z.string().optional(),
+});
+
+const AuthVulnerability = baseVulnerability.extend({ // auth entry: every added field is an optional string
+  source_endpoint: z.string().optional(),
+  vulnerable_code_location: z.string().optional(),
+  missing_defense: z.string().optional(),
+  exploitation_hypothesis: z.string().optional(),
+  suggested_exploit_technique: z.string().optional(),
+});
+
+const SsrfVulnerability = baseVulnerability.extend({ // SSRF entry: AuthVulnerability's added fields plus vulnerable_parameter
+  source_endpoint: z.string().optional(),
+  vulnerable_parameter: z.string().optional(), // the one field AuthVulnerability does not declare
+  vulnerable_code_location: z.string().optional(),
+  missing_defense: z.string().optional(),
+  exploitation_hypothesis: z.string().optional(),
+  suggested_exploit_technique: z.string().optional(),
+});
+
+const AuthzVulnerability = baseVulnerability.extend({ // authz entry: every added field is an optional string
+  endpoint: z.string().optional(),
+  vulnerable_code_location: z.string().optional(),
+  role_context: z.string().optional(),
+  guard_evidence: z.string().optional(),
+  side_effect: z.string().optional(),
+  reason: z.string().optional(),
+  minimal_witness: z.string().optional(),
+});
+
+// === Queue Wrapper Schemas ===
+// Each queue is an object with one required `vulnerabilities` array holding that type's entries.
+const InjectionQueueSchema = z.object({ vulnerabilities: z.array(InjectionVulnerability) });
+const XssQueueSchema = z.object({ vulnerabilities: z.array(XssVulnerability) });
+const AuthQueueSchema = z.object({ vulnerabilities: z.array(AuthVulnerability) });
+const SsrfQueueSchema = z.object({ vulnerabilities: z.array(SsrfVulnerability) });
+const AuthzQueueSchema = z.object({ vulnerabilities: z.array(AuthzVulnerability) });
+
+// === Convert to JSON Schema for SDK ===
+
+// Wraps a Zod schema as the SDK's `json_schema` output format. NOTE: the SDK's AJV validator expects
+// draft-07; Zod's default target (draft-2020-12) causes the SDK to silently skip structured output.
+function toOutputFormat(zodSchema: z.ZodType): JsonSchemaOutputFormat {
+  return { type: 'json_schema', schema: z.toJSONSchema(zodSchema, { target: 'draft-07' }) as Record<string, unknown> };
+}
+
+// === Lookup Maps ===
+
+const VULN_AGENT_OUTPUT_FORMAT: Partial<Record<AgentName, JsonSchemaOutputFormat>> = {
+  'injection-vuln': toOutputFormat(InjectionQueueSchema), // each conversion runs once, when this module is evaluated
+  'xss-vuln': toOutputFormat(XssQueueSchema),
+  'auth-vuln': toOutputFormat(AuthQueueSchema),
+  'ssrf-vuln': toOutputFormat(SsrfQueueSchema),
+  'authz-vuln': toOutputFormat(AuthzQueueSchema),
+}; // Partial: agent names without an entry yield undefined on lookup
+
+const VULN_AGENT_QUEUE_FILENAMES: Partial<Record<AgentName, string>> = {
+  'injection-vuln': 'injection_exploitation_queue.json', // bare filenames; no directory component
+  'xss-vuln': 'xss_exploitation_queue.json',
+  'auth-vuln': 'auth_exploitation_queue.json',
+  'ssrf-vuln': 'ssrf_exploitation_queue.json',
+  'authz-vuln': 'authz_exploitation_queue.json',
+}; // same five agent keys as VULN_AGENT_OUTPUT_FORMAT
+
+/** Returns the precomputed json_schema output format for a vuln agent, or undefined for any other agent. */
+export function getOutputFormat(agentName: AgentName): JsonSchemaOutputFormat | undefined {
+  return VULN_AGENT_OUTPUT_FORMAT[agentName];
+}
+
+/** Returns the exploitation-queue JSON filename (bare name, no directory) for a vuln agent, else undefined. */
+export function getQueueFilename(agentName: AgentName): string | undefined {
+  return VULN_AGENT_QUEUE_FILENAMES[agentName];
+}
@@ -34,6 +34,7 @@ export interface ResultData {
  subtype?: string;
  stop_reason?: string | null;
  permissionDenials: number;
+  structuredOutput?: unknown;
 }

 export interface ToolUseData {
@@ -69,6 +70,7 @@ export interface ResultMessage {
  subtype?: string;
  stop_reason?: string | null;
  permission_denials?: unknown[];
+  structured_output?: unknown;
 }

 export interface ToolUseMessage {