fix: critical bug - exploitation phase was always skipped

ROOT CAUSE:
- Exploitation phase checked session.validationResults to determine eligibility
- validationResults field was removed during audit system refactor
- Field never existed in session schema, so all exploits were skipped

THE FIX:
- Exploitation phase now validates queue files directly when checking eligibility
- Reads exploitation_queue.json and checks if vulnerabilities array is non-empty
- No need to store validation results - just re-validate on demand

CHANGES:
1. runParallelExploit() now calls safeValidateQueueAndDeliverable() directly
2. Removed validationResults parameter from markAgentCompleted()
3. Simplified calculateVulnerabilityAnalysisSummary() - no longer needs validation data
4. Simplified calculateExploitationSummary() - no longer needs validation data

IMPACT:
- Exploitation agents will now run when vulnerabilities are found
- Queue files are the single source of truth for eligibility
- Simpler architecture - no duplicate state storage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-22 17:41:41 -07:00
parent 255956d113
commit cfe8dc8bc8
2 changed files with 37 additions and 46 deletions
@@ -218,12 +218,12 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
        const validation = await safeValidateQueueAndDeliverable(vulnType, targetRepo);
        if (validation.success) {
          // Log validation result (don't store - will be re-validated during exploitation phase)
          console.log(chalk.blue(`📋 Validation: ${validation.data.shouldExploit ? `Ready for exploitation (${validation.data.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
          validationData = {
            shouldExploit: validation.data.shouldExploit,
-            vulnerabilityCount: validation.data.vulnerabilityCount,
+            vulnerabilityCount: validation.data.vulnerabilityCount
            validatedAt: new Date().toISOString()
          };
          console.log(chalk.blue(`📋 Validation: ${validationData.shouldExploit ? `Ready for exploitation (${validationData.vulnerabilityCount} vulnerabilities)` : 'No vulnerabilities found'}`));
        } else {
          console.log(chalk.yellow(`⚠️ Validation failed: ${validation.error.message}`));
        }
@@ -232,8 +232,8 @@ export const runSingleAgent = async (agentName, session, pipelineTestingMode, ru
      }
    }
-    // Mark agent as completed
+    // Mark agent as completed (validation not stored - will be re-checked during exploitation)
-    await markAgentCompleted(session.id, agentName, commitHash, timingData, costData, validationData);
+    await markAgentCompleted(session.id, agentName, commitHash);
    // Only show completion message for sequential execution
    if (!skipWorkspaceClean) {
@@ -429,25 +429,36 @@ export const runParallelExploit = async (session, pipelineTestingMode, runClaude
  const { getSession } = await import('./session-manager.js');
  const freshSession = await getSession(session.id);
  // Load validation module
  const { safeValidateQueueAndDeliverable } = await import('./queue-validation.js');
  // Only run exploit agents whose vuln counterparts completed successfully AND found vulnerabilities
-  const eligibleAgents = exploitAgents.filter(agentName => {
+  const eligibilityChecks = await Promise.all(
-    const vulnAgentName = agentName.replace('-exploit', '-vuln');
+    exploitAgents.map(async (agentName) => {
      const vulnAgentName = agentName.replace('-exploit', '-vuln');
-    // Must have completed the vulnerability analysis
+      // Must have completed the vulnerability analysis
-    if (!freshSession.completedAgents.includes(vulnAgentName)) {
+      if (!freshSession.completedAgents.includes(vulnAgentName)) {
-      return false;
+        return { agentName, eligible: false };
-    }
+      }
-    // Must have found vulnerabilities to exploit
+      // Check if vulnerabilities were found by validating the queue file
-    const validationResult = freshSession.validationResults?.[vulnAgentName];
+      const vulnType = vulnAgentName.replace('-vuln', ''); // "injection-vuln" -> "injection"
-    if (!validationResult || !validationResult.shouldExploit) {
+      const validation = await safeValidateQueueAndDeliverable(vulnType, freshSession.targetRepo);
      console.log(chalk.gray(`⏭️  Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
      return false;
    }
-    console.log(chalk.blue(`✓ ${agentName} eligible (${validationResult.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
+      if (!validation.success || !validation.data.shouldExploit) {
-    return true;
+        console.log(chalk.gray(`⏭️  Skipping ${agentName} (no vulnerabilities found in ${vulnAgentName})`));
-  });
+        return { agentName, eligible: false };
      }
      console.log(chalk.blue(`✓ ${agentName} eligible (${validation.data.vulnerabilityCount} vulnerabilities from ${vulnAgentName})`));
      return { agentName, eligible: true };
    })
  );
  const eligibleAgents = eligibilityChecks
    .filter(check => check.eligible)
    .map(check => check.agentName);
  const activeAgents = eligibleAgents.filter(agent => !freshSession.completedAgents.includes(agent));
@@ -552,25 +552,12 @@ export const getSessionStatus = (session) => {
 export const calculateVulnerabilityAnalysisSummary = (session) => {
  const vulnAgents = PHASES['vulnerability-analysis'];
  const completedVulnAgents = session.completedAgents.filter(agent => vulnAgents.includes(agent));
  const validationResults = session.validationResults || {};
  let totalVulnerabilities = 0;
  let agentsWithVulns = 0;
  for (const agent of completedVulnAgents) {
    const validation = validationResults[agent];
    if (validation?.vulnerabilityCount > 0) {
      totalVulnerabilities += validation.vulnerabilityCount;
      agentsWithVulns++;
    }
  }
  // NOTE: Actual vulnerability counts require reading queue files
  // This summary only shows completion counts
  return Object.freeze({
    totalAnalyses: completedVulnAgents.length,
-    totalVulnerabilities,
+    completedAgents: completedVulnAgents
    agentsWithVulnerabilities: agentsWithVulns,
    successRate: completedVulnAgents.length > 0 ? (agentsWithVulns / completedVulnAgents.length) * 100 : 0,
    exploitationCandidates: Object.values(validationResults).filter(v => v?.shouldExploit).length
  });
 };
@@ -578,19 +565,12 @@ export const calculateVulnerabilityAnalysisSummary = (session) => {
 export const calculateExploitationSummary = (session) => {
  const exploitAgents = PHASES['exploitation'];
  const completedExploitAgents = session.completedAgents.filter(agent => exploitAgents.includes(agent));
  const validationResults = session.validationResults || {};
  // Count how many exploitation agents were eligible to run
  const eligibleExploits = exploitAgents.filter(agentName => {
    const vulnAgentName = agentName.replace('-exploit', '-vuln');
    return validationResults[vulnAgentName]?.shouldExploit;
  });
  // NOTE: Eligibility requires reading queue files
  // This summary only shows completion counts
  return Object.freeze({
    totalAttempts: completedExploitAgents.length,
-    eligibleExploits: eligibleExploits.length,
+    completedAgents: completedExploitAgents
    skippedExploits: eligibleExploits.length - completedExploitAgents.length,
    successRate: eligibleExploits.length > 0 ? (completedExploitAgents.length / eligibleExploits.length) * 100 : 0
  });
 };