refactor: extract services layer, Result type, and ErrorCode classification

- Add DI container (src/services/) with AgentExecutionService, ConfigLoaderService, and ExploitationCheckerService — pure domain logic with no Temporal dependencies
- Introduce Result<T, E> type and ErrorCode enum for code-based error classification in classifyErrorForTemporal, replacing scattered string matching
- Consolidate billing/spending cap detection into utils/billing-detection.ts with shared pattern lists across message-handlers, claude-executor, and error-handling
- Extract LogStream abstraction for append-only logging with backpressure, used by both AgentLogger and WorkflowLogger
- Simplify activities.ts from inline lifecycle logic to thin wrappers delegating to services, with heartbeat and error classification
- Expand config-parser with human-readable AJV errors, security validation, and rule type-specific checks
2026-02-16 16:12:21 -08:00
parent ae69478541
commit d3816a29fa
31 changed files with 1664 additions and 707 deletions
@@ -0,0 +1,95 @@
+// Copyright (C) 2025 Keygraph, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License version 3
+// as published by the Free Software Foundation.
+
+/**
+ * Consolidated billing/spending cap detection utilities.
+ *
+ * Anthropic's spending cap behavior is inconsistent:
+ * - Sometimes a proper SDK error (billing_error)
+ * - Sometimes Claude responds with text about the cap
+ * - Sometimes partial billing before cutoff
+ *
+ * This module provides defense-in-depth detection with shared pattern lists
+ * to prevent drift between detection points.
+ */
+
+/**
+ * Lowercase substrings sniffed in SDK output text (what Claude says); used by message-handlers.ts and the behavioral heuristic.
+ * NOTE(review): 'resets' is a very broad substring and may match unrelated text — confirm it is intentional.
+ */
+export const BILLING_TEXT_PATTERNS = [
+  'spending cap',
+  'spending limit',
+  'cap reached',
+  'budget exceeded',
+  'usage limit',
+  'resets',
+] as const;
+
+/**
+ * Lowercase substrings of API error messages (what the API returns); used by classifyErrorForTemporal in error-handling.ts.
+ * NOTE(review): 'usage is blocked due to insufficient credits' is already covered by the shorter 'insufficient credits' entry.
+ */
+export const BILLING_API_PATTERNS = [
+  'billing_error',
+  'credit balance is too low',
+  'insufficient credits',
+  'usage is blocked due to insufficient credits',
+  'please visit plans & billing',
+  'please visit plans and billing',
+  'usage limit reached',
+  'quota exceeded',
+  'daily rate limit',
+  'limit will reset',
+  'billing limit reached',
+] as const;
+
+/**
+ * Returns true when `text`, lowercased, contains at least one BILLING_TEXT_PATTERNS entry as a substring.
+ * Used for sniffing SDK output content for spending cap messages.
+ */
+export function matchesBillingTextPattern(text: string): boolean {
+  const lowerText = text.toLowerCase();
+  return BILLING_TEXT_PATTERNS.some((pattern) => lowerText.includes(pattern));
+}
+
+/**
+ * Returns true when `message`, lowercased, contains at least one BILLING_API_PATTERNS entry as a substring.
+ * Used for classifying API error messages.
+ */
+export function matchesBillingApiPattern(message: string): boolean {
+  const lowerMessage = message.toLowerCase();
+  return BILLING_API_PATTERNS.some((pattern) => lowerMessage.includes(pattern));
+}
+
+/**
+ * Behavioral heuristic for detecting spending cap.
+ *
+ * When Claude hits a spending cap, it often returns a short message
+ * with $0 cost. Legitimate agent work NEVER costs $0 with only 1-2 turns.
+ *
+ * All three signals must hold for a positive result:
+ * 1. Very low turn count (<=2)
+ * 2. Zero cost (exactly 0)
+ * 3. Text matches billing patterns (case-insensitive substring of BILLING_TEXT_PATTERNS)
+ *
+ * @param turns - Number of turns the agent took
+ * @param cost - Total cost in USD
+ * @param resultText - The result text from the agent
+ * @returns true only when turns <= 2, cost === 0, and resultText matches a billing text pattern
+ */
+export function isSpendingCapBehavior(
+  turns: number,
+  cost: number,
+  resultText: string
+): boolean {
+  // Numeric gates first: more than 2 turns or any non-zero cost rules out the heuristic
+  if (turns > 2 || cost !== 0) {
+    return false;
+  }
+
+  return matchesBillingTextPattern(resultText);
+}
@@ -6,6 +6,8 @@

 import { $ } from 'zx';
 import chalk from 'chalk';
+import { PentestError } from '../error-handling.js';
+import { ErrorCode } from '../types/errors.js';

 /**
 * Check if a directory is a git repository.
@@ -148,7 +150,13 @@ export async function executeGitCommandWithRetry(
        throw error;
      }
    }
-    throw new Error(`Git command failed after ${maxRetries} retries`);
+    throw new PentestError(
+      `Git command failed after ${maxRetries} retries`,
+      'filesystem',
+      true, // Retryable - transient git lock issues
+      { maxRetries, description },
+      ErrorCode.GIT_CHECKPOINT_FAILED
+    );
  } finally {
    gitSemaphore.release();
  }
@@ -189,9 +197,18 @@ export async function rollbackGitWorkspace(
    );
    return { success: true };
  } catch (error) {
-    const result = toErrorResult(error);
-    console.log(chalk.red(`    ❌ Rollback failed after retries: ${result.error?.message}`));
-    return result;
+    const errMsg = error instanceof Error ? error.message : String(error);
+    console.log(chalk.red(`    ❌ Rollback failed after retries: ${errMsg}`));
+    return {
+      success: false,
+      error: new PentestError(
+        `Git rollback failed: ${errMsg}`,
+        'filesystem',
+        false, // Non-retryable - rollback is best-effort cleanup
+        { sourceDir, reason },
+        ErrorCode.GIT_ROLLBACK_FAILED
+      ),
+    };
  }
 }